def confidence_plot(model, x, xo):
    """Plot overlaid histograms of the model's top softmax probability.

    Args:
        model: Callable returning logits; it is applied to ``x`` and ``xo``.
        x: Inputs drawn in blue under the legend label 'In'.
        xo: Inputs drawn in red under the legend label 'Out'.

    Returns:
        Always 0.
    """
    # `tf.max` is not a TensorFlow API; the per-row maximum is tf.reduce_max.
    # axis=1 assumes logits shaped (batch, classes) -- TODO confirm.
    p_in = tf.reduce_max(tf.nn.softmax(model(x)), axis=1)
    p_out = tf.reduce_max(tf.nn.softmax(model(xo)), axis=1)

    plt.ylabel('Frequency')
    plt.xlabel('Confidence')
    plt.xlim([0, 1])
    plt.hist(p_in, bins=20, color='blue', label='In', alpha=.5)
    plt.hist(p_out, bins=20, color='red', label='Out', alpha=.5)
    plt.legend()
    return 0
def step(self, closure=None):
    """Performs a single optimization step.

    Gradients are first clipped per unit (relative to the parameter norm
    returned by ``unitwise_norm``), then an SGD update with optional weight
    decay, momentum, dampening and Nesterov momentum is applied.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.

    Returns:
        The value returned by ``closure``, or None when no closure is given.
    """
    # The body manipulates torch parameters (`p.grad`, `add_`, `mul_`, ...),
    # so the tensor helpers must come from torch, not TensorFlow.
    import torch

    loss = None
    if closure is not None:
        # The closure needs gradients even if the caller disabled them.
        with torch.enable_grad():
            loss = closure()

    # In-place updates of leaf parameters must not be tracked by autograd.
    with torch.no_grad():
        # Pass 1: clip each gradient where its unit-wise norm exceeds
        # `clipping` times the (eps-floored) parameter norm.
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    # Same guard as the update loop below; nothing to clip.
                    continue

                param_norm = unitwise_norm(p).clamp(min=group['eps'])
                grad_norm = unitwise_norm(p.grad)
                max_norm = param_norm * group['clipping']

                trigger = grad_norm > max_norm
                # The 1e-6 floor avoids division by zero for zero gradients.
                clipped_grad = p.grad * (max_norm / grad_norm.clamp(min=1e-6))
                p.grad.copy_(torch.where(trigger, clipped_grad, p.grad))

        # Pass 2: SGD update.
        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue

                d_p = p.grad
                if weight_decay != 0:
                    d_p = d_p.add(p, alpha=weight_decay)

                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        # First step: the buffer starts as a copy of d_p.
                        buf = param_state['momentum_buffer'] = d_p.clone().detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(d_p, alpha=1 - dampening)

                    if nesterov:
                        d_p = d_p.add(buf, alpha=momentum)
                    else:
                        d_p = buf

                p.add_(d_p, alpha=-group['lr'])

    return loss
def predict(self, x):
    '''Predict the label index for each row of ``x``.

    Args:
        x: Size=>[batch_size, self.dataset.k + self.dataset.d],
            Type=>Variable(FloatTensor), volatile

    Returns:
        The index of the largest output of ``self.D`` along dimension 1.
    '''
    scores = self.D(x, cuda=self.args.cuda)
    # `(values, indices)[1]` with `.data` is the torch tensor API; calling
    # `.max` on the tensor itself avoids the non-existent `tf.max`.
    return scores.max(1)[1].data
def cal_iou(bboxes_a, bboxes_b):
    '''Calculate the pairwise (row i vs. row i) IoU of two groups of bboxes.

    args:
        bboxes_a    a numpy array of bbox coords (with boundary coords)
        bboxes_b    a numpy array of bbox coords (with boundary coords)
        the two arrays should have the same shape (None, 4); columns 0:2 are
        used as the lower corner and columns 2:4 as the upper corner
    returns:
        a numpy array of shape (None,) with one IoU value per row
    '''
    # The documented contract is numpy in / numpy out, and `tf.min`/`tf.max`
    # do not exist; the element-wise numpy functions are used instead.
    import numpy as np

    upper = np.minimum(bboxes_a[:, 2:], bboxes_b[:, 2:])
    lower = np.maximum(bboxes_a[:, :2], bboxes_b[:, :2])
    # Negative extents mean the boxes do not overlap on that axis.
    overlap = np.maximum(upper - lower, 0)
    inter = overlap[:, 0] * overlap[:, 1]

    area_a = (bboxes_a[:, 2] - bboxes_a[:, 0]) * (bboxes_a[:, 3] - bboxes_a[:, 1])
    area_b = (bboxes_b[:, 2] - bboxes_b[:, 0]) * (bboxes_b[:, 3] - bboxes_b[:, 1])
    union = area_a + area_b - inter

    # NOTE: a pair of zero-area boxes gives union == 0 and therefore nan.
    return inter / union
def qa(x):
    """Quantize ``x`` against a clipping range derived from its maximum.

    The range estimate ``new_c`` is ``max(x)`` on global step 0 and an
    exponential blend (0.99 / 0.01) of the variable ``<op name>_ema`` and
    ``max(x)`` afterwards. ``new_c`` is only added to the 'new_cs'
    collection; the variable itself is not assigned here.

    Returns:
        A pair ``(quantized, grad_fn)`` where ``grad_fn`` passes the incoming
        gradient through unchanged.
    """
    name = x.op.name
    ema_name = name + '_ema'
    c = tf.get_variable(ema_name, initializer=0.0, dtype=tf.float32)

    max_a = tf.reduce_max(x)
    # tf.cond needs a boolean tensor and two callables; `==` on a graph
    # tensor and bare tensors as branches do not work.
    new_c = tf.cond(
        tf.equal(tf.train.get_or_create_global_step(), 0),
        lambda: max_a,
        lambda: c * 0.99 + max_a * 0.01,
        name=ema_name + '_new')
    tf.add_to_collection('new_cs', new_c)

    # `bitA` is read from the enclosing/module scope.
    n = 2**bitA - 1
    lower = new_c * 0.05
    upper = new_c * 0.95

    # Values below `lower` become 0, the rest are clipped into the range.
    x_temp = tf.where(x < lower,
                      tf.zeros_like(x),
                      tf.clip_by_value(x, lower, upper * 0.9999))
    # NOTE(review): `n / 0.9 * new_c` evaluates as (n / 0.9) * new_c; mapping
    # [lower, upper] onto [0.5, n + 0.5] would need n / (0.9 * new_c).
    # Left unchanged -- confirm the intended scale.
    x_temp = (n / 0.9 * new_c) * x_temp + (0.5 - (0.5 * n / 9))
    x_temp = tf.round(x_temp)
    x_temp = x_temp / n * upper
    return x_temp, lambda dy: dy
def sequence_mask(lengths, maxlen=None, dtype=tf.bool, name=None):
    """Return a mask marking the first ``lengths[i]`` positions of each row.

    Same as sequence_mask in version 1.1

    Args:
        lengths: 1-D integer tensor (or convertible value).
        maxlen: Scalar number of mask columns; defaults to ``max(lengths)``.
        dtype: Output dtype; the boolean result is cast if it differs.
        name: Optional name scope.

    Returns:
        Tensor of shape (len(lengths), maxlen).

    Raises:
        ValueError: If ``lengths`` is not 1-D or ``maxlen`` is not a scalar.
    """
    with tf.name_scope(name or "SequenceMask"):
        lengths = tf.convert_to_tensor(lengths)
        if lengths.get_shape().ndims != 1:
            raise ValueError("lengths must be 1D for sequence_mask")

        if maxlen is None:
            # `tf.max` does not exist; reduce over the only axis.
            maxlen = tf.reduce_max(lengths, [0])
        else:
            maxlen = tf.convert_to_tensor(maxlen)
        if maxlen.get_shape().ndims != 0:
            raise ValueError("maxlen must be scalar for sequence_mask")

        # The basic idea is to compare a range row vector of size maxlen:
        # [0, 1, 2, 3, 4]
        # to length as a matrix with 1 column: [[1], [3], [2]].
        # Because of broadcasting on both arguments this comparison results
        # in a matrix of size (len(lengths), maxlen)
        result = tf.range(0, maxlen, 1) < tf.expand_dims(lengths, 1)
        if dtype is None or result.dtype.base_dtype == dtype.base_dtype:
            return result
        return tf.cast(result, dtype)
def get_cell_sampling_probas(attractivity_cells, square_ids_cells):
    """Arrange per-cell attractivities into one probability row per square.

    Args:
        attractivity_cells: 1-D numpy array, one attractivity per cell.
        square_ids_cells: 1-D numpy array with the square id of each cell.

    Returns:
        A pair ``(cell_sampling_probas, cell_index_shift)``:
        ``cell_sampling_probas`` has one row per unique square id (in sorted
        id order), holds that square's attractivities in its first columns,
        is zero-padded to the largest square and normalised so each row sums
        to 1 in absolute value; ``cell_index_shift[k]`` is the number of
        cells in all squares before square ``k``.
    """
    # `return_inverse`, `insert`, `arange`, item assignment and `astype` are
    # numpy features that have no `tf.*` counterpart with this spelling.
    import numpy as np

    unique_square_ids, inverse, counts = np.unique(
        square_ids_cells, return_inverse=True, return_counts=True)
    # `inverse` is a re-numbering of `square_ids_cells` following its order:
    # 3, 4, 6 => 0, 1, 2
    width_sample = np.max(counts)
    print(f'width_sample: {width_sample}')

    # Sequential index of each cell inside its square (0, 1, 2, ... per
    # square). Trick: shift `counts` one to the right (drop the last element,
    # prepend 0) and accumulate, giving the first flat position per square.
    cell_index_shift = np.cumsum(np.insert(counts, 0, 0)[:-1])
    # Repeat each offset as many times as the square has cells.
    to_subtract = np.repeat(cell_index_shift, counts)
    inds_cells_in_square = np.arange(0, attractivity_cells.shape[0]) - to_subtract

    # Group the cells by square so they line up with the indices above; a
    # stable sort keeps the input order of the cells within a square.
    order = np.argsort(inverse, kind='stable')
    inverse = inverse[order]
    attractivity_cells = attractivity_cells[order]

    # One row per square, padded with 0 on the right.
    cell_sampling_probas = np.zeros((unique_square_ids.shape[0], width_sample))
    cell_sampling_probas[inverse, inds_cells_in_square] = attractivity_cells

    # Normalise the rows so that each one is a probability distribution.
    # NOTE: a square whose attractivities are all 0 yields a nan row.
    cell_sampling_probas /= np.linalg.norm(
        cell_sampling_probas, ord=1, axis=1, keepdims=True).astype(np.float32)
    return cell_sampling_probas, cell_index_shift
def train(opt, dset, model, criterion, optimizer, epoch, previous_best_acc):
    """Run one training epoch with periodic validation and checkpointing.

    Every ``opt.log_freq`` batches the running train accuracy/loss are
    written to ``opt.writer``, ``validate`` is run, and the model weights are
    saved to ``best_valid.pth`` in ``opt.results_dir`` when the validation
    accuracy improves. Validation accuracies are appended to
    ``valid_acc.log`` at the end.

    Returns:
        The best validation accuracy seen so far (at least
        ``previous_best_acc``).
    """
    dset.set_mode("train")
    model.train()
    train_loader = DataLoader(dset, batch_size=opt.bsz, shuffle=True, collate_fn=pad_collate)

    train_loss = []
    valid_acc_log = ["batch_idx\tacc"]
    train_corrects = []
    torch.set_grad_enabled(True)
    for batch_idx, batch in tqdm(enumerate(train_loader)):
        model_inputs, targets, _ = preprocess_inputs(batch, opt.max_sub_l, opt.max_vcpt_l,
                                                     opt.max_vid_l, device=opt.device)
        outputs = model(*model_inputs)
        loss = criterion(outputs, targets)
        # The update step had been commented out, so the loop never changed
        # the weights; restored so that `optimizer` is actually used.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        train_loss.append(loss.item())
        # Index of the largest output per row, compared against the targets.
        # (`tf.max` / `tf.where(pred_ids=targets)` are not valid calls.)
        pred_ids = outputs.data.max(1)[1]
        train_corrects += pred_ids.eq(targets.data).cpu().numpy().tolist()

        if batch_idx % opt.log_freq == 0:
            niter = epoch * len(train_loader) + batch_idx

            train_acc = sum(train_corrects) / float(len(train_corrects))
            # Summed loss divided by the number of samples seen.
            # NOTE(review): correct only if `criterion` sums over the batch.
            train_loss = sum(train_loss) / float(len(train_corrects))
            opt.writer.add_scalar("Train/Acc", train_acc, niter)
            opt.writer.add_scalar("Train/Loss", train_loss, niter)

            # Test
            valid_acc, valid_loss = validate(opt, dset, model, mode="valid")
            opt.writer.add_scalar("Valid/Loss", valid_loss, niter)

            valid_log_str = "%02d\t%.4f" % (batch_idx, valid_acc)
            valid_acc_log.append(valid_log_str)
            if valid_acc > previous_best_acc:
                previous_best_acc = valid_acc
                torch.save(model.state_dict(), os.path.join(opt.results_dir, "best_valid.pth"))
            print(" Train Epoch %d loss %.4f acc %.4f Val loss %.4f acc %.4f"
                  % (epoch, train_loss, train_acc, valid_loss, valid_acc))

            # reset to train
            torch.set_grad_enabled(True)
            model.train()
            dset.set_mode("train")
            train_corrects = []
            train_loss = []

        if opt.debug:
            break

    # additional log
    with open(os.path.join(opt.results_dir, "valid_acc.log"), "a") as f:
        f.write("\n".join(valid_acc_log) + "\n")

    return previous_best_acc
def loop_body(b, ignore_mask):
    """Write the ignore mask for batch item ``b`` and advance the counter.

    ``y_true``, ``l``, ``object_mask_bool``, ``pred_box`` and
    ``ignore_thresh`` are read from the enclosing scope.

    Returns:
        ``(b + 1, ignore_mask)`` with entry ``b`` set to 1 where the best IoU
        against the selected true boxes is below ``ignore_thresh``.
    """
    true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
    iou = box_iou(pred_box[b], true_box)
    # Best IoU over the last axis (`tf.max` is not a TensorFlow function).
    best_iou = tf.reduce_max(iou, axis=-1)
    # `tf.dtype(...)` does not exist; the dtype is an attribute of the tensor.
    ignore_mask = ignore_mask.write(
        b, tf.cast(best_iou < ignore_thresh, true_box.dtype))
    return b + 1, ignore_mask
def masked_softmax(vec, mask, dim=1):
    """Softmax over ``dim`` that gives masked-out positions probability 0.

    Args:
        vec: Tensor of scores.
        mask: Tensor broadcastable to ``vec``; non-zero marks valid entries.
        dim: Dimension to normalise over.

    Returns:
        Tensor shaped like ``vec``. Rows with no valid entry are all zeros.
    """
    masked_vec = vec * mask
    # Subtract the maximum before exponentiating for numerical stability.
    # (`tf.exp(a, -b)` passed the offset as a second argument instead.)
    max_vec = masked_vec.max(dim=dim, keepdim=True)[0]
    exps = (masked_vec - max_vec).exp()
    masked_exps = exps * mask
    masked_sums = masked_exps.sum(dim, keepdim=True)
    # Replace zero sums by 1 so fully masked rows come out as 0, not nan.
    zeros = (masked_sums == 0)
    masked_sums = masked_sums + zeros.to(masked_sums.dtype)
    return masked_exps / (masked_sums + 1e-20)
def _loss_def(self):
    '''Initializes the loss function.

    Returns the summed margin loss ``sum(max(p - n + margin, 0))`` where
    ``p`` / ``n`` are the per-row mean scores of the positive / negative
    instances.
    '''
    def scores(h, t, rel):
        s = self._score(h, t, rel)  # [b,n]
        # Bare `mean` is not a builtin; reduce over axis 1.
        return tf.reduce_mean(s, 1)  # [b]

    p = scores(*self._positive_instance(in_batch=True))  # [b]
    n = scores(*self._negative_instance(in_batch=True))  # [b]
    # The builtin max()/sum() do not operate element-wise on tensors; the
    # hinge needs an element-wise maximum followed by a full reduction.
    return tf.reduce_sum(tf.maximum(p - n + self.margin, 0))  # []
def get_h_tile(cls, s, s1):
    """Attend over ``s1`` with weights from the row-wise maximum of ``s``.

    The weights are the softmax of ``max(s, axis=2)``; the weighted sum of
    ``s1`` is repeated along axis 1 to match the length of ``s1``.

    Shapes below assume s is [B, t1, t2] and s1 is [B, t1, D] -- TODO confirm.
    """
    t1 = tf.shape(s1)[1]
    # Reference (torch) formulation this mirrors:
    #   b_weight = F.softmax(torch.max(s, dim=2)[0], dim=-1).unsqueeze(1)
    #   h_tile = torch.bmm(b_weight, s1).repeat(1, t1, 1)
    # tf.reduce_max already returns the values (no `[0]`), softmax lives in
    # tf.nn, and `unsqueeze(1)` corresponds to expand_dims, not a reshape to
    # [-1, 1].
    b_weight = tf.expand_dims(tf.nn.softmax(tf.reduce_max(s, 2)), 1)  # [B, 1, t1]
    h_tile = tf.tile(tf.matmul(b_weight, s1), [1, t1, 1])  # [B, t1, D]
    return h_tile
def _tonemap(self, image, mode='NONE', **kwargs):
    """Map ``image`` to a displayable range.

    Args:
        image: Input tensor.
        mode: Case-insensitive; one of
            'NONE'          -- return the image unchanged,
            'CLIP_NEGATIVE' -- clamp negative values to 0,
            'SATURATE'      -- clamp values to [0, 1],
            'NORMALIZE'     -- shift so the minimum is >= 0, then divide by
                               the maximum when it is positive.
        **kwargs: Accepted and ignored.

    Raises:
        ValueError: If ``mode`` is not one of the values above.
    """
    mode = mode.upper()
    if mode == 'NONE':
        return image
    if mode == 'CLIP_NEGATIVE':
        return tf.maximum(image, 0)
    if mode == 'SATURATE':
        return tf.clip_by_value(image, 0, 1)
    if mode == 'NORMALIZE':
        # Written without Python branching on tensors, and without shadowing
        # the builtins `min` / `max`.
        min_val = tf.reduce_min(image)
        # Shift only when the minimum is negative.
        image = image - tf.minimum(min_val, tf.zeros_like(min_val))
        max_val = tf.reduce_max(image)
        # Divide by 1 when the maximum is not positive, leaving the image
        # as is (previously this case left the result unassigned).
        return image / tf.where(max_val > 0, max_val, tf.ones_like(max_val))
    raise ValueError('Unsupported tonemap mode: {}'.format(mode))
def norm_data(data, norm_app="min/max"):
    """Normalise ``data``.

    Args:
        data: Tensor with at least two axes.
        norm_app: "min/max" scales along axis 1 to (0, 1); "neg1/pos1"
            divides by the global maximum absolute value, giving (-1, 1).
            Any other value prints a warning and falls back to "min/max".
    """
    ##### norm to (-1,1)
    if norm_app == "neg1/pos1":
        return data / tf.reduce_max(tf.abs(data))

    if norm_app != "min/max":
        print("Unsupported norm method! Your options: min/max or neg1/pos1")
        print("Using min/max instead.")

    ##### norm to (0,1)
    # Re-insert the reduced axis so the statistics broadcast against `data`
    # (reducing axis 1 without it misaligns the subtraction).
    min_val = tf.expand_dims(tf.reduce_min(data, axis=1), 1)
    max_val = tf.expand_dims(tf.reduce_max(data, axis=1), 1)
    # NOTE: a constant slice (max == min) divides by zero.
    return (data - min_val) / (max_val - min_val)
def optimize(self, loss, num_async_replicas=1):
    """Return a training op minimizing loss.

    The base learning rate from ``self.hparams`` is multiplied by the
    schedule's decay rate, floored at ``learning_rate_minimum`` when that is
    set, and divided by sqrt(num_async_replicas) when more than one replica
    is used.
    """
    tf.logging.info("Base learning rate: %f", self.hparams.learning_rate)
    lr = self.hparams.learning_rate
    decay_rate = optimize.learning_rate_schedule(self.hparams)
    lr *= decay_rate
    if self.hparams.learning_rate_minimum:
        lr_min = float(self.hparams.learning_rate_minimum)
        tf.logging.info("Applying learning rate minimum: %f", lr_min)
        # Element-wise floor; `tf.max` is not a TensorFlow function.
        lr = tf.maximum(lr, tf.to_float(lr_min))
    if num_async_replicas > 1:
        tf.logging.info("Dividing learning rate by num_async_replicas: %d",
                        num_async_replicas)
    lr /= math.sqrt(float(num_async_replicas))
    train_op = optimizer.optimize(loss, lr, self.hparams)
    return train_op
def __init__(self, batch_size, x_size, size, learning_rate, suumary_path):
    """Build the graph of a fully connected regressor and start a session.

    Args:
        batch_size: Fixed batch dimension of the input placeholders.
        x_size: Number of input features.
        size: Sequence with the width of each hidden layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        suumary_path: Directory for the summary FileWriter.
    """
    self._lr = learning_rate
    self.summary_path = suumary_path
    self.config = tf.ConfigProto(allow_soft_placement=True)
    self.sess = tf.Session(config=self.config)
    self.x = tf.placeholder(tf.float32, [batch_size, x_size])
    self.y = tf.placeholder(tf.float32, [batch_size, 1])
    self.keep_prob = tf.placeholder(tf.float32, [1])
    self.w = []
    self.b = []
    with tf.variable_scope('neuron', reuse=tf.AUTO_REUSE):
        for i, s in enumerate(size):
            # The first layer maps from the input width, later ones from the
            # previous layer's width.
            fan_in = x_size if i == 0 else size[i - 1]
            self.w.append(tf.get_variable('weight' + str(i), [fan_in, s], tf.float32,
                                          initializer=tf.random_normal_initializer()))
            self.b.append(tf.get_variable('bias' + str(i), [s], tf.float32,
                                          initializer=tf.constant_initializer(1)))
        self.w.append(tf.get_variable('output_weight', [size[-1], 1], tf.float32,
                                      initializer=tf.random_normal_initializer()))
        self.b.append(tf.get_variable('output_bias', [1], tf.float32,
                                      initializer=tf.constant_initializer(1)))
    with tf.variable_scope('result', reuse=tf.AUTO_REUSE):
        # NOTE(review): this variable is created and then immediately
        # replaced by the input placeholder; kept so the set of graph
        # variables stays the same.
        self.output = tf.get_variable('output', [batch_size, x_size], tf.float32)
        self.output = self.x
        for one_w, one_b in zip(self.w, self.b):
            # Using ReLU neuron: element-wise max(x, 0), then the bias.
            # (`tf.max` does not exist in TensorFlow.)
            self.output = tf.add(tf.maximum(tf.matmul(self.output, one_w), 0.0), one_b)
            # NOTE(review): dropout is applied after every layer, including
            # the output layer -- confirm this is intended.
            self.output = tf.nn.dropout(self.output, keep_prob=self.keep_prob)
    self.loss = tf.sqrt(tf.losses.mean_squared_error(self.y, self.output))
    self.optimizer = tf.train.AdamOptimizer(self._lr)
    self.train = self.optimizer.minimize(self.loss)
    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
    tf.summary.scalar('loss', self.loss)
    self.summary = tf.summary.merge_all()
    self.writer = tf.summary.FileWriter(self.summary_path)
def tf_apply(self, x, update):
    """Apply the non-linearity selected by ``self.name`` to ``x``.

    Supported names: 'elu', 'none', 'relu', 'leakyrelu', 'selu', 'sigmoid',
    'softmax', 'softplus', 'tanh'. For 'relu', a summary with the fraction of
    non-zero outputs is recorded when 'relu' is in ``self.summary_labels``.

    Raises:
        TensorForceError: If ``self.name`` is not a supported non-linearity.
    """
    if self.name == 'elu':
        x = tf.nn.elu(features=x)

    elif self.name == 'none':
        x = tf.identity(input=x)

    elif self.name == 'relu':
        x = tf.nn.relu(features=x)
        if 'relu' in self.summary_labels:
            non_zero = tf.cast(x=tf.count_nonzero(input_tensor=x), dtype=tf.float32)
            size = tf.cast(x=tf.reduce_prod(input_tensor=tf.shape(input=x)), dtype=tf.float32)
            summary = tf.summary.scalar(name='relu', tensor=(non_zero / size))
            self.summaries.append(summary)

    elif self.name == 'leakyrelu':
        alpha = 0.03  # TODO: parameter
        # Element-wise maximum of the scaled and the raw input
        # (`tf.max` is not a TensorFlow function).
        x = tf.maximum(x=(alpha * x), y=x)

    elif self.name == 'selu':
        # https://arxiv.org/pdf/1706.02515.pdf
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        negative = alpha * tf.nn.elu(features=x)
        x = scale * tf.where(condition=(x >= 0.0), x=x, y=negative)

    elif self.name == 'sigmoid':
        x = tf.sigmoid(x=x)

    elif self.name == 'softmax':
        x = tf.nn.softmax(logits=x)

    elif self.name == 'softplus':
        x = tf.nn.softplus(features=x)

    elif self.name == 'tanh':
        x = tf.nn.tanh(x=x)

    else:
        raise TensorForceError('Invalid non-linearity: {}'.format(self.name))

    return x
def create_dqn(self):
    """Build the Q-network graph, its loss and the training op.

    Creates ``self.Xs`` (uint8 frames, [None, 84, 84, 4]), ``self.ys``
    (targets), ``self.actions`` (taken actions), ``self.argmax_action`` /
    ``self.max_action`` (greedy action and its Q-value), ``self.loss`` and
    ``self.train``.
    """
    self.Xs = tensorflow.placeholder(tensorflow.uint8, shape=[None, 84, 84, 4])
    float_Xs = tensorflow.to_float(self.Xs) / 255

    conv_1 = tensorflow.layers.conv2d(float_Xs, filters=32, kernel_size=8, strides=4,
                                      activation=tensorflow.nn.relu)
    conv_2 = tensorflow.layers.conv2d(conv_1, filters=64, kernel_size=4, strides=2,
                                      activation=tensorflow.nn.relu)
    conv_3 = tensorflow.layers.conv2d(conv_2, filters=64, kernel_size=3,
                                      activation=tensorflow.nn.relu)
    flat = tensorflow.layers.flatten(conv_3)
    fc = tensorflow.layers.dense(flat, units=512, activation=tensorflow.nn.relu)
    num_actions = self.env.action_space.n
    q_values = tensorflow.layers.dense(fc, units=num_actions)

    self.argmax_action = tensorflow.argmax(q_values, axis=1)
    # Largest Q-value per row (`tensorflow.max` does not exist).
    self.max_action = tensorflow.reduce_max(q_values, axis=1)

    # `tensorflow.float` / `tensorflow.int` are not dtypes.
    self.ys = tensorflow.placeholder(tensorflow.float32, shape=[self.MINIBATCH_SIZE])
    self.actions = tensorflow.placeholder(tensorflow.int32, shape=[self.MINIBATCH_SIZE])

    # Select Q(s_i, a_i) for each row. `gather(..., axis=1)` would instead
    # return a [batch, batch] matrix of every row against every action index.
    action_mask = tensorflow.one_hot(self.actions, num_actions, dtype=q_values.dtype)
    q_taken = tensorflow.reduce_sum(q_values * action_mask, axis=1)
    self.loss = tensorflow.reduce_mean(
        tensorflow.squared_difference(self.ys, q_taken))

    optimizer = tensorflow.train.RMSPropOptimizer(
        learning_rate=self.ALHPA,
        # NOTE(review): in RMSProp `decay` is the discount of the squared-
        # gradient average, not a learning-rate decay -- confirm 1 is meant.
        decay=1,
        momentum=self.GRADIENT_MOMENTUM,
        epsilon=self.MIN_SQRT_MOMENTUM,
    )
    # gradient operator
    self.train = optimizer.minimize(self.loss)
def calibration_plot(model, x, y, ece):
    """Plot histograms of per-sample correctness and top confidence.

    Args:
        model: Callable returning logits for ``x``; its ``name`` is shown in
            the title.
        x: Model inputs.
        y: Targets; the arg-max over axis 1 is used as the label.
        ece: Value shown in the plot title.

    Returns:
        Always 0.
    """
    py_x = tf.nn.softmax(model(x))
    # Top probability per row (`tf.max` is not a TensorFlow function).
    p = tf.reduce_max(py_x, axis=1)
    hat_y = tf.argmax(py_x, axis=1)
    y = tf.argmax(y, axis=1)
    acc = tf.cast(tf.equal(hat_y, y), tf.float32)

    # Order both series by confidence; tensors are reordered with tf.gather
    # because indexing a tensor with an index tensor is not supported.
    idx = tf.argsort(p)
    p = tf.gather(p, idx)
    acc = tf.gather(acc, idx)

    plt.title(f'Calibration {model.name}: {ece}')
    plt.ylabel('Frequency')
    plt.xlabel('ACC/Conf')
    plt.xlim([0, 1])
    plt.hist(acc, bins=20, color='blue', label='accuracy', alpha=.5)
    plt.hist(p, bins=20, color='red', label='confidence', alpha=.5)
    plt.legend()
    return 0
def __init__(self, config):
    """Build the network, the pairwise loss and the SGD training step.

    Placeholders: ``im_raw`` (images, [None, 32, 32, 3]), ``S`` (pairwise
    similarity matrix), ``lrx`` (learning rate) and ``Ux`` (codes,
    [None, config.codelens]).
    """
    self.im_raw = im_raw = tf.placeholder('float32', [None, 32, 32, 3])
    im_resize = tf.image.resize_images(im_raw, 224, 224)
    net, _, _ = vggf.construct_net(config.vgg_path, im_resize, config.codelens)
    self.net = net
    self.S = S = tf.placeholder('float32', [None, None])
    self.lrx = lrx = tf.placeholder('float32', ())
    self.Ux = Ux = tf.placeholder('float32', [None, config.codelens])

    U0 = net['fc8']
    theta = tf.mul(1.0 / 2, tf.matmul(U0, tf.transpose(Ux)))
    B_code = tf.sign(U0)

    # Overflow-safe log(1 + exp(theta)) = max(theta, 0) + log(1 + exp(-|theta|)).
    # The exponent must be -|theta|; `exp(abs(-theta))` grows without bound
    # and does not equal the softplus. `tf.max` is not a TensorFlow function.
    log_one_plus_exp = tf.maximum(theta, 0.0) + tf.log(tf.exp(-tf.abs(theta)) + 1)

    likelihood_term = -2.0 * tf.reduce_sum(tf.mul(S, theta) - log_one_plus_exp)
    quantization_term = config.lamda * tf.reduce_sum(tf.pow((B_code - U0), 2))
    loss = tf.div(likelihood_term + quantization_term,
                  float(config.N_size * config.batch_size))
    self.train_step = tf.train.GradientDescentOptimizer(lrx).minimize(loss)
def call(self, x):
    """Re-normalise features in the unknown region using known-region stats.

    Args:
        x: A list ``[features, mask]``. Only channel 0 of ``mask`` is used;
            ``1 - mask`` resized to the feature map marks the known region.

    Returns:
        The features with the unknown region replaced by a blend
        (``self.alpha``) of its re-normalised and original values.
    """
    assert isinstance(x, list)
    assert len(x) == 2
    x, mask = x
    h, w = x.get_shape().as_list()[1:3]

    mask_s = tf.image.resize_nearest_neighbor(1 - mask[:, :, :, 0:1], [h, w])
    # Floor the counts at eps so empty regions do not divide by zero.
    x_known_cnt = tf.maximum(tf.reduce_sum(mask_s, [1, 2], keep_dims=True), self.eps)
    x_known_mean = tf.reduce_sum(x * mask_s, [1, 2], keep_dims=True) / x_known_cnt
    x_known_variance = tf.reduce_sum((x * mask_s - x_known_mean) ** 2, [1, 2],
                                     keep_dims=True) / x_known_cnt

    mask_s_rev = 1 - mask_s
    x_unknown_cnt = tf.maximum(tf.reduce_sum(mask_s_rev, [1, 2], keep_dims=True), self.eps)
    x_unknown_mean = tf.reduce_sum(x * mask_s_rev, [1, 2], keep_dims=True) / x_unknown_cnt
    # Squared deviation is summed over the spatial axes, mirroring the
    # known-region variance above.
    x_unknown_variance = tf.reduce_sum((x * mask_s_rev - x_unknown_mean) ** 2, [1, 2],
                                       keep_dims=True) / x_unknown_cnt

    # Standardise with the unknown-region statistics, then shift/scale with
    # the known-region mean and standard deviation.
    x_unknown = self.alpha * tf.nn.batch_normalization(
        x * mask_s_rev, x_unknown_mean, x_unknown_variance,
        x_known_mean, tf.sqrt(x_known_variance), self.eps
    ) + (1 - self.alpha) * x * mask_s_rev

    # NOTE(review): the original body ended without a return value. Known
    # positions keep `x`, unknown positions take `x_unknown` -- confirm this
    # is the intended output.
    return x_unknown * mask_s_rev + x * mask_s
def neural_network():
    """Build an auto-encoder (520-256-128-64-128-256-520) with a classifier.

    The classifier (64-128-128-``output``) reads the 64-wide encoding. The
    input tensor ``X`` and the class count ``output`` come from the
    enclosing/module scope.

    Returns:
        ``(decoded, out)``: the reconstruction and the softmax class scores.
    """
    def weight(shape):
        # `stddev` belongs to truncated_normal, not to tf.Variable.
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    def bias(width):
        return tf.Variable(tf.constant(0.0, shape=[width]))

    def dense_tanh(inputs, w, b):
        return tf.nn.tanh(tf.add(tf.matmul(inputs, w), b))

    #------------------encoder------------------#
    e_w_1 = weight([520, 256])
    e_b_1 = bias(256)
    e_w_2 = weight([256, 128])
    e_b_2 = bias(128)
    e_w_3 = weight([128, 64])
    e_b_3 = bias(64)
    #------------------Decoder------------------#
    d_w_1 = weight([64, 128])
    d_b_1 = bias(128)
    d_w_2 = weight([128, 256])
    d_b_2 = bias(256)
    d_w_3 = weight([256, 520])
    d_b_3 = bias(520)
    #------------------DNN------------------#
    w_1 = weight([64, 128])
    b_1 = bias(128)
    w_2 = weight([128, 128])
    b_2 = bias(128)
    w_3 = weight([128, output])
    b_3 = bias(output)
    #####################################################
    layer_1 = dense_tanh(X, e_w_1, e_b_1)
    layer_2 = dense_tanh(layer_1, e_w_2, e_b_2)
    encoded = dense_tanh(layer_2, e_w_3, e_b_3)

    layer_4 = dense_tanh(encoded, d_w_1, d_b_1)
    layer_5 = dense_tanh(layer_4, d_w_2, d_b_2)
    decoded = dense_tanh(layer_5, d_w_3, d_b_3)

    # The classifier weights are [64, 128], so it must consume the 64-wide
    # `encoded` tensor; `decoded` is 520 wide and cannot be multiplied.
    layer_7 = dense_tanh(encoded, w_1, b_1)
    layer_8 = dense_tanh(layer_7, w_2, b_2)
    # The bias is added to the logits, as in every other layer.
    out = tf.nn.softmax(tf.add(tf.matmul(layer_8, w_3), b_3))
    return (decoded, out)
def hinge_loss(y_true, y_pred):
    """Sum of two hinge terms with margin 0.5.

    One term is computed on ``polarity`` of the inputs, the other on
    ``polarity`` of their complements ``1 - y``.
    """
    # Element-wise hinge: max(margin - agreement, 0). `tf.max` does not exist.
    direct = tf.maximum(0.5 - tf.multiply(polarity(y_pred), polarity(y_true)), 0.0)
    complement = tf.maximum(
        0.5 - tf.multiply(polarity(1.0 - y_pred), polarity(1.0 - y_true)), 0.0)
    return tf.add(direct, complement)
def grad(dy):
    """Scale the incoming gradient by ``max(1 - beta * |x|, 0)``.

    ``beta`` and ``x`` are read from the enclosing scope.
    """
    # Element-wise maximum; `tf.max` is not a TensorFlow function.
    return dy * tf.maximum(1 - beta * tf.abs(x), 0)
def relu():
    """Return ``max(self.input, 0)`` element-wise.

    ``self`` is not a parameter; it is read from the enclosing scope.
    """
    # A ReLU is the element-wise maximum with 0 (`tf.max` does not exist).
    return tf.maximum(self.input, 0)
def max(self, axis: Optional[int]=None) -> 'ITensor':
    """Return the maximum of the wrapped tensor.

    Args:
        axis: Axis to reduce over; ``None`` reduces over all axes.

    Returns:
        A new ``Tensor`` wrapping the reduced native tensor.
    """
    # TensorFlow's reduction is `reduce_max`; there is no `tf.max`.
    return Tensor(native=tf.reduce_max(self.native, axis=axis))
def prelu(inputs, is_training, scope):
    """Parametric ReLU: ``max(x, 0) + a * min(x, 0)``.

    Args:
        inputs: Input tensor; one slope is created per entry of its last axis.
        is_training: Unused.
        scope: Variable scope in which the slope variable ``a`` is created.
    """
    with tf.variable_scope(scope):
        # Slopes for the negative part, initialised to 0.25.
        a = tf.Variable(0.25 * tf.ones([inputs.shape[-1]]), name="a")
        # Element-wise max/min (`tf.max` / `tf.min` do not exist).
        return tf.maximum(inputs, 0) + tf.multiply(a, tf.minimum(inputs, 0))
def __init__(self, params, is_training=False):
    """Build the graph: span/char/book projections, an RNN, and the loss.

    ``params`` supplies 'batch_size', 'seq_len', 'span_size', 'vocab_size',
    'd_hid', 'nlayers', 'drop_prob' and 'max_grad_norm'.

    NOTE(review): this body still reads names that are not defined in it
    (``n_negs``, ``all_embeds``, ``neg_spans``, ``unique_scale``, ``logits``,
    ``ConcatRNN``). They are left untouched and must come from the
    surrounding module -- confirm before use.
    """
    batch_size = params['batch_size']  # DATA WILL BE ARRANGED IN BATCHES
    seq_len = params['seq_len']
    span_size = params['span_size']  # number of tokens per span
    vocab_size = params['vocab_size']
    nchars = nbooks = ntopics = d_word = params['d_hid']  # also embedding dimension d_char d_book
    # Per the comment above, the char/book embedding sizes equal d_hid; they
    # were used below without being defined.
    d_char = d_book = d_word
    self.nlayers = nlayers = params['nlayers']
    drop_prob = params['drop_prob']

    # how to do in batch? if it's a user-subreddit pair / network, then can't
    # do batch because need sequential -> lay out each user-subreddit pair sequentially
    # TODO initializations
    # TODO dropout; masks?
    # TODO end of char x char token
    self._span = tf.placeholder(tf.int32, shape=[None, span_size])
    self._negs = tf.placeholder(tf.int32, shape=[n_negs, span_size])
    self._user = tf.placeholder(tf.int32, shape=[2])  # two characters?
    self._book = tf.placeholder(tf.int32, shape=[1])

    # In each variable scope,
    #   Lookups + average/sum/reshape
    #   Linear + relu
    # Note: original paper doesn't really concat, does it in this way
    with tf.variable_scope("span"):
        # TODO load w2v
        lookup_table = tf.get_variable("lookup_table", [vocab_size, d_word],
                                       trainable=False)
        # seq x span_size
        word_embeds = tf.nn.embedding_lookup(lookup_table, self._span)  # seq x span_size x d_w
        span_embeds = tf.reduce_mean(word_embeds, 1)  # seq x d_word
        W_w = tf.get_variable('W', [d_word, d_word])
        b_w = tf.get_variable('b', [d_word])  # initialzed to zero
        lin_w = tf.matmul(W_w, span_embeds) + b_w

    with tf.variable_scope("char"):
        lookup_table = tf.get_variable("lookup_table", [nchars, d_char])
        char_embeds = tf.nn.embedding_lookup(lookup_table, self._user)
        # NOTE(review): the second argument of reduce_sum is a list of axes;
        # `[d_char, -1]` looks like a shape -- confirm the intended reduction.
        shaped_char_embeds = tf.reduce_sum(char_embeds, [d_char, -1])  # d_char
        W_c = tf.get_variable('W', [d_char, d_word])
        lin_c = tf.matmul(W_c, shaped_char_embeds)

    with tf.variable_scope("book"):
        lookup_table = tf.get_variable("lookup_table", [nbooks, d_book])  # batch x seq
        book_embed = tf.nn.embedding_lookup(lookup_table, self._book)  # batch x seq x d_b
        W_b = tf.get_variable('W', [d_book, d_word])
        lin_b = tf.matmul(W_b, book_embed)

    linear = lin_w + lin_c + lin_b
    h_t = tf.nn.relu(linear)  # may need to reshape

    cell = ConcatRNN(d_word)
    self._initial_state = cell.zero_state(batch_size, tf.float32)
    with tf.variable_scope("RNN"):
        outputs, state = tf.nn.rnn(cell, all_embeds, dtype=tf.float32,
                                   initial_state=self._initial_state)
    self._last_state = state

    # Output shape should be (seq_len x d_word)
    outputs = tf.reshape(tf.concat(1, outputs), [-1, d_word])

    # reconstruction
    R = tf.get_variable("descriptor_dict", [ntopics, d_word])
    recons = tf.matmul(R, outputs)

    # max-margin loss w/ similarity penalty
    pos_vecs = tf.tile(tf.reduce_sum(tf.mul(span_embeds, recons), 1), [n_negs, 1])
    neg_vecs = tf.matmul(neg_spans, tf.transpose(recons))
    # Element-wise hinge before summing (`tf.max` is not a TF function).
    J = tf.reduce_sum(tf.maximum(1. - pos_vecs + neg_vecs, 0.))

    # The true penalty I think is this, but they use the uncommented line
    #X = tf.sqrt(tf.reduce_sum(tf.square(tf.matmul(R, tf.transpose(R)) - identity)))
    identity = tf.Variable(np.identity(ntopics), trainable=False)
    norm_R = tf.div(R, tf.sqrt(tf.reduce_sum(tf.square(R))))
    X = tf.reduce_sum(tf.square(tf.matmul(norm_R, tf.transpose(norm_R)) - identity))

    self._loss = J + unique_scale * X
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # Differentiate the loss built above; a bare `loss` is not defined here.
    grads, _ = tf.clip_by_global_norm(tf.gradients(self._loss, tvars),
                                      params['max_grad_norm'])
    optimizer = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    self.debug = [logits]
def call(self, x, mask=None):
    """Pool a fixed-size feature map for each region of interest.

    Args:
        x: A pair ``[img, rois]``. ``rois[0, i]`` holds ``(x, y, w, h)`` of
            region ``i``.
        mask: Unused.

    Returns:
        Tensor reshaped to (1, num_rois, pool_size, pool_size, nb_channels),
        permuted to channels-first per region when ``dim_ordering`` is 'th'.
    """
    assert(len(x) == 2)

    img = x[0]
    rois = x[1]

    input_shape = tf.shape(img)

    outputs = []

    for roi_idx in range(self.num_rois):

        x = rois[0, roi_idx, 0]
        y = rois[0, roi_idx, 1]
        w = rois[0, roi_idx, 2]
        h = rois[0, roi_idx, 3]

        row_length = w / float(self.pool_size)
        col_length = h / float(self.pool_size)

        num_pool_regions = self.pool_size

        #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op
        # in theano. The theano implementation is much less efficient and leads to long compile times

        if self.dim_ordering == 'th':
            for jy in range(num_pool_regions):
                for ix in range(num_pool_regions):
                    x1 = x + ix * row_length
                    x2 = x1 + row_length
                    y1 = y + jy * col_length
                    y2 = y1 + col_length

                    x1 = tf.cast(x1, 'int32')
                    x2 = tf.cast(x2, 'int32')
                    y1 = tf.cast(y1, 'int32')
                    y2 = tf.cast(y2, 'int32')

                    # Every bin is at least one pixel wide and high.
                    x2 = x1 + tf.maximum(1, x2 - x1)
                    y2 = y1 + tf.maximum(1, y2 - y1)

                    new_shape = [input_shape[0], input_shape[1],
                                 y2 - y1, x2 - x1]

                    x_crop = img[:, :, y1:y2, x1:x2]
                    xm = tf.reshape(x_crop, new_shape)
                    # Max over the two spatial axes of the bin
                    # (`tf.max` is not a TensorFlow function).
                    pooled_val = tf.reduce_max(xm, axis=[2, 3])
                    outputs.append(pooled_val)

        elif self.dim_ordering == 'tf':
            x = tf.cast(x, 'int32')
            y = tf.cast(y, 'int32')
            w = tf.cast(w, 'int32')
            h = tf.cast(h, 'int32')

            rs = tf.image.resize(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
            outputs.append(rs)

    final_output = tf.keras.layers.concatenate(outputs, axis=0)
    final_output = tf.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

    if self.dim_ordering == 'th':
        final_output = tf.keras.backend.permute_dimensions(final_output, (0, 1, 4, 2, 3))
    else:
        final_output = tf.keras.backend.permute_dimensions(final_output, (0, 1, 2, 3, 4))

    return final_output
def filter_detections(boxes, classification, other=None, class_specific_filter=True, nms=True, score_threshold=0.05, max_detections=300, nms_threshold=0.5):
    """ Filter detections using the boxes and classification values.

    Args
        boxes                 : Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format.
        classification        : Tensor of shape (num_boxes, num_classes) containing the classification scores.
        other                 : List of tensors of shape (num_boxes, ...) to filter along with the boxes and classification scores.
        class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
        nms                   : Flag to enable/disable non maximum suppression.
        score_threshold       : Threshold used to prefilter the boxes with.
        max_detections        : Maximum number of detections to keep.
        nms_threshold         : Threshold for the IoU value to determine when a box should be suppressed.

    Returns
        A list of [boxes, scores, labels, other[0], other[1], ...].
        boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes.
        scores is shaped (max_detections,) and contains the scores of the predicted class.
        labels is shaped (max_detections,) and contains the predicted label.
        other[i] is shaped (max_detections, ...) and contains the filtered other[i] data.
        In case there are less than max_detections detections, the tensors are padded with -1's.
    """
    # Avoid a shared mutable default; None means "no extra tensors".
    other = [] if other is None else other

    def _filter_detections(scores, labels):
        # threshold based on score
        indices = tf.where(tf.greater(scores, score_threshold))

        if nms:
            filtered_boxes = tf.gather_nd(boxes, indices)
            filtered_scores = tf.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tf.image.non_max_suppression(
                filtered_boxes, filtered_scores,
                max_output_size=max_detections, iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = tf.gather(indices, nms_indices)

        # add indices to list of all indices
        labels = tf.gather_nd(labels, indices)
        indices = tf.stack([indices[:, 0], labels], axis=1)

        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(int(classification.shape[1])):
            scores = classification[:, c]
            labels = c * tf.ones((tf.shape(scores)[0],), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        indices = tf.concat(all_indices, axis=0)
    else:
        # Best score and its class per box (`tf.max` does not exist).
        scores = tf.reduce_max(classification, axis=1)
        labels = tf.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    scores, top_indices = tf.nn.top_k(scores, k=tf.minimum(max_detections, tf.shape(scores)[0]))

    # filter input using the final set of indices
    indices = tf.gather(indices[:, 0], top_indices)
    boxes = tf.gather(boxes, indices)
    labels = tf.gather(labels, top_indices)
    other_ = [tf.gather(o, indices) for o in other]

    # zero pad the outputs
    pad_size = tf.maximum(0, max_detections - tf.shape(scores)[0])
    boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = tf.cast(labels, 'int32')
    other_ = [
        tf.pad(o, [[0, pad_size]] + [[0, 0] for _ in range(1, len(o.shape))], constant_values=-1)
        for o in other_
    ]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    for o, s in zip(other_, [o.shape.as_list() for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_