def build_graph(self, image, label):
    """Build a two-branch spatial-transformer classifier and return its cost.

    Two localisation networks (variable scopes ``STN1`` and ``STN2``) each
    predict a 2x3 affine transform and bilinearly sample a
    ``WARP_TARGET_SIZE`` x ``WARP_TARGET_SIZE`` patch from ``image``.  The two
    patches are concatenated on the channel axis and classified by three
    fully-connected layers.

    Args:
        image: input image batch (the inline comments describe it as b,h,w,2).
        label: integer class labels for the sparse cross-entropy.

    Returns:
        The tensor named ``cost``: cross-entropy plus the weight-decay term.
    """
    # Homogeneous (y, x, 1) coordinates of every target pixel, row-major.
    side = np.arange(WARP_TARGET_SIZE)
    grid_y, grid_x = np.meshgrid(side, side, indexing='ij')
    xys = np.stack(
        [grid_y.ravel(), grid_x.ravel(), np.ones(grid_y.size)],
        axis=1).astype('float32')
    xys = tf.constant(xys, dtype=tf.float32, name='xys')  # p x 3

    image = image / 255.0 - 0.5  # bhw2

    def get_stn(img):
        """Predict an affine transform from ``img`` and sample ``img`` with it."""
        affine = (LinearWrap(img)
                  .AvgPooling('downsample', 2)
                  .Conv2D('conv0', 20, 5, padding='VALID')
                  .MaxPooling('pool0', 2)
                  .Conv2D('conv1', 20, 5, padding='VALID')
                  .FullyConnected('fc1', 32)
                  .FullyConnected(
                      'fct', 6, activation=tf.identity,
                      kernel_initializer=tf.constant_initializer(),
                      bias_initializer=tf.constant_initializer(
                          [1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
        # The 6 outputs are the parameters of the affine transformation.
        affine = tf.reshape(affine, [-1, 2, 3], name='affine')  # bx2x3
        affine = tf.reshape(tf.transpose(affine, [2, 0, 1]), [3, -1])  # 3 x (bx2)
        coords = tf.matmul(xys, affine)
        coords = tf.reshape(coords, [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
        coords = tf.transpose(coords, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
        return BilinearSample('warp', [img, coords], borderMode='constant')

    with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
        with tf.variable_scope('STN1'):
            sampled1 = get_stn(image)
        with tf.variable_scope('STN2'):
            sampled2 = get_stn(image)

    # For visualization in tensorboard
    with tf.name_scope('visualization'):
        border = [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]]
        padded1 = tf.pad(sampled1, border)
        padded2 = tf.pad(sampled2, border)

        def channels_stacked(t):
            # Put channel 0 and channel 1 one after the other along axis 1.
            return tf.concat([t[:, :, :, 0], t[:, :, :, 1]], 1)

        panels = [channels_stacked(t) for t in (image, padded1, padded2)]
        stacked = tf.concat(panels, 2, 'viz')
        tf.summary.image('visualize', tf.expand_dims(stacked, -1), max_outputs=30)

    sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
    logits = (LinearWrap(sampled)
              .FullyConnected('fc1', 256, activation=tf.nn.relu)
              .FullyConnected('fc2', 128, activation=tf.nn.relu)
              .FullyConnected('fct', 19, activation=tf.identity)())
    tf.nn.softmax(logits, name='prob')

    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    xent = tf.reduce_mean(xent, name='cross_entropy_loss')

    top1_hit = tf.nn.in_top_k(logits, label, 1)
    wrong = tf.to_float(tf.logical_not(top1_hit), name='incorrect_vector')
    summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    weight_decay = tf.multiply(
        1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss), name='regularize_loss')
    summary.add_moving_summary(xent, weight_decay)
    return tf.add_n([weight_decay, xent], name='cost')
def multilevel_rpn_losses(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits):
    """
    Sum the RPN label and box losses over all FPN levels.

    Args:
        multilevel_anchors: #lvl RPNAnchors
        multilevel_label_logits: #lvl tensors of shape HxWxA
        multilevel_box_logits: #lvl tensors of shape HxWxAx4

    Returns:
        label_loss, box_loss
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_anchors) == num_lvl
    assert len(multilevel_label_logits) == num_lvl
    assert len(multilevel_box_logits) == num_lvl

    label_losses = []
    box_losses = []
    with tf.name_scope('rpn_losses'):
        for lvl in range(num_lvl):
            lvl_anchors = multilevel_anchors[lvl]
            # The per-level scopes are numbered from 2 ('level2', 'level3', ...).
            lvl_label_loss, lvl_box_loss = rpn_losses(
                lvl_anchors.gt_labels,
                lvl_anchors.encoded_gt_boxes(),
                multilevel_label_logits[lvl],
                multilevel_box_logits[lvl],
                name_scope='level{}'.format(lvl + 2))
            label_losses.append(lvl_label_loss)
            box_losses.append(lvl_box_loss)

        total_label_loss = tf.add_n(label_losses, name='label_loss')
        total_box_loss = tf.add_n(box_losses, name='box_loss')
        add_moving_summary(total_label_loss, total_box_loss)
    return total_label_loss, total_box_loss
def _build_graph(self, inputs, is_training):
    """Build the online Q-network, the target network and the training cost.

    Sets ``self.predict_value``, ``self.greedy_choice`` and ``self.cost``.

    Args:
        inputs: five tensors ``(state, action, reward, next_state, isOver)``.
        is_training: forwarded to ``self._get_DQN_prediction`` for the
            online network; the target network is always built with ``False``.
    """
    state, action, reward, next_state, isOver = inputs

    q_values = self._get_DQN_prediction(state, is_training)
    self.predict_value = q_values

    taken_mask = tf.one_hot(action, NUM_ACTIONS)
    pred_action_value = tf.reduce_sum(q_values * taken_mask, 1)  # N,

    best_q = tf.reduce_max(q_values, 1)
    max_pred_reward = tf.reduce_mean(best_q, name='predict_reward')
    add_moving_summary(max_pred_reward)

    # NOTE(review): the greedy action is taken from the prediction on `state`,
    # not `next_state` -- confirm this is what the Double-DQN target intends.
    self.greedy_choice = tf.argmax(q_values, 1)  # N,

    with tf.variable_scope('target'):
        target_q = self._get_DQN_prediction(next_state, False)  # NxA

        # Double-DQN: evaluate the online network's greedy action with the
        # target network (plain DQN would take reduce_max over target_q).
        greedy_mask = tf.one_hot(self.greedy_choice, NUM_ACTIONS, 1.0, 0.0)
        best_v = tf.reduce_sum(target_q * greedy_mask, 1)

    not_over = 1.0 - tf.cast(isOver, tf.float32)
    target = reward + not_over * GAMMA * tf.stop_gradient(best_v)

    # Robust error: squared where the absolute error is below 1, absolute elsewhere.
    sq_err = tf.square(target - pred_action_value)
    abs_err = tf.abs(target - pred_action_value)
    # NOTE(review): tf.select was renamed tf.where in TensorFlow 1.0 -- confirm
    # the TensorFlow version this file targets.
    per_sample = tf.select(abs_err < 1, sq_err, abs_err)

    # monitor all W
    summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
                               ('fc.*/W', ['histogram', 'rms'])])
    self.cost = tf.reduce_mean(per_sample, name='cost')
def _build_graph(self, inputs):
    """Build a multi-layer LSTM language model and define ``self.cost``.

    The initial LSTM state of every layer is exposed as placeholders named
    ``c<k>`` / ``h<k>`` that default to a non-trainable zero variable, and the
    final state is exposed as the tensor ``last_state``.

    Args:
        inputs: two tensors ``(input, nextinput)``: the token ids fed to the
            network and the token ids it should predict.
    """
    tokens, next_tokens = inputs

    cell = rnn.MultiRNNCell([rnn.LSTMBlockCell(num_units=param.rnn_size)
                             for _ in range(param.num_rnn_layer)])

    def get_v(n):
        # A zero-initialized variable serves as the default value of a
        # placeholder, so a caller may feed a state of any batch size.
        default = tf.get_variable(n + '_unused', [param.batch_size, param.rnn_size],
                                  trainable=False,
                                  initializer=tf.constant_initializer())
        return tf.placeholder_with_default(
            default, shape=[None, param.rnn_size], name=n)

    # One state tuple per LSTM layer.  This used to be hard-coded to two
    # layers, which did not match the cell stack for any other
    # ``param.num_rnn_layer``; for two layers the names and creation order
    # (c0, h0, c1, h1) are unchanged.
    initial = tuple(
        rnn.LSTMStateTuple(get_v('c{}'.format(k)), get_v('h{}'.format(k)))
        for k in range(param.num_rnn_layer))

    embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
    input_feature = tf.nn.embedding_lookup(embeddingW, tokens)  # B x seqlen x rnnsize

    input_list = tf.unstack(input_feature, axis=1)  # seqlen x (Bxrnnsize)
    outputs, last_state = rnn.static_rnn(cell, input_list, initial, scope='rnnlm')
    last_state = tf.identity(last_state, 'last_state')

    # outputs: seqlen x (Bxrnnsize)
    output = tf.reshape(tf.concat(outputs, 1), [-1, param.rnn_size])  # (Bxseqlen) x rnnsize
    logits = FullyConnected('fc', output, param.vocab_size, nl=tf.identity)
    tf.nn.softmax(logits / param.softmax_temprature, name='prob')

    xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.reshape(next_tokens, [-1]))
    self.cost = tf.reduce_mean(xent_loss, name='cost')
    summary.add_param_summary(('.*/W', ['histogram']))  # monitor histogram of all W
    summary.add_moving_summary(self.cost)
def _build_graph(self, inputs):
    """Build the generator, the discriminator, the losses and a visualization.

    Args:
        inputs: two image tensors ``(input, output)``; both are rescaled
            with ``x / 128.0 - 1`` before use.
    """
    src, dst = inputs
    src = src / 128.0 - 1
    dst = dst / 128.0 - 1

    with argscope([Conv2D, Deconv2D],
                  W_init=tf.truncated_normal_initializer(stddev=0.02)), \
            argscope(LeakyReLU, alpha=0.2):
        with tf.variable_scope('gen'):
            fake_dst = self.generator(src)
        with tf.variable_scope('discrim'):
            real_pred = self.discriminator(src, dst)
        # The second discriminator call reuses the variables of the first.
        with tf.variable_scope('discrim', reuse=True):
            fake_pred = self.discriminator(src, fake_dst)

    self.build_losses(real_pred, fake_pred)
    errL1 = tf.reduce_mean(tf.abs(fake_dst - dst), name='L1_loss')
    self.g_loss = tf.add(self.g_loss, LAMBDA * errL1, name='total_g_loss')
    add_moving_summary(errL1, self.g_loss)

    # tensorboard visualization
    if IN_CH == 1:
        src = tf.image.grayscale_to_rgb(src)
    if OUT_CH == 1:
        dst = tf.image.grayscale_to_rgb(dst)
        fake_dst = tf.image.grayscale_to_rgb(fake_dst)
    side_by_side = tf.concat([src, dst, fake_dst], 2)
    viz = (side_by_side + 1.0) * 128.0
    viz = tf.clip_by_value(viz, 0, 255)
    viz = tf.cast(viz, tf.uint8, name='viz')
    tf.summary.image('input,output,fake', viz, max_outputs=max(30, BATCH))

    self.collect_variables()
def build_graph(self, image_pos):
    """Build a Wasserstein GAN with gradient penalty.

    Sets ``self.d_loss`` (critic loss plus 10 x gradient penalty) and
    ``self.g_loss``, then calls ``self.collect_variables()``.

    Args:
        image_pos: batch of real images; rescaled with ``x / 128.0 - 1``.
    """
    image_pos = image_pos / 128.0 - 1

    z_default = tf.random_normal([self.batch, self.zdim], name='z_train')
    z = tf.placeholder_with_default(z_default, [None, self.zdim], name='z')

    with argscope([Conv2D, Conv2DTranspose, FullyConnected],
                  kernel_initializer=tf.truncated_normal_initializer(stddev=0.02)):
        with tf.variable_scope('gen'):
            image_gen = self.generator(z)
        tf.summary.image('generated-samples', image_gen, max_outputs=30)

        # Random point on the segment between each real and generated image.
        alpha = tf.random_uniform(
            shape=[self.batch, 1, 1, 1], minval=0., maxval=1., name='alpha')
        interp = image_pos + alpha * (image_gen - image_pos)

        with tf.variable_scope('discrim'):
            vecpos = self.discriminator(image_pos)
            vecneg = self.discriminator(image_gen)
            vec_interp = self.discriminator(interp)

    # the Wasserstein-GAN losses
    self.d_loss = tf.reduce_mean(vecneg - vecpos, name='d_loss')
    self.g_loss = tf.negative(tf.reduce_mean(vecneg), name='g_loss')

    # the gradient penalty loss: the per-sample gradient norm should be 1
    grad = tf.gradients(vec_interp, [interp])[0]
    grad_norm = tf.sqrt(tf.reduce_sum(tf.square(grad), [1, 2, 3]))
    gradients_rms = symbolic_functions.rms(grad_norm, 'gradient_rms')
    gradient_penalty = tf.reduce_mean(
        tf.square(grad_norm - 1), name='gradient_penalty')
    add_moving_summary(self.d_loss, self.g_loss, gradient_penalty, gradients_rms)

    self.d_loss = tf.add(self.d_loss, 10 * gradient_penalty)

    self.collect_variables()
def fpn_map_rois_to_levels(boxes):
    """
    Assign boxes to level 2~5.

    Args:
        boxes (nx4):

    Returns:
        [tf.Tensor]: 4 tensors for level 2-5. Each tensor is a vector of indices of boxes in its level.
        [tf.Tensor]: 4 tensors, the gathered boxes in each level.

    Be careful that the returned tensor could be empty.
    """
    sqrtarea = tf.sqrt(tf_area(boxes))
    scaled = sqrtarea * (1. / 224) + 1e-6
    log2_scaled = tf.log(scaled) * (1.0 / np.log(2))
    level = tf.to_int32(tf.floor(4 + log2_scaled))

    # RoI levels range from 2~5 (not 6); anything outside is clamped to the ends.
    level_ids = [
        tf.where(level <= 2),
        tf.where(tf.equal(level, 3)),   # == is not supported
        tf.where(tf.equal(level, 4)),
        tf.where(level >= 5)]

    flat_ids = []
    for lvl, ids in enumerate(level_ids, start=2):
        flat_ids.append(tf.reshape(ids, [-1], name='roi_level{}_id'.format(lvl)))
    level_ids = flat_ids

    num_in_levels = []
    for lvl, ids in enumerate(level_ids, start=2):
        num_in_levels.append(tf.size(ids, name='num_roi_level{}'.format(lvl)))
    add_moving_summary(*num_in_levels)

    level_boxes = []
    for ids in level_ids:
        level_boxes.append(tf.gather(boxes, ids))
    return level_ids, level_boxes
def build_losses(self, logits_real, logits_fake):
    """Define ``self.d_loss`` and ``self.g_loss`` for the GAN minimax game.

    D and G play a two-player minimax game with value function V(G,D):

      min_G max_D V(D, G) = IE_{x ~ p_data} [log D(x)] + IE_{z ~ p_fake} [log (1 - D(G(z)))]

    Args:
        logits_real (tf.Tensor): discrim logits from real samples
        logits_fake (tf.Tensor): discrim logits from fake samples produced by generator
    """
    def mean_xent(logits, labels, name):
        # Mean sigmoid cross-entropy of `logits` against `labels`.
        xent = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels)
        return tf.reduce_mean(xent, name=name)

    def mean_of(condition, name):
        # Fraction of entries for which the boolean `condition` holds.
        return tf.reduce_mean(tf.cast(condition, tf.float32), name=name)

    with tf.name_scope("GAN_loss"):
        score_real = tf.sigmoid(logits_real)
        score_fake = tf.sigmoid(logits_fake)
        tf.summary.histogram('score-real', score_real)
        tf.summary.histogram('score-fake', score_fake)

        with tf.name_scope("discrim"):
            # The discriminator should label real samples 1 and fake samples 0.
            d_loss_pos = mean_xent(
                logits_real, tf.ones_like(logits_real), 'loss_real')
            d_loss_neg = mean_xent(
                logits_fake, tf.zeros_like(logits_fake), 'loss_fake')

            d_pos_acc = mean_of(score_real > 0.5, 'accuracy_real')
            d_neg_acc = mean_of(score_fake < 0.5, 'accuracy_fake')

            d_accuracy = tf.add(.5 * d_pos_acc, .5 * d_neg_acc, name='accuracy')
            self.d_loss = tf.add(.5 * d_loss_pos, .5 * d_loss_neg, name='loss')

        with tf.name_scope("gen"):
            # The generator wants its fake samples to be labelled 1.
            self.g_loss = mean_xent(
                logits_fake, tf.ones_like(logits_fake), 'loss')
            g_accuracy = mean_of(score_fake > 0.5, 'accuracy')

        add_moving_summary(self.g_loss, self.d_loss, d_accuracy, g_accuracy)
def build_graph(self, input, output):
    """Build the generator, the discriminator, the losses and a visualization.

    Args:
        input: source image batch; rescaled with ``x / 128.0 - 1``.
        output: target image batch; rescaled the same way.
    """
    src = input / 128.0 - 1
    dst = output / 128.0 - 1

    with argscope([Conv2D, Conv2DTranspose],
                  kernel_initializer=tf.truncated_normal_initializer(stddev=0.02)):
        with tf.variable_scope('gen'):
            fake_dst = self.generator(src)
        with tf.variable_scope('discrim'):
            real_pred = self.discriminator(src, dst)
            fake_pred = self.discriminator(src, fake_dst)

    self.build_losses(real_pred, fake_pred)
    errL1 = tf.reduce_mean(tf.abs(fake_dst - dst), name='L1_loss')
    self.g_loss = tf.add(self.g_loss, LAMBDA * errL1, name='total_g_loss')
    add_moving_summary(errL1, self.g_loss)

    # tensorboard visualization
    if IN_CH == 1:
        src = tf.image.grayscale_to_rgb(src)
    if OUT_CH == 1:
        dst = tf.image.grayscale_to_rgb(dst)
        fake_dst = tf.image.grayscale_to_rgb(fake_dst)

    visualize_tensors('input,output,fake', [src, dst, fake_dst],
                      max_outputs=max(30, BATCH))

    self.collect_variables()
def LSGAN_losses(real, fake):
    """Compute the least-squares GAN losses and add them to the moving summaries.

    Args:
        real: discriminator outputs on real samples.
        fake: discriminator outputs on generated samples.

    Returns:
        g_loss, d_loss
    """
    # Discriminator: push `real` towards 1 and `fake` towards 0.
    real_sq_err = tf.squared_difference(real, 1)
    d_real = tf.reduce_mean(real_sq_err, name='d_real')
    d_fake = tf.reduce_mean(tf.square(fake), name='d_fake')
    d_loss = tf.multiply(d_real + d_fake, 0.5, name='d_loss')

    # Generator: push `fake` towards 1.
    fake_sq_err = tf.squared_difference(fake, 1)
    g_loss = tf.reduce_mean(fake_sq_err, name='g_loss')

    add_moving_summary(g_loss, d_loss)
    return g_loss, d_loss
def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
    """
    Compute the RPN objectness and box-regression losses.

    Anchors labelled -1 are ignored; anchors labelled 1 are positives.
    Recall and precision at several score thresholds are added to the moving
    summaries.  Both metrics report 0 when their denominator is 0.

    Args:
        anchor_labels: fHxfWxNA
        anchor_boxes: fHxfWxNAx4, encoded
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4

    Returns:
        label_loss, box_loss
    """
    with tf.device('/cpu:0'):
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(
            tf.count_nonzero(valid_mask, dtype=tf.int32), name='num_valid_anchor')
        nr_pos = tf.count_nonzero(pos_mask, dtype=tf.int32, name='num_pos_anchor')

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        with tf.device('/cpu:0'):
            for th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(
                    valid_prediction, name='num_pos_prediction')
                # Predicted-positive anchors whose prediction matches the label.
                pos_prediction_corr = tf.count_nonzero(
                    tf.logical_and(
                        valid_label_prob > th,
                        tf.equal(valid_prediction, valid_anchor_labels)),
                    dtype=tf.int32)

                # Guard against an image without positive anchors: the plain
                # division would produce NaN/inf.  Precision below is guarded
                # the same way.
                recall = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos))
                recall = tf.where(
                    tf.equal(nr_pos, 0), 0.0, recall,
                    name='recall_th{}'.format(th))
                summaries.append(recall)

                precision = tf.to_float(
                    tf.truediv(pos_prediction_corr, nr_pos_prediction))
                precision = tf.where(
                    tf.equal(nr_pos_prediction, 0), 0.0, precision,
                    name='precision_th{}'.format(th))
                summaries.append(precision)
        add_moving_summary(*summaries)

    label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
    label_loss = tf.reduce_mean(label_loss, name='label_loss')

    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    delta = 1.0 / 9
    # Summed Huber loss over positive anchors, rescaled by 1/delta and then
    # normalized by the number of valid anchors.
    box_loss = tf.losses.huber_loss(
        pos_anchor_boxes, pos_box_logits, delta=delta,
        reduction=tf.losses.Reduction.SUM) / delta
    box_loss = tf.div(
        box_loss,
        tf.cast(nr_valid, tf.float32), name='box_loss')

    add_moving_summary(label_loss, box_loss, nr_valid, nr_pos)
    return label_loss, box_loss
def _build_graph(self, inputs):
    """Embed a pair of inputs and define ``self.cost`` as the siamese cosine loss.

    Args:
        inputs: three tensors ``(x, y, label)``.
    """
    first, second, label = inputs
    emb_first, emb_second = self.embed([first, second])

    # Tag the embedding of the first input alone with the name 'emb',
    # reusing the variables created above.
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        tf.identity(self.embed(first), name="emb")

    loss = symbf.siamese_cosine_loss(emb_first, emb_second, label, scope="loss")
    self.cost = tf.identity(loss, name="cost")
    add_moving_summary(self.cost)
def get_feature_match_loss(self, feats_real, feats_fake):
    """Sum, over feature pairs, the MSE between their means along axis 0.

    Args:
        feats_real: iterable of feature tensors computed on real samples.
        feats_fake: iterable of feature tensors computed on fake samples,
            paired with ``feats_real`` by position.

    Returns:
        The tensor named ``feature_match_loss``.
    """
    def mean_feature_mse(real, fake):
        sq_diff = tf.squared_difference(
            tf.reduce_mean(real, 0),
            tf.reduce_mean(fake, 0))
        # Each term is named after the op that produced the real feature.
        return tf.reduce_mean(sq_diff, name='mse_feat_' + real.op.name)

    per_feature = [mean_feature_mse(real, fake)
                   for real, fake in zip(feats_real, feats_fake)]
    total = tf.add_n(per_feature, name='feature_match_loss')
    add_moving_summary(total)
    return total
def _build_graph(self, inputs):
    """Embed an (anchor, positive, negative) triplet and define ``self.cost``.

    Args:
        inputs: three tensors ``(a, p, n)``.
    """
    anchor, positive, negative = inputs
    emb_a, emb_p, emb_n = self.embed([anchor, positive, negative])

    # Tag the embedding of the first input alone with the name 'emb',
    # reusing the variables created above.
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        tf.identity(self.embed(anchor), name="emb")

    loss, pos_dist, neg_dist = self.loss(emb_a, emb_p, emb_n)
    self.cost = tf.identity(loss, name="cost")
    add_moving_summary(pos_dist, neg_dist, self.cost)
def build_graph(self, x, y, label):
    """Embed a pair of inputs and return the siamese cosine loss.

    Args:
        x: first input of the pair.
        y: second input of the pair.
        label: pair label passed to ``siamese_cosine_loss``.

    Returns:
        The tensor named ``cost``.
    """
    emb_x, emb_y = self.embed([x, y])

    # Tag the embedding of `x` alone with the name 'emb', reusing the
    # variables created above.
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        tf.identity(self.embed(x), name="emb")

    loss = siamese_cosine_loss(emb_x, emb_y, label, scope="loss")
    loss = tf.identity(loss, name="cost")
    add_moving_summary(loss)
    return loss
def _build_graph(self, inputs):
    """Build the model from the input variables and define self.cost at the end."""
    # `inputs` holds the input variables: the images and their labels.
    image, label = inputs

    # Convolution layers expect NHWC input, so append a single channel axis.
    image = tf.expand_dims(image, 3)

    image = image * 2 - 1   # center the pixel values at zero

    # `argscope` sets default options for every layer created inside it:
    # here all Conv2D layers are 3x3, 32-channel, with ReLU.
    with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
        logits = (LinearWrap(image)
                  .Conv2D('conv0')
                  .MaxPooling('pool0', 2)
                  .Conv2D('conv1')
                  .Conv2D('conv2')
                  .MaxPooling('pool1', 2)
                  .Conv2D('conv3')
                  .FullyConnected('fc0', 512, nl=tf.nn.relu)
                  .Dropout('dropout', 0.5)
                  .FullyConnected('fc1', out_dim=10, nl=tf.identity)())

    tf.nn.softmax(logits, name='prob')   # a Bx10 tensor of probabilities

    # Per-sample loss (a vector of length B), then its average.
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    xent = tf.reduce_mean(xent, name='cross_entropy_loss')

    top1_hit = tf.nn.in_top_k(logits, label, 1)
    correct = tf.cast(top1_hit, tf.float32, name='correct')
    accuracy = tf.reduce_mean(correct, name='accuracy')

    # Monitor the training error and accuracy as moving averages.  The values are:
    # 1. written to tensorboard
    # 2. written to stat.json
    # 3. printed after each epoch
    train_error = tf.reduce_mean(1 - correct, name='train_error')
    summary.add_moving_summary(train_error, accuracy)

    # A regex selects the parameters that receive weight decay:
    # here every W (weight matrix) of the fc layers.
    weight_decay = tf.multiply(
        1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss), name='regularize_loss')
    self.cost = tf.add_n([weight_decay, xent], name='total_cost')
    summary.add_moving_summary(xent, weight_decay, self.cost)

    # monitor histogram of all weights (of conv and fc layers) in tensorboard
    summary.add_param_summary(('.*/W', ['histogram', 'rms']))
def build_graph(self, a, p, n):
    """Embed an (anchor, positive, negative) triplet and return the loss.

    Args:
        a: anchor input.
        p: positive input.
        n: negative input.

    Returns:
        The tensor named ``cost``.
    """
    emb_a, emb_p, emb_n = self.embed([a, p, n])

    # Tag the embedding of `a` alone with the name 'emb', reusing the
    # variables created above.
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        tf.identity(self.embed(a), name="emb")

    loss, pos_dist, neg_dist = self.loss(emb_a, emb_p, emb_n)
    loss = tf.identity(loss, name="cost")
    add_moving_summary(pos_dist, neg_dist, loss)
    return loss
def build_graph(self, image, label):
    """Build the model from the input variables and return the cost at the end."""
    # Convolution layers expect NHWC input, so append a single channel axis.
    image = tf.expand_dims(image, 3)

    image = image * 2 - 1   # center the pixel values at zero

    # `argscope` sets default options for every layer created inside it:
    # here all tf.layers.conv2d calls use 'same' padding and ReLU.
    with argscope([tf.layers.conv2d], padding='same', activation=tf.nn.relu):
        net = tf.layers.conv2d(image, 32, 3, name='conv0')
        net = tf.layers.max_pooling2d(net, 2, 2, padding='valid')
        net = tf.layers.conv2d(net, 32, 3, name='conv1')
        net = tf.layers.conv2d(net, 32, 3, name='conv2')
        net = tf.layers.max_pooling2d(net, 2, 2, padding='valid')
        net = tf.layers.conv2d(net, 32, 3, name='conv3')
        net = tf.layers.flatten(net)
        net = tf.layers.dense(net, 512, activation=tf.nn.relu, name='fc0')
        net = tf.layers.dropout(
            net, rate=0.5, training=get_current_tower_context().is_training)
    logits = tf.layers.dense(net, 10, activation=tf.identity, name='fc1')

    # Per-sample loss (a vector of length B), then its average.
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    xent = tf.reduce_mean(xent, name='cross_entropy_loss')

    top1_hit = tf.nn.in_top_k(logits, label, 1)
    correct = tf.cast(top1_hit, tf.float32, name='correct')
    accuracy = tf.reduce_mean(correct, name='accuracy')

    # Monitor the training error and accuracy as moving averages.  The values are:
    # 1. written to tensorboard
    # 2. written to stat.json
    # 3. printed after each epoch
    train_error = tf.reduce_mean(1 - correct, name='train_error')
    summary.add_moving_summary(train_error, accuracy)

    # A regex selects the parameters that receive weight decay:
    # here every kernel (weight matrix) of the fc layers.
    # Any other way of defining this cost works as well.
    weight_decay = tf.multiply(
        1e-5, regularize_cost('fc.*/kernel', tf.nn.l2_loss), name='regularize_loss')
    total_cost = tf.add_n([weight_decay, xent], name='total_cost')
    summary.add_moving_summary(xent, weight_decay, total_cost)

    # monitor histogram of all weights (of conv and fc layers) in tensorboard
    summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))

    # the total cost to be optimized
    return total_cost
def _build_graph(self, inputs):
    """Build the classifier and define ``self.cost``.

    ``self.cost`` is the classification loss plus L2 weight decay on the
    variables matching ``self.weight_decay_pattern``.

    Args:
        inputs: two tensors ``(image, label)``.
    """
    image, label = inputs
    image = ImageNetModel.image_preprocess(image, bgr=self.image_bgr)
    if self.data_format == 'NCHW':
        # Move the last axis to position 1.
        image = tf.transpose(image, [0, 3, 1, 2])

    logits = self.get_logits(image)
    xent_loss = ImageNetModel.compute_loss_and_error(logits, label)

    l2_reg = tf.contrib.layers.l2_regularizer(self.weight_decay)
    decay_loss = regularize_cost(
        self.weight_decay_pattern, l2_reg, name='l2_regularize_loss')

    add_moving_summary(xent_loss, decay_loss)
    self.cost = tf.add_n([xent_loss, decay_loss], name='cost')
def _build_graph(self, inputs):
    """Build the model from the input variables and define self.cost at the end."""
    # `inputs` holds the input variables: the images and their labels.
    image, label = inputs

    # Convolution layers expect NHWC input, so append a single channel axis.
    image = tf.expand_dims(image, 3)

    image = image * 2 - 1   # center the pixel values at zero

    def conv3x3(x, name):
        # 32-channel 3x3 convolution with 'same' padding and ReLU.
        return tf.layers.conv2d(
            x, 32, 3, padding='same', activation=tf.nn.relu, name=name)

    def pool2(x):
        return tf.layers.max_pooling2d(x, 2, 2, padding='valid')

    net = conv3x3(image, 'conv0')
    net = pool2(net)
    net = conv3x3(net, 'conv1')
    net = conv3x3(net, 'conv2')
    net = pool2(net)
    net = conv3x3(net, 'conv3')
    net = tf.layers.flatten(net)
    net = tf.layers.dense(net, 512, activation=tf.nn.relu, name='fc0')
    net = tf.layers.dropout(
        net, rate=0.5, training=get_current_tower_context().is_training)
    logits = tf.layers.dense(net, 10, activation=tf.identity, name='fc1')

    tf.nn.softmax(logits, name='prob')   # a Bx10 tensor of probabilities

    # Per-sample loss (a vector of length B), then its average.
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    xent = tf.reduce_mean(xent, name='cross_entropy_loss')

    top1_hit = tf.nn.in_top_k(logits, label, 1)
    correct = tf.cast(top1_hit, tf.float32, name='correct')
    accuracy = tf.reduce_mean(correct, name='accuracy')

    # Monitor the training error and accuracy as moving averages.  The values are:
    # 1. written to tensorboard
    # 2. written to stat.json
    # 3. printed after each epoch
    train_error = tf.reduce_mean(1 - correct, name='train_error')
    summary.add_moving_summary(train_error, accuracy)

    # A regex selects the parameters that receive weight decay:
    # here every kernel (weight matrix) of the fc layers.
    weight_decay = tf.multiply(
        1e-5, regularize_cost('fc.*/kernel', tf.nn.l2_loss), name='regularize_loss')
    self.cost = tf.add_n([weight_decay, xent], name='total_cost')
    summary.add_moving_summary(xent, weight_decay, self.cost)

    # monitor histogram of all weights (of conv and fc layers) in tensorboard
    summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
def build_graph(self, input, nextinput):
    """Build a stacked-LSTM language model whose state persists across steps.

    The LSTM state of every layer lives in non-trainable variables (also
    stored in ``self.state``); computing the returned cost also writes the
    final state of the unrolled RNN back into them.

    Args:
        input: token ids fed to the network.
        nextinput: token ids the network should predict.

    Returns:
        The tensor named ``cost``.
    """
    is_training = get_current_tower_context().is_training
    initializer = tf.random_uniform_initializer(-0.05, 0.05)

    def get_basic_cell():
        lstm = rnn.BasicLSTMCell(
            num_units=HIDDEN_SIZE, forget_bias=0.0,
            reuse=tf.get_variable_scope().reuse)
        if not is_training:
            return lstm
        # Output dropout is applied only while training.
        return rnn.DropoutWrapper(lstm, output_keep_prob=1 - DROPOUT)

    cell = rnn.MultiRNNCell([get_basic_cell() for _ in range(NUM_LAYER)])

    def get_v(n):
        return tf.get_variable(
            n, [BATCH, HIDDEN_SIZE],
            trainable=False,
            initializer=tf.constant_initializer())

    state_var = tuple(
        rnn.LSTMStateTuple(get_v('c{}'.format(k)), get_v('h{}'.format(k)))
        for k in range(NUM_LAYER))
    self.state = state_var

    embeddingW = tf.get_variable(
        'embedding', [VOCAB_SIZE, HIDDEN_SIZE], initializer=initializer)
    input_feature = tf.nn.embedding_lookup(embeddingW, input)  # B x seqlen x hiddensize
    input_feature = Dropout(input_feature, keep_prob=1 - DROPOUT)

    with tf.variable_scope('LSTM', initializer=initializer):
        input_list = tf.unstack(input_feature, num=SEQ_LEN, axis=1)  # seqlen x (Bxhidden)
        outputs, last_state = rnn.static_rnn(cell, input_list, state_var, scope='rnn')

    # update the hidden state after a rnn loop completes
    update_state_ops = [
        tf.assign(getattr(state_var[k], part), getattr(last_state[k], part))
        for k in range(NUM_LAYER)
        for part in ('c', 'h')]

    # outputs: seqlen x (Bxrnnsize)
    output = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN_SIZE])  # (Bxseqlen) x hidden
    logits = FullyConnected(
        'fc', output, VOCAB_SIZE,
        activation=tf.identity, kernel_initializer=initializer,
        bias_initializer=initializer)
    xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.reshape(nextinput, [-1]))

    # Tie the state update to the cost so that evaluating the cost runs it.
    with tf.control_dependencies(update_state_ops):
        total_xent = tf.reduce_sum(xent_loss)
        cost = tf.truediv(
            total_xent, tf.cast(BATCH, tf.float32), name='cost')  # log-perplexity

    perpl = tf.exp(cost / SEQ_LEN, name='perplexity')
    summary.add_moving_summary(perpl, cost)
    return cost
def _build_graph(self, inputs):
    """Build a GAN whose discriminator is an autoencoder, with a balancing term ``kt``.

    Sets ``self.d_loss`` and ``self.g_loss`` and calls
    ``self.collect_variables()``.

    Args:
        inputs: sequence whose first element is the batch of real images;
            rescaled with ``x / 128.0 - 1``.
    """
    image_pos = inputs[0] / 128.0 - 1

    z_default = tf.random_uniform(
        [args.batch, args.z_dim], minval=-1, maxval=1, name='z_train')
    z = tf.placeholder_with_default(z_default, [None, args.z_dim], name='z')

    def summary_image(name, img):
        # Undo the input scaling and clip to [0, 255] before casting to uint8.
        img = (img + 1.0) * 128.0
        img = tf.clip_by_value(img, 0, 255)
        tf.summary.image(name, tf.cast(img, tf.uint8), max_outputs=30)

    with argscope([Conv2D, FullyConnected],
                  W_init=tf.truncated_normal_initializer(stddev=0.02)):
        with tf.variable_scope('gen'):
            image_gen = self.decoder(z)

        with tf.variable_scope('discrim'):
            with tf.variable_scope('enc'):
                hidden_pos = self.encoder(image_pos)
                hidden_neg = self.encoder(image_gen)

            with tf.variable_scope('dec'):
                recon_pos = self.decoder(hidden_pos)
                recon_neg = self.decoder(hidden_neg)

    with tf.name_scope('viz'):
        summary_image('generated-samples', image_gen)
        summary_image('reconstruct-real', recon_pos)
        summary_image('reconstruct-fake', recon_neg)

    with tf.name_scope('losses'):
        # L1 reconstruction errors of real and generated images.
        L_pos = tf.reduce_mean(tf.abs(recon_pos - image_pos), name='loss_pos')
        L_neg = tf.reduce_mean(tf.abs(recon_neg - image_gen), name='loss_neg')

        eq = tf.subtract(GAMMA * L_pos, L_neg, name='equilibrium')
        measure = tf.add(L_pos, tf.abs(eq), name='measure')

        kt = tf.get_variable('kt', dtype=tf.float32, initializer=0.0)

        # kt moves by 1e-3 * eq before the discriminator loss is computed.
        update_kt = kt.assign_add(1e-3 * eq)
        with tf.control_dependencies([update_kt]):
            self.d_loss = tf.subtract(L_pos, kt * L_neg, name='loss_D')
            self.g_loss = L_neg

    add_moving_summary(L_pos, L_neg, eq, measure, self.d_loss)
    tf.summary.scalar('kt', kt)

    self.collect_variables()
def compute_loss_and_error(logits, label):
    """Return the mean cross-entropy and summarize the top-1/top-5 training error.

    Args:
        logits: classification logits.
        label: integer class labels.

    Returns:
        The tensor named ``xentropy-loss``.
    """
    per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    loss = tf.reduce_mean(per_sample, name='xentropy-loss')

    def prediction_incorrect(logits, label, topk=1, name='incorrect_vector'):
        # 1.0 where the label is not among the top-k logits, 0.0 elsewhere.
        with tf.name_scope('prediction_incorrect'):
            miss = tf.logical_not(tf.nn.in_top_k(logits, label, topk))
        return tf.cast(miss, tf.float32, name=name)

    for topk in (1, 5):
        wrong = prediction_incorrect(
            logits, label, topk, name='wrong-top{}'.format(topk))
        add_moving_summary(
            tf.reduce_mean(wrong, name='train-error-top{}'.format(topk)))
    return loss
def build_graph(self, x, y, label):
    """Embed a pair of inputs and return the contrastive loss.

    Args:
        x: first input of the pair.
        y: second input of the pair.
        label: pair label passed to ``contrastive_loss``.

    Returns:
        The tensor named ``cost``.
    """
    # embed both inputs
    emb_x, emb_y = self.embed([x, y])

    # tag the embedding of `x` alone with the name 'emb', just for inference later on
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        tf.identity(self.embed(x), name="emb")

    # compute the actual loss
    loss, pos_dist, neg_dist = contrastive_loss(
        emb_x, emb_y, label, 5., extra=True, scope="loss")
    loss = tf.identity(loss, name="cost")

    # track these values during training
    add_moving_summary(pos_dist, neg_dist, loss)
    return loss
def _build_graph(self, inputs):
    """Embed a pair of inputs and define ``self.cost`` as the contrastive loss.

    Args:
        inputs: three tensors ``(x, y, label)``.
    """
    # unpack the inputs
    first, second, label = inputs

    # embed both inputs
    emb_first, emb_second = self.embed([first, second])

    # tag the embedding of the first input alone with the name 'emb',
    # just for inference later on
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        tf.identity(self.embed(first), name="emb")

    # compute the actual loss
    loss, pos_dist, neg_dist = symbf.contrastive_loss(
        emb_first, emb_second, label, 5., extra=True, scope="loss")
    self.cost = tf.identity(loss, name="cost")

    # track these values during training
    add_moving_summary(pos_dist, neg_dist, self.cost)
def sample_fg_bg(iou):
    """Randomly sample foreground and background row indices of ``iou``.

    A row is foreground when its maximum along axis 1 reaches
    ``cfg.FRCNN.FG_THRESH``.  At most
    ``int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO)`` foreground rows are
    kept; background rows fill the rest of ``cfg.FRCNN.BATCH_PER_IM``.

    Args:
        iou: 2-D tensor (it is reduced along axis 1).

    Returns:
        fg_inds, bg_inds: two vectors of sampled row indices.
    """
    best_iou = tf.reduce_max(iou, axis=1)
    fg_mask = best_iou >= cfg.FRCNN.FG_THRESH

    fg_candidates = tf.reshape(tf.where(fg_mask), [-1])
    fg_quota = int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO)
    num_fg = tf.minimum(fg_quota, tf.size(fg_candidates), name='num_fg')
    fg_inds = tf.random_shuffle(fg_candidates)[:num_fg]

    bg_candidates = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
    num_bg = tf.minimum(
        cfg.FRCNN.BATCH_PER_IM - num_fg,
        tf.size(bg_candidates), name='num_bg')
    bg_inds = tf.random_shuffle(bg_candidates)[:num_bg]

    add_moving_summary(num_fg, num_bg)
    return fg_inds, bg_inds
def build_graph(self, x, label):
    """Embed ``x`` and return the center loss plus a weighted classification loss.

    Args:
        x: input batch passed to ``self.embed``.
        label: integer class labels.

    Returns:
        The tensor named ``cost``: ``emb_cost + 100 * cls_cost``.
    """
    # embed the input and expose the embedding as 'emb'
    emb = self.embed(x)
    emb = tf.identity(emb, name='emb')

    # compute the embedding loss
    emb_cost = center_loss(emb, label, 10, 0.01)

    # compute the classification loss
    logits = slim.layers.fully_connected(
        tf.nn.relu(emb), 10, activation_fn=None, scope='logits')
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cls_cost = tf.reduce_mean(xent, name='classification_costs')

    total_cost = tf.add(emb_cost, 100 * cls_cost, name="cost")

    # track these values during training
    add_moving_summary(total_cost, cls_cost, emb_cost)
    return total_cost
def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
    """
    Compute the classification and box-regression losses, and add metrics
    to the moving summaries.

    Args:
        labels: n,
        label_logits: nxC
        fg_boxes: nfgx4, encoded
        fg_box_logits: nfgxCx4 or nfgx1x4 if class agnostic

    Returns:
        label_loss, box_loss
    """
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=label_logits)
    label_loss = tf.reduce_mean(xent, name='label_loss')

    # Foreground samples are those with a label greater than 0.
    fg_inds = tf.where(labels > 0)[:, 0]
    fg_labels = tf.gather(labels, fg_inds)
    num_fg = tf.size(fg_inds, out_type=tf.int64)
    empty_fg = tf.equal(num_fg, 0)

    class_specific = int(fg_box_logits.shape[1]) > 1
    if class_specific:
        # Pick, for each foreground sample, the box predicted for its own class.
        indices = tf.stack([tf.range(num_fg), fg_labels], axis=1)  # #fgx2
        box_pred = tf.gather_nd(fg_box_logits, indices)
    else:
        box_pred = tf.reshape(fg_box_logits, [-1, 4])

    with tf.name_scope('label_metrics'), tf.device('/cpu:0'):
        prediction = tf.argmax(label_logits, axis=1, name='label_prediction')
        correct = tf.to_float(tf.equal(prediction, labels))  # boolean/integer gather is unavailable on GPU
        accuracy = tf.reduce_mean(correct, name='accuracy')

        fg_label_pred = tf.argmax(tf.gather(label_logits, fg_inds), axis=1)
        predicted_zero = tf.equal(fg_label_pred, 0)
        num_zero = tf.reduce_sum(tf.to_int64(predicted_zero), name='num_zero')

        # Both foreground metrics report 0 when there is no foreground sample.
        fn_ratio = tf.to_float(tf.truediv(num_zero, num_fg))
        false_negative = tf.where(empty_fg, 0., fn_ratio, name='false_negative')
        fg_correct_mean = tf.reduce_mean(tf.gather(correct, fg_inds))
        fg_accuracy = tf.where(empty_fg, 0., fg_correct_mean, name='fg_accuracy')

    # Summed Huber loss over foreground boxes, normalized by the number of samples.
    box_loss = tf.losses.huber_loss(
        fg_boxes, box_pred, reduction=tf.losses.Reduction.SUM)
    num_samples = tf.to_float(tf.shape(labels)[0])
    box_loss = tf.truediv(box_loss, num_samples, name='box_loss')

    add_moving_summary(label_loss, box_loss, accuracy,
                       fg_accuracy, false_negative,
                       tf.to_float(num_fg, name='num_fg_label'))
    return label_loss, box_loss
def proposal_metrics(iou):
    """
    Add summaries for RPN proposals.

    Summarizes the mean of the best IoU per ground-truth box and the fraction
    of ground-truth boxes whose best IoU reaches 0.3 and 0.5.

    Args:
        iou: nxm, #proposal x #gt
    """
    # find best roi for each gt, for summary only
    best_iou = tf.reduce_max(iou, axis=0)
    summaries = [tf.reduce_mean(best_iou, name='best_iou_per_gt')]

    with tf.device('/cpu:0'):
        for th in [0.3, 0.5]:
            num_hit = tf.count_nonzero(best_iou >= th)
            num_gt = tf.size(best_iou, out_type=tf.int64)
            summaries.append(
                tf.truediv(num_hit, num_gt, name='recall_iou{}'.format(th)))
    add_moving_summary(*summaries)
def build_graph(self, comb_state, action, reward, isOver):
    """Build the Q-network and, when training, the (Double-)DQN cost.

    ``comb_state`` holds ``self.history + 1`` frames: the first
    ``self.history`` form the current state and the last ``self.history``
    form the next state.

    Args:
        comb_state: stacked frames; cast to float32 and reshaped here.
        action: actions taken.
        reward: rewards received; clipped to [-1, 1].
        isOver: episode-termination flags.

    Returns:
        The Huber cost when training; ``None`` otherwise (only
        ``self.predict_value`` is built).
    """
    comb_state = tf.cast(comb_state, tf.float32)
    full_shape = [-1] + list(self._shape2d) + [self.history + 1, self.channel]
    comb_state = tf.reshape(comb_state, full_shape)

    # A window of `self.history` frames along axis 3.
    window = [-1, -1, -1, self.history, -1]

    state = tf.slice(comb_state, [0, 0, 0, 0, 0], window)
    state = tf.reshape(state, self._shape4d_for_prediction, name='state')
    self.predict_value = self.get_DQN_prediction(state)
    if not get_current_tower_context().is_training:
        return

    reward = tf.clip_by_value(reward, -1, 1)
    next_state = tf.slice(comb_state, [0, 0, 0, 1, 0], window, name='next_state')
    next_state = tf.reshape(next_state, self._shape4d_for_prediction)

    taken_mask = tf.one_hot(action, self.num_actions, 1.0, 0.0)
    pred_action_value = tf.reduce_sum(self.predict_value * taken_mask, 1)  # N,

    best_q = tf.reduce_max(self.predict_value, 1)
    max_pred_reward = tf.reduce_mean(best_q, name='predict_reward')
    summary.add_moving_summary(max_pred_reward)

    with tf.variable_scope('target'), varreplace.freeze_variables(skip_collection=True):
        target_q = self.get_DQN_prediction(next_state)  # NxA

    if self.method == 'Double':
        # Double-DQN: the online network picks the action, the target
        # network evaluates it.
        next_predict_value = self.get_DQN_prediction(next_state)
        self.greedy_choice = tf.argmax(next_predict_value, 1)  # N,
        greedy_mask = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
        best_v = tf.reduce_sum(target_q * greedy_mask, 1)
    else:
        # DQN
        best_v = tf.reduce_max(target_q, 1)  # N,

    not_over = 1.0 - tf.cast(isOver, tf.float32)
    target = reward + not_over * self.gamma * tf.stop_gradient(best_v)

    cost = tf.losses.huber_loss(
        target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
    # monitor all W
    summary.add_param_summary(('conv.*/W', ['histogram', 'rms']),
                              ('fc.*/W', ['histogram', 'rms']))
    summary.add_moving_summary(cost)
    return cost
def build_losses(self, vecpos, vecneg):
    """
    Set ``self.d_loss`` and ``self.g_loss`` to the Wasserstein-GAN losses.

    d_loss = mean(vecneg - vecpos), g_loss = -mean(vecneg). Both are also
    registered as moving summaries.

    Args:
        vecpos: presumably critic outputs on real samples; verify against caller.
        vecneg: presumably critic outputs on generated samples; verify against caller.
    """
    score_gap = vecneg - vecpos
    self.d_loss = tf.reduce_mean(score_gap, name='d_loss')
    mean_neg = tf.reduce_mean(vecneg)
    self.g_loss = tf.negative(mean_neg, name='g_loss')
    add_moving_summary(self.d_loss, self.g_loss)
def build_graph(self, real_sample):
    """
    Build the generator, discriminator, and mutual-information-regularized
    GAN losses.

    Sets ``self.g_loss`` and ``self.d_loss`` (each the loss produced by
    ``self.build_losses`` minus the mutual-information estimate) and then
    calls ``self.collect_variables()``.

    Args:
        real_sample: batch of real samples; a trailing axis of size 1 is
            appended before it is fed to the discriminator.
    """
    real_sample = tf.expand_dims(real_sample, -1)

    # sample the latent code:
    zc = shapeless_placeholder(sample_prior(BATCH), 0, name='z_code')
    z_noise = shapeless_placeholder(
        tf.random_uniform([BATCH, NOISE_DIM], -1, 1), 0, name='z_noise')
    z = tf.concat([zc, z_noise], 1, name='z')

    with argscope([Conv2D, Conv2DTranspose, FullyConnected],
                  kernel_initializer=tf.truncated_normal_initializer(stddev=0.02)):
        with tf.variable_scope('gen'):
            fake_sample = self.generator(z)
            fake_sample_viz = tf.cast((fake_sample) * 255.0, tf.uint8, name='viz')
            tf.summary.image('gen', fake_sample_viz, max_outputs=30)

        # may need to investigate how bn stats should be updated across two discrim
        with tf.variable_scope('discrim'):
            real_pred, _ = self.discriminator(real_sample)
            fake_pred, dist_param = self.discriminator(fake_sample)

    # Mutual information between x (i.e. zc in this case) and some
    # information s (the generated samples in this case):
    #
    #     I(x;s) = H(x) - H(x|s)
    #            = H(x) + E[\log P(x|s)]
    #
    # The distribution from which zc is sampled, in this case, is set to a
    # fixed prior already. So the first term is a constant.
    # For the second term, we can maximize its variational lower bound:
    #
    #     E_{x \sim P(x|s)}[\log Q(x|s)]
    #
    # where Q(x|s) is a proposal distribution to approximate P(x|s).
    # Here, Q(x|s) is assumed to be a distribution which shares the form
    # of P, and whose parameters are predicted by the discriminator network.
    #
    # (Kept as comments rather than a bare string literal: the backslashes
    # above are invalid escape sequences inside a non-raw string.)
    with tf.name_scope("mutual_information"):
        with tf.name_scope('prior_entropy'):
            cat, uni = get_distributions(DIST_PRIOR_PARAM[:NUM_CLASS],
                                         DIST_PRIOR_PARAM[NUM_CLASS:])
            ents = [
                cat.entropy(name='cat_entropy'),
                tf.reduce_sum(uni.entropy(), name='uni_entropy'),
            ]
            entropy = tf.add_n(ents, name='total_entropy')
            # Note that the entropy of prior is a constant.
            # The paper mentioned it but didn't use it.

        with tf.name_scope('conditional_entropy'):
            cond_ents = entropy_from_samples(zc, dist_param)
            cond_entropy = tf.add_n(cond_ents, name="total_entropy")

        MI = tf.subtract(entropy, cond_entropy, name='mutual_information')
        summary.add_moving_summary(entropy, cond_entropy, MI, *cond_ents)

    # default GAN objective
    self.build_losses(real_pred, fake_pred)

    # subtract mutual information for latent factors (we want to maximize them)
    self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
    self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')

    summary.add_moving_summary(self.g_loss, self.d_loss)

    # distinguish between variables of generator and discriminator updates
    self.collect_variables()
def build_graph(self, A, B):
    """
    Build both generators, both discriminators and the cycle-consistent
    LSGAN losses for the two image domains.

    Sets ``self.g_loss`` (LSGAN generator losses plus 10x the L1
    reconstruction losses) and ``self.d_loss`` (sum of the two
    discriminator losses), then calls
    ``self.collect_variables('gen', 'discrim')``.

    Args:
        A: image batch from the first domain; rescaled by x / 128 - 1 and
            transposed with [0, 3, 1, 2] for the channels-first layers.
        B: image batch from the second domain; treated the same way.
    """
    def to_model_input(img):
        return tf.transpose(img / 128.0 - 1.0, [0, 3, 1, 2])

    with tf.name_scope('preprocess'):
        A = to_model_input(A)
        B = to_model_input(B)

    def viz3(name, a, b, c):
        # Concatenate the three batches along the last axis, undo the
        # preprocessing, and log the result as a uint8 image summary.
        with tf.name_scope(name):
            strip = tf.concat([a, b, c], axis=3)
            strip = tf.transpose(strip, [0, 2, 3, 1])
            strip = tf.clip_by_value((strip + 1.0) * 128, 0, 255)
            strip = tf.cast(strip, tf.uint8, name='viz')
        tf.summary.image(name, strip, max_outputs=50)

    # use the initializers from torch
    conv_defaults = argscope(
        [Conv2D, Conv2DTranspose], use_bias=False,
        kernel_initializer=tf.random_normal_initializer(stddev=0.02))
    layout_defaults = argscope(
        [Conv2D, Conv2DTranspose, InstanceNorm], data_format='channels_first')
    with conv_defaults, layout_defaults:
        with tf.variable_scope('gen'):
            with tf.variable_scope('B'):
                AB = self.generator(A)
            with tf.variable_scope('A'):
                BA = self.generator(B)
                ABA = self.generator(AB)
            with tf.variable_scope('B'):
                BAB = self.generator(BA)

        viz3('A_recon', A, AB, ABA)
        viz3('B_recon', B, BA, BAB)

        with tf.variable_scope('discrim'):
            with tf.variable_scope('A'):
                A_dis_real = self.discriminator(A)
                A_dis_fake = self.discriminator(BA)
            with tf.variable_scope('B'):
                B_dis_real = self.discriminator(B)
                B_dis_fake = self.discriminator(AB)

    def LSGAN_losses(real, fake):
        # Least-squares GAN: D pushes real -> 1 and fake -> 0, G pushes fake -> 1.
        real_term = tf.reduce_mean(tf.squared_difference(real, 1), name='d_real')
        fake_term = tf.reduce_mean(tf.square(fake), name='d_fake')
        dis = tf.multiply(real_term + fake_term, 0.5, name='d_loss')
        gen = tf.reduce_mean(tf.squared_difference(fake, 1), name='g_loss')
        add_moving_summary(gen, dis)
        return gen, dis

    with tf.name_scope('losses'):
        with tf.name_scope('LossA'):
            # reconstruction loss
            recon_loss_A = tf.reduce_mean(tf.abs(A - ABA), name='recon_loss')
            # gan loss
            G_loss_A, D_loss_A = LSGAN_losses(A_dis_real, A_dis_fake)

        with tf.name_scope('LossB'):
            recon_loss_B = tf.reduce_mean(tf.abs(B - BAB), name='recon_loss')
            G_loss_B, D_loss_B = LSGAN_losses(B_dis_real, B_dis_fake)

        LAMBDA = 10.0
        adversarial = G_loss_A + G_loss_B
        cycle = (recon_loss_A + recon_loss_B) * LAMBDA
        self.g_loss = tf.add(adversarial, cycle, name='G_loss_total')
        self.d_loss = tf.add(D_loss_A, D_loss_B, name='D_loss_total')
    self.collect_variables('gen', 'discrim')

    add_moving_summary(recon_loss_A, recon_loss_B, self.g_loss, self.d_loss)
def build_graph(self, role_id, prob_state, value_state, last_cards,
                action_target, mode, history_action_prob, discounted_return,
                lstm_state):
    """
    Build the policy/value heads and, in training towers, the PPO cost.

    Named outputs created for inference: 'new_lstm_state', 'active_prob',
    'passive_prob', 'mode_out' and 'pred_value'.

    Args:
        role_id: per-sample role ids. This function compares them against
            1, 2 and 3, so they are treated as 1-based.
        prob_state, last_cards, lstm_state: inputs of ``self.get_policy``.
        value_state: input of ``self.get_value``.
        action_target: index of the action taken, one-hot encoded over
            ``len(action_space)``.
        mode: per-sample column selector (0 -> active head, 1 -> passive
            head), as implied by the ``gather_nd`` on the stacked heads.
        history_action_prob: probability of the action under the policy that
            collected the sample (denominator of the importance ratio).
        discounted_return: regression target of the value head.

    Returns:
        Sum of the three per-role costs when training; nothing otherwise.
    """
    active_logits, passive_logits, new_lstm_state = self.get_policy(
        role_id, prob_state, last_cards, lstm_state)
    new_lstm_state = tf.identity(new_lstm_state, name='new_lstm_state')
    active_prob = tf.nn.softmax(active_logits, name='active_prob')
    passive_prob = tf.nn.softmax(passive_logits, name='passive_prob')
    tf.identity(mode, name='mode_out')
    value = self.get_value(role_id, value_state)
    # this is the value for each agent, not the global value
    value = tf.identity(value, name='pred_value')
    if not get_current_tower_context().is_training:
        return

    action_target_onehot = tf.one_hot(action_target, len(action_space))
    # Clip away exact 0/1 so that log() stays finite.
    active_clipped = tf.clip_by_value(active_prob, 1e-7, 1 - 1e-7)
    passive_clipped = tf.clip_by_value(passive_prob, 1e-7, 1 - 1e-7)

    # Row i is (i, mode[i]): selects the active or passive column per sample.
    idx = tf.stack([tf.range(tf.shape(prob_state)[0]), mode], axis=1)

    # log-probability of the taken action under each head: B * 2 -> B
    active_logpa = tf.reduce_sum(
        action_target_onehot * tf.log(active_clipped), 1)
    passive_logpa = tf.reduce_sum(
        action_target_onehot * tf.log(passive_clipped), 1)
    logpa = tf.gather_nd(tf.stack([active_logpa, passive_logpa], axis=1), idx)

    # probability of the taken action (importance sampling): B * 2 -> B
    active_pa = tf.reduce_sum(action_target_onehot * active_clipped, 1)
    passive_pa = tf.reduce_sum(action_target_onehot * passive_clipped, 1)
    pa = tf.gather_nd(tf.stack([active_pa, passive_pa], axis=1), idx)

    # using PPO
    ppo_epsilon = tf.get_variable(
        'ppo_epsilon', shape=[],
        initializer=tf.constant_initializer(0.2), trainable=False)
    importance_b = pa / (history_action_prob + 1e-8)

    # advantage
    advantage_b = tf.subtract(
        discounted_return, tf.stop_gradient(value), name='advantage')

    # Clipped surrogate objective, negated because the cost is minimized.
    policy_loss_b = -tf.minimum(
        importance_b * advantage_b,
        tf.clip_by_value(importance_b, 1 - ppo_epsilon, 1 + ppo_epsilon) * advantage_b)
    entropy_loss_b = pa * logpa
    value_loss_b = tf.square(value - discounted_return)

    entropy_beta = tf.get_variable(
        'entropy_beta', shape=[],
        initializer=tf.constant_initializer(0.005), trainable=False)
    value_weight = tf.get_variable(
        'value_weight', shape=[],
        initializer=tf.constant_initializer(0.2), trainable=False)

    # regularization loss
    ctx = get_current_tower_context()
    if ctx.has_own_variables:
        # be careful of the first tower (name='')
        l2_loss = ctx.get_collection_in_tower(
            tf.GraphKeys.REGULARIZATION_LOSSES)
    else:
        l2_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    if len(l2_loss) > 0:
        logger.info(
            "regularize_cost_from_collection() found {} regularizers "
            "in REGULARIZATION_LOSSES collection.".format(len(l2_loss)))

    # 3 * 2: row (role - 1) holds the [active, passive] L2 sums of that role.
    l2_losses = []
    for role in range(1, 4):
        scope = 'policy_network_%d' % role
        l2_loss_role = [l for l in l2_loss if l.op.name.startswith(scope)]
        l2_active_loss = [
            l for l in l2_loss_role if 'branch_passive' not in l.name
        ]
        l2_passive_loss = l2_loss_role
        print('l2 active loss: {}'.format(len(l2_active_loss)))
        print('l2 passive loss: {}'.format(len(l2_passive_loss)))
        # 2
        losses = tf.stack(
            [tf.add_n(l2_active_loss), tf.add_n(l2_passive_loss)], axis=0)
        if role == 1 or role == 3:
            losses = tf.stop_gradient(losses)
        l2_losses.append(losses)

    # 3 * 2
    l2_losses = tf.stack(l2_losses, axis=0)
    # B * 2
    # The table is 0-based while role ids are 1-based (see the masks below),
    # so shift by one. Gathering with the raw id read the wrong role's row
    # and indexed out of range for role 3.
    l2_losses = tf.gather(l2_losses, role_id - 1)
    # B
    l2_losses = tf.gather_nd(l2_losses, idx)
    print(l2_losses.shape)

    def masked_mean(per_sample, mask, count, name):
        # Mean of `per_sample` over the entries selected by `mask`.
        return tf.truediv(
            tf.reduce_sum(tf.boolean_mask(per_sample, mask)), count, name=name)

    costs = []
    for i in range(1, 4):
        mask = tf.equal(role_id, i)
        valid_batch = tf.reduce_sum(tf.cast(mask, tf.float32))
        # A role may be absent from the batch; dividing by 0 would turn every
        # statistic below, and therefore the total cost, into NaN.
        denom = tf.maximum(valid_batch, 1.0)

        l2_loss = masked_mean(l2_losses, mask, denom, 'l2_loss_%d' % i)
        pred_reward = masked_mean(value, mask, denom, 'predict_reward_%d' % i)
        true_reward = masked_mean(
            discounted_return, mask, denom, 'true_reward_%d' % i)
        advantage = tf.sqrt(
            tf.truediv(
                tf.reduce_sum(tf.square(tf.boolean_mask(advantage_b, mask))),
                denom),
            name='rms_advantage_%d' % i)
        policy_loss = masked_mean(
            policy_loss_b, mask, denom, 'policy_loss_%d' % i)
        entropy_loss = masked_mean(
            entropy_loss_b, mask, denom, 'entropy_loss_%d' % i)
        value_loss = masked_mean(
            value_loss_b, mask, denom, 'value_loss_%d' % i)

        cost = tf.add_n([
            policy_loss,
            entropy_loss * entropy_beta,
            value_weight * value_loss,
            l2_loss,
        ], name='cost_%d' % i)
        costs.append(cost)

        importance = masked_mean(
            importance_b, mask, denom, 'importance_%d' % i)
        add_moving_summary(policy_loss, entropy_loss, value_loss, l2_loss,
                           pred_reward, true_reward, advantage, cost,
                           importance, decay=0)

    return tf.add_n(costs)
def build_graph(self, *inputs):
    """
    Build the LSTM-based accuracy predictor and its mean-squared-error cost.

    Args:
        *inputs: ``mseq, mlen, pseq, plen, pve, target`` followed by any
            number of extra per-sample statistics. The two sequences are
            embedded with a shared table and run through a shared stacked
            LSTM, with ``mlen`` / ``plen`` as their sequence lengths.

    Returns:
        ``self.cost``, the MSE between ``target`` and ``self.pred``.
    """
    mseq, mlen, pseq, plen, pve, target = inputs[:6]
    h_stats = list(inputs[6:])
    initializer = tf.random_uniform_initializer(-0.1, 0.1)

    with tf.variable_scope(self.vs_name):
        # Feature embedding
        vocab_size = LayerTypes.num_layer_types()
        embeddingW = tf.get_variable(
            'embedding', [vocab_size, self.lstm_size], initializer=initializer)

        def embed(seq):
            # B x seqlen x hiddensize, followed by dropout
            looked_up = tf.nn.embedding_lookup(embeddingW, seq)
            return Dropout(looked_up, keep_prob=self.dropout_kp)

        mfeat = embed(mseq)
        pfeat = embed(pseq)

        # LSTM structures
        def get_basic_cell():
            lstm = rnn.LSTMCell(num_units=self.lstm_size,
                                initializer=initializer,
                                reuse=tf.get_variable_scope().reuse)
            return rnn.DropoutWrapper(lstm, output_keep_prob=self.dropout_kp)

        cells = rnn.MultiRNNCell(
            [get_basic_cell() for _ in range(self.num_lstms)])

        # initial state
        mstate = cells.zero_state(self.batch_size, dtype=tf.float32)
        pstate = cells.zero_state(self.batch_size, dtype=tf.float32)

        # apply LSTMs on the feature embedding
        with tf.variable_scope('LSTM'):
            mout, mstate = tf.nn.dynamic_rnn(
                cells, mfeat, initial_state=mstate, sequence_length=mlen)
            pout, pstate = tf.nn.dynamic_rnn(
                cells, pfeat, initial_state=pstate, sequence_length=plen)

        # only use the last output for predicting the child model accuracy
        column = [self.batch_size, 1]
        mlen = tf.cast(tf.reshape(mlen, column), dtype=tf.float32)
        plen = tf.cast(tf.reshape(plen, column), dtype=tf.float32)
        pve = tf.reshape(pve, column)
        h_stats = [tf.reshape(hs, [-1, 1]) for hs in h_stats]

        pieces = [mout[:, -1], pout[:, -1], pve] + h_stats
        feat = tf.concat(values=pieces, axis=1)

        pred = FullyConnected('fully_connect', feat, 1, activation=tf.sigmoid)
        pred = tf.reshape(pred, [self.batch_size])
        self.pred = tf.identity(pred, name='predicted_accuracy')

        mse = tf.losses.mean_squared_error(target, self.pred)
        self.cost = tf.identity(mse, name='cost')
        add_moving_summary(self.cost)
        return self.cost
def build_graph(self, image, label):
    """
    Build a two-conv-layer CNN classifier and return its training cost.

    Creates the named tensors 'prob', 'cross_entropy_loss', 'correct',
    'accuracy', 'train_error', 'regularize_loss' and 'total_cost'.

    Args:
        image: batch of single-channel images without a channel axis (one is
            appended at axis 3). The hard-coded ``7 * 7 * 32`` flatten after
            two stride-2 poolings implies 28x28 inputs — TODO confirm.
        label: integer class ids for the 10-way classifier.

    Returns:
        tf.Tensor: ``total_cost``, the mean cross-entropy plus weight decay.
    """
    # Local import so this block does not depend on how the file imports
    # tensorpack.
    from tensorpack.tfutils import get_current_tower_context

    is_training = get_current_tower_context().is_training

    # inputs to conv nets are NHWC := Num_samples x Height x Width x Channels
    image = tf.expand_dims(image, 3)
    # Rescale pixels with x * 2 - 1 (presumably [0, 1] -> [-1, 1]; verify the
    # input range against the dataflow).
    image = image * 2 - 1

    he_init = tf.contrib.layers.variance_scaling_initializer(2.0)

    # Convolutional Layer #1 and Pooling Layer #1
    conv1 = tf.layers.conv2d(
        inputs=image, filters=32, kernel_size=3,
        kernel_initializer=he_init, padding="same", activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional Layer #2 and Pooling Layer #2
    conv2 = tf.layers.conv2d(
        inputs=pool1, filters=32, kernel_size=3,
        kernel_initializer=he_init, padding="same", activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Dense Layer
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 32])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024,
                            activation=tf.nn.relu)
    # Dropout must be active only while training; a hard-coded training=True
    # would keep dropping units in inference towers as well.
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=is_training)

    # Logits Layer
    logits = tf.layers.dense(inputs=dropout, units=10)

    # Exposes normalized class probabilities under the name 'prob'. The loss
    # below takes the raw logits, so this op is only an output.
    tf.nn.softmax(logits, name='prob')

    # a vector of length B with loss of each sample
    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                          labels=label)
    # the average cross-entropy loss
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    # 1.0 where the top-1 prediction equals the label, else 0.0
    correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32,
                      name='correct')
    accuracy = tf.reduce_mean(correct, name='accuracy')

    # Moving-average summaries of the training error and accuracy.
    train_error = tf.reduce_mean(1 - correct, name='train_error')
    summary.add_moving_summary(train_error, accuracy)

    # Use a regex to find parameters to apply weight decay.
    # NOTE(review): the layers above are tf.layers, whose variables are
    # typically named '<layer>/kernel', so 'fc.*/W' (and '.*/W' below) may
    # match nothing — confirm.
    wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                          name='regularize_loss')
    total_cost = tf.add_n([wd_cost, cost], name='total_cost')
    summary.add_moving_summary(cost, wd_cost, total_cost)

    # monitor histogram of all weight (of conv and fc layers) in tensorboard
    summary.add_param_summary(('.*/W', ['histogram', 'rms']))
    return total_cost
def _build_graph(self, inputs):
    """
    Build the RPN and the Fast R-CNN head on a shared conv4 feature map.

    In training, sets ``self.cost`` to the sum of the RPN losses, the
    Fast R-CNN losses and weight decay. In inference, creates the named
    outputs 'fastrcnn_all_probs', 'fastrcnn_fg_probs' and 'fastrcnn_fg_boxes'.

    Args:
        inputs: ``image, anchor_labels, anchor_boxes, gt_boxes, gt_labels``.
    """
    is_training = get_current_tower_context().is_training
    image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    fm_anchors = self._get_anchors(image)
    image = self._preprocess(image)

    anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)
    featuremap = pretrained_resnet_conv4(image, config.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        featuremap, 1024, config.NUM_ANCHOR)
    rpn_label_loss, rpn_box_loss = rpn_losses(
        anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

    # (fHxfWxNA)x4, floatbox
    decoded_boxes = decode_bbox_target(
        rpn_box_logits, fm_anchors, config.ANCHOR_STRIDE)
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        decoded_boxes,
        tf.reshape(rpn_label_logits, [-1]),
        tf.shape(image)[2:])

    # Training crops a sampled subset of proposals; inference crops them all.
    if is_training:
        rois, rcnn_encoded_boxes, rcnn_labels = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        rois = proposal_boxes

    # Shared path: image coordinates -> feature-map coordinates -> head.
    rois_on_featuremap = rois * (1.0 / config.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, rois_on_featuremap, 14)
    feature_fastrcnn = resnet_conv5_gap(
        roi_resized, config.RESNET_NUM_BLOCK[-1])  # nxc
    head_label_logits, head_box_logits = fastrcnn_head(
        feature_fastrcnn, config.NUM_CLASS)

    if is_training:
        fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
            rcnn_labels, rcnn_encoded_boxes,
            head_label_logits, head_box_logits)

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn)/.*W',
            l2_regularizer(1e-4), name='wd_cost')

        self.cost = tf.add_n([
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            wd_cost
        ], 'total_cost')

        add_moving_summary(self.cost)
        add_moving_summary(wd_cost)
        return

    label_probs = tf.nn.softmax(head_label_logits, name='fastrcnn_all_probs')  # NP,
    labels = tf.argmax(head_label_logits, axis=1)
    fg_ind, fg_box_logits = fastrcnn_predict_boxes(labels, head_box_logits)
    # Created for its name only; consumers fetch 'fastrcnn_fg_probs'.
    tf.gather(label_probs, fg_ind, name='fastrcnn_fg_probs')
    fg_boxes = tf.gather(proposal_boxes, fg_ind)

    # Undo the regression-weight scaling before decoding.
    fg_box_logits = fg_box_logits / tf.constant(
        config.FASTRCNN_BBOX_REG_WEIGHTS)
    fg_decoded = decode_bbox_target(
        fg_box_logits, fg_boxes, config.ANCHOR_STRIDE)  # Nfx4, floatbox
    tf.identity(fg_decoded, name='fastrcnn_fg_boxes')
def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    """
    Sample foreground/background RoIs and build their training targets.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, int32

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        target_boxes: tx4 encoded box, the regression target
        labels: t labels
    """
    @under_name_scope()
    def assign_class_to_roi(iou, gt_boxes, gt_labels):
        """
        Match every roi to the gt box it overlaps most.

        Args:
            iou: nxm (nr_proposal x nr_gt)
        Returns:
            fg_mask: n boolean, whether each roibox is fg
            roi_labels: n int32, best label for each roi box
            best_gt_boxes: nx4
        """
        nearest_gt = tf.argmax(iou, axis=1)  # n, index of the best gt
        nearest_iou = tf.reduce_max(iou, axis=1)  # n,
        matched_boxes = tf.gather(gt_boxes, nearest_gt)  # nx4
        matched_labels = tf.gather(gt_labels, nearest_gt)  # n,
        is_fg = nearest_iou >= config.FASTRCNN_FG_THRESH
        return is_fg, matched_labels, matched_boxes

    iou = pairwise_iou(boxes, gt_boxes)  # nxm

    with tf.name_scope('proposal_metrics'):
        # find best roi for each gt, for summary only
        per_gt_best = tf.reduce_max(iou, axis=0)
        tracked = [tf.reduce_mean(per_gt_best, name='best_iou_per_gt')]
        with tf.device('/cpu:0'):
            for th in [0.3, 0.5]:
                tracked.append(tf.truediv(
                    tf.count_nonzero(per_gt_best >= th),
                    tf.size(per_gt_best, out_type=tf.int64),
                    name='recall_iou{}'.format(th)))
        add_moving_summary(*tracked)

    # n, n, nx4
    fg_mask, roi_labels, best_gt_boxes = assign_class_to_roi(
        iou, gt_boxes, gt_labels)

    # don't have to add gt for training, but add it anyway:
    # gt box j is addressed as index n + j of the concatenated boxes below.
    proposal_fg = tf.reshape(tf.where(fg_mask), [-1])
    gt_as_fg = tf.cast(
        tf.range(tf.size(gt_labels)) + tf.shape(boxes)[0], tf.int64)
    fg_inds = tf.concat([proposal_fg, gt_as_fg], 0)
    fg_available = tf.size(fg_inds)
    fg_quota = int(config.FASTRCNN_BATCH_PER_IM * config.FASTRCNN_FG_RATIO)
    num_fg = tf.minimum(fg_quota, fg_available, name='num_fg')
    fg_inds = tf.slice(tf.random_shuffle(fg_inds), [0], [num_fg])

    # Background fills whatever is left of the per-image batch.
    bg_inds = tf.where(tf.logical_not(fg_mask))[:, 0]
    bg_available = tf.size(bg_inds)
    num_bg = tf.minimum(config.FASTRCNN_BATCH_PER_IM - num_fg,
                        bg_available, name='num_bg')
    bg_inds = tf.slice(tf.random_shuffle(bg_inds), [0], [num_bg])

    add_moving_summary(num_fg, num_bg)

    all_boxes = tf.concat([boxes, gt_boxes], axis=0)
    all_matched_gt_boxes = tf.concat([best_gt_boxes, gt_boxes], axis=0)
    all_labels = tf.concat([roi_labels, gt_labels], axis=0)

    # ind in all n+m boxes
    picked = tf.concat([fg_inds, bg_inds], axis=0)
    ret_boxes = tf.gather(all_boxes, picked, name='sampled_boxes')
    ret_matched_gt_boxes = tf.gather(all_matched_gt_boxes, picked)
    ret_encoded_boxes = encode_bbox_target(ret_matched_gt_boxes, ret_boxes)
    ret_encoded_boxes = ret_encoded_boxes * tf.constant(
        config.FASTRCNN_BBOX_REG_WEIGHTS)

    # bg boxes will not be trained on: their label is 0
    fg_labels = tf.gather(all_labels, fg_inds)
    bg_labels = tf.zeros_like(bg_inds, dtype=tf.int64)
    ret_labels = tf.concat([fg_labels, bg_labels],
                           axis=0, name='sampled_labels')

    return (ret_boxes,
            tf.stop_gradient(ret_encoded_boxes),
            tf.stop_gradient(ret_labels))
def build_graph(self, *inputs):
    """
    Build the C4 Faster/Mask R-CNN graph.

    Args:
        *inputs: ``image, anchor_labels, anchor_boxes, gt_boxes, gt_labels``
            plus ``gt_masks`` when ``config.MODE_MASK`` is set.

    Returns:
        The total cost (RPN + Fast R-CNN + optional mask loss + weight decay)
        when training. In inference nothing is returned; the mask output is
        exposed as the named tensor 'final_masks' when ``config.MODE_MASK``
        is set.
    """
    is_training = get_current_tower_context().is_training
    if config.MODE_MASK:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
    else:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    image = self.preprocess(image)  # 1CHW

    featuremap = resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024, config.NUM_ANCHOR)

    # Restrict anchors and their targets to the extent of this feature map.
    fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
        featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
    anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

    image_shape2d = tf.shape(image)[2:]  # h,w
    pred_boxes_decoded = decode_bbox_target(
        rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        tf.reshape(pred_boxes_decoded, [-1, 4]),
        tf.reshape(rpn_label_logits, [-1]),
        image_shape2d,
        config.TRAIN_PRE_NMS_TOPK if is_training else config.TEST_PRE_NMS_TOPK,
        config.TRAIN_POST_NMS_TOPK if is_training else config.TEST_POST_NMS_TOPK)

    if is_training:
        # sample proposal boxes in training
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        rcnn_boxes = proposal_boxes

    # Scale boxes from image coordinates to feature-map coordinates.
    boxes_on_featuremap = rcnn_boxes * (1.0 / config.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
    # which was fixed in TF 1.6
    def ff_true():
        # Regular path: conv5 on the cropped RoIs, then the box/label heads.
        feature_fastrcnn = resnet_conv5(
            roi_resized, config.RESNET_NUM_BLOCK[-1])  # nxcx7x7
        feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first')
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', feature_gap, config.NUM_CLASS)
        # Return C5 feature to be shared with mask branch
        return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

    def ff_false():
        # Empty-input path: zero-row tensors standing in for ff_true's outputs.
        ncls = config.NUM_CLASS
        return tf.zeros([0, 2048, 7, 7]), tf.zeros([0, ncls]), tf.zeros([0, ncls - 1, 4])

    if get_tf_version_number() >= 1.6:
        feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = ff_true()
    else:
        logger.warn("This example may drop support for TF < 1.6 soon.")
        feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
            tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)

    if is_training:
        # rpn loss
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

        # fastrcnn loss
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        # fg inds w.r.t all samples (label 0 is treated as background here)
        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if config.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            # In training, mask branch shares the same C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, config.NUM_CLASS, num_convs=0)  # #fg x #cat x 14x14

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                fg_sampled_boxes,
                fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
        else:
            # Constant placeholder so the add_n below has the same arity.
            mrcnn_loss = 0.0

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(1e-4), name='wd_cost')

        total_cost = tf.add_n([
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss,
            wd_cost], 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        final_boxes, final_labels = self.fastrcnn_inference(
            image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

        if config.MODE_MASK:
            # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
            def f1():
                # Re-crop at the final detections and run the mask head on them.
                roi_resized = roi_align(
                    featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
                feature_maskrcnn = resnet_conv5(
                    roi_resized, config.RESNET_NUM_BLOCK[-1])
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', feature_maskrcnn, config.NUM_CLASS, 0)  # #result x #cat x 14x14
                # Pick, per detection, the mask channel at index (label - 1).
                indices = tf.stack([
                    tf.range(tf.size(final_labels)),
                    tf.to_int32(final_labels) - 1], axis=1)
                final_mask_logits = tf.gather_nd(mask_logits, indices)  # #resultx14x14
                return tf.sigmoid(final_mask_logits)

            # With no detections, emit an empty [0, 14, 14] tensor instead.
            final_masks = tf.cond(
                tf.size(final_labels) > 0, f1, lambda: tf.zeros([0, 14, 14]))
            tf.identity(final_masks, name='final_masks')
def _build_graph(self, inputs):
    """
    Build the image -> membrane -> label generator chain, the membrane and
    label discriminators, and all losses.

    Sets ``self.g_loss`` / ``self.d_loss`` (each including its own weight
    decay), calls ``self.collect_variables()`` and writes an image summary
    named 'colorized'.

    Args:
        inputs: six tensors ``pi, pm, pl, ui, um, ul``. Presumably the
            paired and unpaired image / membrane / label batches; verify
            against the dataflow.
    """
    # The graph handle is needed by `rounded` for the gradient override.
    G = tf.get_default_graph() # For round
    # NOTE(review): the results of these two calls are discarded here.
    tf.local_variables_initializer()
    tf.global_variables_initializer()
    pi, pm, pl, ui, um, ul = inputs
    pi = cvt2tanh(pi)
    pm = cvt2tanh(pm)
    pl = cvt2tanh(pl)

    ui = cvt2tanh(ui)
    um = cvt2tanh(um)
    ul = cvt2tanh(ul)

    def tf_rand_score (x1, x2):
        # Runs on numpy arrays through tf.py_func below:
        # 1 - adjusted Rand score of the two flattened arrays.
        return 1.0 - adjusted_rand_score (x1.flatten (), x2.flatten ())

    def rounded(label, factor = MAX_LABEL, name='quantized'):
        # Quantize `label` with tf.round between cvt2imag and cvt2tanh.
        # The "Round" gradient is overridden with "Identity" so gradients
        # pass through the rounding.
        with G.gradient_override_map({"Round": "Identity"}):
            with freeze_variables():
                with tf.name_scope(name=name):
                    label = cvt2imag(label, maxVal=factor)
                    label = tf.round(label)
                    label = cvt2tanh(label, maxVal=factor)
                return tf.identity(label, name=name)

    with argscope([Conv2D, Deconv2D, FullyConnected],
                  W_init=tf.truncated_normal_initializer(stddev=0.02),
                  use_bias=False), \
            argscope(BatchNorm, gamma_init=tf.random_uniform_initializer()), \
            argscope([Conv2D, Deconv2D, BatchNorm], data_format='NHWC'), \
            argscope(LeakyReLU, alpha=0.2):

        with tf.variable_scope('gen'):
            # Real pair image 4 gen
            with tf.variable_scope('I2M'):
                pim = self.generator(pi)
            with tf.variable_scope('M2L'):
                piml = self.generator(pim)
                pml = self.generator(pm)

        with tf.variable_scope('discrim'):
            with tf.variable_scope('M'):
                m_dis_real = self.discriminator(um)
                m_dis_fake_from_image = self.discriminator(pim)

            with tf.variable_scope('L'):
                l_dis_real = self.discriminator(ul)
                l_dis_fake_from_image = self.discriminator(piml)

    # Only the image-derived label is quantized. The discriminator above
    # already received the un-rounded piml.
    piml = rounded(piml)

    # L1 reconstruction terms. In this version they feed only the summaries
    # below, not g_loss.
    recon_iml = tf.reduce_mean(tf.abs((pl) - (piml)), name='recon_iml')
    recon_im = tf.reduce_mean(tf.abs((pm) - (pim)), name='recon_im')

    with tf.name_scope('GAN_loss'):
        G_loss_LI, D_loss_LI = self.build_losses(l_dis_real, l_dis_fake_from_image, name='LL')
        G_loss_MI, D_loss_MI = self.build_losses(m_dis_real, m_dis_fake_from_image, name='MI')

    # custom loss for membr
    with tf.name_scope('membr_loss'):
        def membr_loss(y_true, y_pred, name='membr_loss'):
            # mean(binary_cross_entropy - dice_coe) on the cvt2imag outputs.
            return tf.reduce_mean(tf.subtract(binary_cross_entropy(cvt2imag(y_true, maxVal=1.0), cvt2imag(y_pred, maxVal=1.0)),
                                              dice_coe(cvt2imag(y_true, maxVal=1.0), cvt2imag(y_pred, maxVal=1.0), axis=[1,2,3], loss_type='jaccard')),
                                  name=name)
        membr_im = membr_loss(pm, pim, name='membr_im')

    # custom loss for label
    with tf.name_scope('label_loss'):
        def label_loss(y_true_L, y_pred_L, y_grad_M, name='label_loss'):
            # Returns (mean of membrane * binary edge map of the label,
            # that edge map after cvt2tanh).
            # NOTE(review): `y_true_L` and `name` are not used in this body.
            g_mag_grad_M = cvt2imag(y_grad_M, maxVal=1.0)
            mag_grad_L = magnitute_central_difference(y_pred_L, name='mag_grad_L')
            # 1 wherever the label gradient magnitude is positive, else 0.
            cond = tf.greater(mag_grad_L, tf.zeros_like(mag_grad_L))
            thresholded_mag_grad_L = tf.where(cond,
                                              tf.ones_like(mag_grad_L),
                                              tf.zeros_like(mag_grad_L),
                                              name='thresholded_mag_grad_L')

            gtv_guess = tf.multiply(g_mag_grad_M, thresholded_mag_grad_L, name='gtv_guess')
            loss_gtv_guess = tf.reduce_mean(gtv_guess, name='loss_gtv_guess')

            thresholded_mag_grad_L = cvt2tanh(thresholded_mag_grad_L, maxVal=1.0)
            # NOTE(review): this converted gtv_guess is never read afterwards.
            gtv_guess = cvt2tanh(gtv_guess, maxVal=1.0)
            return loss_gtv_guess, thresholded_mag_grad_L

        label_iml, g_iml = label_loss(None, piml, pim, name='label_iml')
        label_ml, g_ml = label_loss(None, pml, pm, name='label_loss_ml')

    # custom loss for tf_rand_score
    with tf.name_scope('rand_loss'):
        rand_iml = tf.reduce_mean(tf.cast(tf.py_func (tf_rand_score, [piml, pl], tf.float64), tf.float32))
        rand_ml = tf.reduce_mean(tf.cast(tf.py_func (tf_rand_score, [pml, pl], tf.float64), tf.float32))

    # Generator objective: rand score on the image-derived label, both
    # adversarial terms, the membrane loss and both label losses.
    self.g_loss = tf.add_n([
        (rand_iml),
        (G_loss_LI + G_loss_MI),
        (membr_im),
        (label_iml + label_ml)
    ], name='G_loss_total')
    self.d_loss = tf.add_n([
        (D_loss_LI + D_loss_MI),
    ], name='D_loss_total')

    # Separate weight decay for generator and discriminator variables.
    wd_g = regularize_cost('gen/.*/W', l2_regularizer(1e-5), name='G_regularize')
    wd_d = regularize_cost('discrim/.*/W', l2_regularizer(1e-5), name='D_regularize')

    self.g_loss = tf.add(self.g_loss, wd_g, name='g_loss')
    self.d_loss = tf.add(self.d_loss, wd_d, name='d_loss')

    self.collect_variables()

    add_moving_summary(self.d_loss, self.g_loss)
    add_moving_summary(
        recon_iml, recon_im,
        label_iml, label_ml,
    )

    # Two rows of tiles, concatenated along axis 2 within a row and along
    # axis 1 between rows.
    viz = tf.concat([tf.concat([ui, pi, pim, piml, g_iml], 2),
                     tf.concat([um, pl, pm, pml, g_ml], 2),
                     ], 1)

    viz = cvt2imag(viz)
    viz = tf.cast(tf.clip_by_value(viz, 0, 255), tf.uint8, name='viz')
    tf.summary.image('colorized', viz, max_outputs=50)
def _build_graph(self, inputs):
    """Assemble the double-STN classifier graph and store its loss in ``self.cost``.

    ``inputs`` unpacks to ``(image, label)``. Two localisation branches
    (variable scopes ``STN1`` and ``STN2``) each regress six affine parameters
    and use them to sample the image on a ``WARP_TARGET_SIZE`` square grid.
    The two sampled results are concatenated on axis 3 and fed to a
    three-layer fully-connected classifier with 19 outputs.
    """
    # One homogeneous (y, x, 1) row per pixel of the warp target.
    target_grid = np.array(
        [(row, col, 1)
         for row in range(WARP_TARGET_SIZE)
         for col in range(WARP_TARGET_SIZE)],
        dtype='float32')
    target_grid = tf.constant(target_grid, dtype=tf.float32, name='xys')  # p x 3

    image, label = inputs
    image = image / 255.0 - 0.5  # bhw2

    def get_stn(image):
        """Regress an affine transform from ``image`` and return the sampled result."""
        localizer = LinearWrap(image)
        localizer = localizer.AvgPooling('downsample', 2)
        localizer = localizer.Conv2D('conv0', 20, 5, padding='VALID')
        localizer = localizer.MaxPooling('pool0', 2)
        localizer = localizer.Conv2D('conv1', 20, 5, padding='VALID')
        localizer = localizer.FullyConnected('fc1', out_dim=32)
        # The bias initializer carries the starting affine parameters.
        localizer = localizer.FullyConnected(
            'fct', out_dim=6, nl=tf.identity,
            W_init=tf.constant_initializer(),
            b_init=tf.constant_initializer(
                [1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))
        # output 6 parameters for affine transformation
        affine = tf.reshape(localizer(), [-1, 2, 3], name='affine')  # bx2x3
        affine = tf.reshape(tf.transpose(affine, [2, 0, 1]), [3, -1])  # 3 x (bx2)
        coords = tf.matmul(target_grid, affine)
        coords = tf.reshape(
            coords, [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
        coords = tf.transpose(coords, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
        return ImageSample('warp', [image, coords], borderMode='constant')

    with argscope([Conv2D, FullyConnected], nl=tf.nn.relu):
        with tf.variable_scope('STN1'):
            sampled1 = get_stn(image)
        with tf.variable_scope('STN2'):
            sampled2 = get_stn(image)

    # For visualization in tensorboard
    with tf.name_scope('visualization'):
        border = [[0, 0],
                  [HALF_DIFF, HALF_DIFF],
                  [HALF_DIFF, HALF_DIFF],
                  [0, 0]]

        def stack_channels(batch):
            """Concatenate channel 0 and channel 1 of ``batch`` along axis 1."""
            return tf.concat([batch[:, :, :, 0], batch[:, :, :, 1]], 1)

        padded1 = tf.pad(sampled1, border)
        padded2 = tf.pad(sampled2, border)
        img_orig = stack_channels(image)  # b x 2h x w
        transform1 = stack_channels(padded1)
        transform2 = stack_channels(padded2)
        stacked = tf.concat([img_orig, transform1, transform2], 2, 'viz')
        tf.summary.image('visualize',
                         tf.expand_dims(stacked, -1),
                         max_outputs=30)

    sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
    classifier = LinearWrap(sampled)
    classifier = classifier.FullyConnected('fc1', out_dim=256, nl=tf.nn.relu)
    classifier = classifier.FullyConnected('fc2', out_dim=128, nl=tf.nn.relu)
    classifier = classifier.FullyConnected('fct', out_dim=19, nl=tf.identity)
    logits = classifier()
    # The softmax op is only created for its graph name 'prob'.
    tf.nn.softmax(logits, name='prob')

    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    xent = tf.reduce_mean(xent, name='cross_entropy_loss')

    wrong = symbf.prediction_incorrect(logits, label)
    summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    # L2 penalty over the variables matched by the 'fc.*/W' pattern.
    wd_cost = tf.multiply(1e-5,
                          regularize_cost('fc.*/W', tf.nn.l2_loss),
                          name='regularize_loss')
    summary.add_moving_summary(xent, wd_cost)
    self.cost = tf.add_n([wd_cost, xent], name='cost')
def _build_graph(self, inputs):
    """Build the generator/discriminator graph, losses and image summary.

    ``inputs`` unpacks to ``(vol3d, img2d, condition)``. The generator (scope
    ``gen``) runs ``vol3d`` through the 3D encoder, the VGG19 encoder, the
    AdaIN block, the VGG19 decoder and the 3D decoder; ``img2d`` follows the
    2D path and is re-encoded by the 3D encoder. The discriminator (scope
    ``discrim``) scores the real and generated tensors. Results are stored in
    ``self.g_loss`` and ``self.d_loss``; nothing is returned.

    NOTE(review): the ``with`` nesting below was reconstructed from a
    whitespace-stripped copy of this function -- confirm device/scope
    placement against the original file.
    """
    # sImg2d # sImg the projection 2D, reshape from
    vol3d, img2d, condition = inputs # Split the input

    with tf.variable_scope('gen'):
        with tf.device('/device:GPU:0'):
            # Step 0; run thru 3d encoder
            with tf.variable_scope('encoder_3d'):
                vol2d = self.vol3d_encoder(vol3d)
            # Step 1: Run thru the encoder
            with tf.variable_scope('encoder_vgg19_2d'):
                vol2d_encoded, vol2d_feature = self.vgg19_encoder(vol2d)
                img2d_encoded, img2d_feature = self.vgg19_encoder(img2d)
            # Step 2: Run thru the adain block to get t=AdIN(f(c), f(s))
            with tf.variable_scope('style_transfer'):
                merge_encoded = self._build_adain_layers(vol2d_encoded, img2d_encoded)
                condition = tf.reshape(condition, []) # Make 0 rank for condition
                # condition > 0 selects the AdaIN-merged code; otherwise the
                # plain encoded volume is passed on unchanged.
                chose_encoded = tf.cond(condition > 0,
                                        lambda: tf.identity(merge_encoded),
                                        lambda: tf.identity(vol2d_encoded))
                img2d_encoded = tf.identity(img2d_encoded)

        with tf.device('/device:GPU:1'):
            # Step 3: Run thru the decoder to get the paint image
            with tf.variable_scope('decoder_vgg19_2d'):
                vol2d_decoded = self.vgg19_decoder(chose_encoded)
                img2d_decoded = self.vgg19_decoder(img2d_encoded)

        with tf.device('/device:GPU:2'):
            with tf.variable_scope('decoder_3d'):
                vol3d_decoded = self.vol3d_decoder(vol2d_decoded)
                img3d_decoded = self.vol3d_decoder(img2d_decoded)
            # Step 0; run thru 3d encoder (second pass, on the decoded image volume)
            with tf.variable_scope('encoder_3d'):
                img3d_encoded = self.vol3d_encoder(img3d_decoded)

    with tf.variable_scope('discrim'):
        with tf.device('/device:GPU:3'):
            vol3d_real = self.vgg19_discriminator(vol3d)
            img2d_real = self.vgg19_discriminator(img2d)
        with tf.device('/device:GPU:4'):
            vol3d_fake = self.vgg19_discriminator(vol3d_decoded)
            img2d_fake = self.vgg19_discriminator(img3d_encoded)

    #
    # Build losses here
    #
    with tf.name_scope('losses'):
        losses = []
        # Content loss between t and f(g(t))
        # All four L1 terms are summarised, but only loss_vol3d and
        # loss_img3d are appended to the generator objective below.
        loss_vol3d = tf.reduce_mean(tf.abs(vol3d - vol3d_decoded), name='loss_vol3d')
        loss_vol2d = tf.reduce_mean(tf.abs(vol2d - vol2d_decoded), name='loss_vol2d')
        loss_img2d = tf.reduce_mean(tf.abs(img2d - img2d_decoded), name='loss_img2d')
        loss_img3d = tf.reduce_mean(tf.abs(img2d - img3d_encoded), name='loss_img3d')

        add_moving_summary(loss_vol3d)
        add_moving_summary(loss_vol2d)
        add_moving_summary(loss_img2d)
        add_moving_summary(loss_img3d)

        losses.append(1e0*loss_vol3d)
        losses.append(1e0*loss_img3d)

        # GAN loss
        # Printed once at graph-construction time (debug output of the
        # discriminator tensors).
        print(vol3d_real)
        print(img2d_real)
        print(vol3d_fake)
        print(img2d_fake)
        # Real and fake scores are concatenated on axis 0 before being handed
        # to build_losses.
        G_loss, D_loss = self.build_losses(tf.concat([vol3d_real, img2d_real], axis=0),
                                           tf.concat([vol3d_fake, img2d_fake], axis=0))
        losses.append(G_loss)

        self.g_loss = tf.reduce_sum(losses, name='G_loss_total')
        self.d_loss = tf.reduce_sum([D_loss], name='D_loss_total')

    self.collect_variables()

    # Identity ops created only to expose named tensors; the Python names
    # are not used again.
    out_vol3d = tf.identity(vol3d, name='out_vol3d')
    out_vol3d_decoded = tf.identity(vol3d_decoded, name='out_vol3d_decoded')

    with tf.name_scope('visualization'):
        # Four consecutive slices along axis 0 around index ``mid``.
        # assumes axis 0 of the volumes has more than 129 entries -- TODO confirm
        mid=128
        viz_vol_0 = vol3d[mid-2:mid-1,...]
        viz_vol_1 = vol3d[mid-1:mid-0,...]
        viz_vol_2 = vol3d[mid+0:mid+1,...]
        viz_vol_3 = vol3d[mid+1:mid+2,...]

        viz_vol_4 = vol3d_decoded[mid-2:mid-1,...]
        viz_vol_5 = vol3d_decoded[mid-1:mid-0,...]
        viz_vol_6 = vol3d_decoded[mid+0:mid+1,...]
        viz_vol_7 = vol3d_decoded[mid+1:mid+2,...]

        viz_vol_8 = vol2d
        viz_vol_9 = vol2d_decoded
        ####
        viz_img_0 = img3d_decoded[mid-2:mid-1,...]
        viz_img_1 = img3d_decoded[mid-1:mid-0,...]
        viz_img_2 = img3d_decoded[mid+0:mid+1,...]
        viz_img_3 = img3d_decoded[mid+1:mid+2,...]

        viz_img_4 = img2d
        viz_img_5 = img2d_decoded
        viz_img_6 = img3d_encoded
        # Not referenced below.
        viz_zeros = tf.zeros_like(img2d)
        # Visualization: three rows (input volume, decoded volume, image
        # path), each concatenated on axis 2, then stacked on axis 1.
        viz = tf.concat([tf.concat([viz_vol_0, viz_vol_1, viz_vol_2, viz_vol_3, viz_vol_8, viz_img_4], 2),
                         tf.concat([viz_vol_4, viz_vol_5, viz_vol_6, viz_vol_7, viz_vol_9, viz_img_5], 2),
                         tf.concat([viz_img_0, viz_img_1, viz_img_2, viz_img_3, viz_img_6, viz_img_4], 2),
                         ], 1)
        viz = tf.cast(tf.clip_by_value(viz, 0, 255), tf.uint8, name='viz')
        tf.summary.image('colorized', viz, max_outputs=50)
def build_graph(self, *inputs):
    """Build the accuracy-regression graph and return its MSE cost.

    The first six entries of ``inputs`` are
    ``(mimg, mflag, pimg, pflag, pve, target)``; every remaining entry is an
    extra per-sample statistic. The two (img, flag) pairs are embedded,
    concatenated on the channel axis, passed through 16 residual bottleneck
    layers, pooled, joined with ``pve`` and the extra statistics, and mapped
    to a single sigmoid output. Sets ``self.pred`` and ``self.cost``.
    """
    mimg, mflag, pimg, pflag, pve, target = inputs[:6]
    extra_stats = list(inputs[6:])

    def img_flag_to_feat(img, flag, embed):
        """Embed ``img``, append the tiled ``flag`` features, symmetrise on HxW."""
        # B x maxD x maxD x layer_embedding_size
        embedded = tf.nn.embedding_lookup(embed, img)
        embedded = Dropout(embedded, keep_prob=self.dropout_kp)
        # concat connection feature with layer-wise flag feature.
        tiled_flag = tf.tile(flag, [1, 1, self.max_depth])
        tiled_flag = tf.reshape(
            tiled_flag,
            [-1, self.max_depth, self.max_depth, self.n_flags])
        tiled_flag = tf.cast(tiled_flag, tf.float32)
        feat = tf.concat([embedded, tiled_flag], axis=3, name='concat_feats')
        # feature are now NCHW format
        feat = tf.transpose(feat, [0, 3, 1, 2])
        # make the feature tensor symmetric on HxW: mirror the lower
        # triangle and subtract the diagonal that would be counted twice.
        lower = tf.matrix_band_part(feat, -1, 0)
        upper = tf.matrix_transpose(lower)
        diagonal = tf.matrix_band_part(feat, 0, 0)
        return lower + upper - diagonal

    with tf.variable_scope(self.vs_name):
        # embed the connection types.
        embedding_init = tf.random_uniform_initializer(-0.1, 0.1)
        n_types = LayerTypes.num_layer_types()
        embedding_table = tf.get_variable(
            'embedding',
            [n_types, self.layer_embedding_size],
            initializer=embedding_init)
        mfeat = img_flag_to_feat(mimg, mflag, embedding_table)
        pfeat = img_flag_to_feat(pimg, pflag, embedding_table)
        net = tf.concat(values=[mfeat, pfeat], axis=1)

        data_format = 'channels_first'
        ch_dim = 1

        # network on the combined feature.
        with argscope([Conv2D, Deconv2D, GroupedConv2D, AvgPooling,
                       MaxPooling, BatchNorm, GlobalAvgPooling,
                       ResizeImages, SeparableConv2D],
                      data_format=data_format), \
                argscope([Conv2D, Deconv2D, GroupedConv2D, SeparableConv2D],
                         activation=tf.identity, use_bias=False):
            n_layers_per_scale = 4
            n_scales = 4
            out_filters = net.get_shape().as_list()[ch_dim]
            for layer_idx in range(n_scales * n_layers_per_scale):
                scale_idx, idx_in_scale = divmod(layer_idx, n_layers_per_scale)
                # The first layer of every scale except scale 0 downsamples
                # and doubles the filter count.
                starts_new_scale = idx_in_scale == 0 and scale_idx > 0
                strides = 2 if starts_new_scale else 1
                if starts_new_scale:
                    out_filters *= 2
                with tf.variable_scope('layer{:03d}'.format(layer_idx)):
                    net = residual_bottleneck_layer(
                        'res_btl', net, out_filters, strides, data_format)

            # only use the last output for predicting the child model accuracy
            net = GlobalAvgPooling('gap', net)
            pve = tf.reshape(pve, [-1, 1])
            extra_stats = [tf.reshape(stat, [-1, 1]) for stat in extra_stats]
            net = tf.concat(values=[pve, net] + extra_stats, axis=ch_dim)
            pred = FullyConnected('fully_connect', net, 1, activation=tf.sigmoid)
            pred = tf.reshape(pred, [-1])
            self.pred = tf.identity(pred, name='predicted_accuracy')

            mse = tf.losses.mean_squared_error(target, self.pred)
            self.cost = tf.identity(mse, name='cost')
            add_moving_summary(self.cost)
            return self.cost
def _build_graph(self, inputs):
    """Build the Inception-style classifier with two auxiliary heads.

    ``inputs`` unpacks to ``(image, label)``. The main head and both auxiliary
    heads produce 1000 logits each; the weighted cost is
    ``loss3 + 0.3 * loss2 + 0.3 * loss1`` and a decaying L2 penalty is added
    on top. The final value is stored in ``self.cost``.
    """
    image, label = inputs
    image = image / 128.0

    def inception(name, x, nr1x1, nr3x3r, nr3x3, nr233r, nr233, nrpool, pooltype):
        """One inception module; a module without a 1x1 branch uses stride 2."""
        if nr1x1 == 0:
            stride = 2
        else:
            stride = 1
        with tf.variable_scope(name):
            branches = []
            if nr1x1 != 0:
                branches.append(Conv2D('conv1x1', x, nr1x1, 1))

            reduced3 = Conv2D('conv3x3r', x, nr3x3r, 1)
            branches.append(Conv2D('conv3x3', reduced3, nr3x3, 3, stride=stride))

            double3 = Conv2D('conv233r', x, nr233r, 1)
            double3 = Conv2D('conv233a', double3, nr233, 3)
            branches.append(Conv2D('conv233b', double3, nr233, 3, stride=stride))

            if pooltype == 'max':
                pooled = MaxPooling('mpool', x, 3, stride, padding='SAME')
            else:
                assert pooltype == 'avg'
                pooled = AvgPooling('apool', x, 3, stride, padding='SAME')
            # nrpool == 0 means the pooled tensor is passed through as-is.
            if nrpool != 0:
                pooled = Conv2D('poolproj', pooled, nrpool, 1)
            branches.append(pooled)
            return tf.concat(branches, 3, name='concat')

    # (name, nr1x1, nr3x3r, nr3x3, nr233r, nr233, nrpool, pooltype) per module
    stage3 = [
        ('incep3a', 64, 64, 64, 64, 96, 32, 'avg'),
        ('incep3b', 64, 64, 96, 64, 96, 64, 'avg'),
        ('incep3c', 0, 128, 160, 64, 96, 0, 'max'),
    ]
    stage4 = [
        ('incep4a', 224, 64, 96, 96, 128, 128, 'avg'),
        ('incep4b', 192, 96, 128, 96, 128, 128, 'avg'),
        ('incep4c', 160, 128, 160, 128, 160, 128, 'avg'),
        ('incep4d', 96, 128, 192, 160, 192, 128, 'avg'),
        ('incep4e', 0, 128, 192, 192, 256, 0, 'max'),
    ]
    stage5 = [
        ('incep5a', 352, 192, 320, 160, 224, 128, 'avg'),
        ('incep5b', 352, 192, 320, 192, 224, 128, 'max'),
    ]

    with argscope(Conv2D, nl=BNReLU, use_bias=False):
        stem = LinearWrap(image)
        stem = stem.Conv2D('conv0', 64, 7, stride=2)
        stem = stem.MaxPooling('pool0', 3, 2, padding='SAME')
        stem = stem.Conv2D('conv1', 64, 1)
        stem = stem.Conv2D('conv2', 192, 3)
        stem = stem.MaxPooling('pool2', 3, 2, padding='SAME')
        net = stem()

        # 28
        for spec in stage3:
            net = inception(spec[0], net, *spec[1:])

        # First auxiliary classifier.
        aux1 = LinearWrap(net)
        aux1 = aux1.Conv2D('loss1conv', 128, 1)
        aux1 = aux1.FullyConnected('loss1fc', 1024, nl=tf.nn.relu)
        aux1 = aux1.FullyConnected('loss1logit', 1000, nl=tf.identity)
        loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=aux1(), labels=label)
        loss1 = tf.reduce_mean(loss1, name='loss1')

        # 14
        for spec in stage4:
            net = inception(spec[0], net, *spec[1:])

        # Second auxiliary classifier.
        aux2 = Conv2D('loss2conv', net, 128, 1)
        aux2 = FullyConnected('loss2fc', aux2, 1024, nl=tf.nn.relu)
        aux2 = FullyConnected('loss2logit', aux2, 1000, nl=tf.identity)
        loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=aux2, labels=label)
        loss2 = tf.reduce_mean(loss2, name='loss2')

        # 7
        for spec in stage5:
            net = inception(spec[0], net, *spec[1:])
        net = GlobalAvgPooling('gap', net)

        logits = FullyConnected('linear', net, out_dim=1000, nl=tf.identity)

    # Created only so the graph contains an op named 'output'.
    tf.nn.softmax(logits, name='output')
    loss3 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    loss3 = tf.reduce_mean(loss3, name='loss3')

    weighted = tf.add_n([loss3, 0.3 * loss2, 0.3 * loss1], name='weighted_cost')
    add_moving_summary([weighted, loss1, loss2, loss3])

    top1_wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
    add_moving_summary(tf.reduce_mean(top1_wrong, name='train_error_top1'))

    top5_wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
    add_moving_summary(tf.reduce_mean(top5_wrong, name='train_error_top5'))

    # L2 penalty on every variable matched by '.*/W'; the coefficient
    # follows a staircase exponential decay over the global step.
    decay_coeff = tf.train.exponential_decay(
        0.0002, get_global_step_var(), 80000, 0.7, True)
    wd_cost = tf.multiply(decay_coeff,
                          regularize_cost('.*/W', tf.nn.l2_loss),
                          name='l2_regularize_loss')

    self.cost = tf.add_n([weighted, wd_cost], name='cost')
    add_moving_summary(wd_cost, self.cost)
def build_graph(self, img, box, mask, bbx, z, z2):
    """Build the masked-region generator, multi-scale discriminator and losses.

    The generator fills the region selected by the binarised ``mask`` using
    three style codes: one encoded from the ground-truth region, the supplied
    ``z2``, and a freshly sampled ``z3``. The discriminator is run at
    ``DIS_SCALE`` scales on (image, mask) pairs. Results are stored in
    ``self.g_loss``, ``self.d_loss`` and ``self.z_loss``.

    ``bbx`` and ``z`` are accepted but not read in this function.

    NOTE(review): the ``with`` nesting below was reconstructed from a
    whitespace-stripped copy of this function -- confirm scope placement
    against the original file.
    """
    with tf.name_scope('preprocess'):
        img_crop = tf.multiply(img, 1 - box)
        # Rescale image and mask with x / 128 - 1.
        img = (img / 128.0 - 1.0)
        img_crop = (img_crop / 128.0 - 1.0)
        bin_mask = mask/255.
        mask = (mask / 128.0 - 1.0)

    with tf.name_scope('styleIn'):
        # One random style vector per batch element.
        style_shape_z2 = [tf.shape(mask)[0], 1, 1, STYLE_DIM_z2]
        z3 = tf.random_normal(style_shape_z2, mean=0.0, stddev=1.0, dtype=tf.float32, name='z3')

    def vizN(name, a):
        """Concatenate the tensors in ``a`` on axis 2 and log them as uint8 images."""
        with tf.name_scope(name):
            im = tf.concat(a, axis=2)
            # Inverse of the x / 128 - 1 preprocessing, then clip to [0, 255].
            im = (im + 1.0) * 128
            im = tf.clip_by_value(im, 0, 255)
            im = tf.cast(im, tf.uint8, name='viz')
        tf.summary.image(name, im, max_outputs=50)

    # use the initializers from torch
    # NOTE(review): this argscope sets no arguments -- confirm it is intentional.
    with argscope([Conv2D, Conv2DTranspose, tf.layers.conv2d]):
        #Let us encode the images
        with tf.variable_scope('gen'):
            # Binarise the mask back to {0, 1}.
            bin_gen_mask_gt = tf.round((mask + 1) * 0.5)
            # Generator input: image outside the mask, -1 inside it.
            in_gen_gt = img *(1-bin_gen_mask_gt) - bin_gen_mask_gt

            with tf.variable_scope('senc'):
                zgt_mean, zgt_var = self.style_encoder(img*bin_gen_mask_gt)

            zgt = z_sample(zgt_mean, zgt_var)
            # Tile each style code over axes 1 and 2 of the generator input.
            zmat = tf.tile(zgt, [1, in_gen_gt.shape[1], in_gen_gt.shape[2], 1])
            z2mat = tf.tile(z2, [1, in_gen_gt.shape[1], in_gen_gt.shape[2], 1])
            z3mat = tf.tile(z3, [1, in_gen_gt.shape[1], in_gen_gt.shape[2], 1])

            with tf.variable_scope('genRGB'):
                # Each output is composited: generated pixels inside the
                # mask, original pixels outside it.
                gen_im = self.generator(in_gen_gt, z2mat, nb_blocks)
                gen_im = gen_im*bin_gen_mask_gt + img*(1 - bin_gen_mask_gt)
                gen_im_z3 = self.generator(in_gen_gt, z3mat, nb_blocks)
                gen_im_z3 = gen_im_z3*bin_gen_mask_gt + img*(1 - bin_gen_mask_gt)
                gen_im_gt = self.generator(in_gen_gt, zmat, nb_blocks)
                gen_im_gt = gen_im_gt*bin_gen_mask_gt + img*(1 - bin_gen_mask_gt)

            with tf.variable_scope('senc'):
                # Re-encode the z3 sample to recover its style code.
                z3_recon, _ = self.style_encoder(gen_im_z3*bin_gen_mask_gt)

        # Real and reconstructed images share one VGG pass; they are split
        # apart again in the perceptual loss below.
        f1, f2, f3, f4 = self.vgg_16(tf.concat([(img+1)*0.5, (gen_im_gt+1)*0.5], axis=0))

        #The final discriminator that takes them both
        discrim_out_mask = []
        discrim_fm_real_mask = []
        discrim_fm_fake_mask = []
        discrim_out = []
        discrim_out_z3 = []
        discrim_fm_real = []
        discrim_fm_fake = []
        with tf.variable_scope('discrim'):
            # Pass 1: real image vs. reconstruction from the encoded style.
            with tf.variable_scope('discrim_im'):
                D_input_real = tf.concat([img, mask], axis=-1)
                D_input_fake = tf.concat([gen_im_gt, mask], axis=-1)
                D_inputs = [D_input_real, D_input_fake]
                for s in range(DIS_SCALE):
                    with tf.variable_scope('s%d'%s):
                        # Every scale after the first works on a downsampled
                        # copy of the previous scale's inputs.
                        if s != 0:
                            D_inputs = [downsample(im) for im in D_inputs]
                        im_s, im_recon_s = D_inputs
                        with tf.variable_scope('Ax'):
                            Ax_feats_real, Ax_fm_real = self.discrim_enc(im_s)
                            Ax_feats_fake, Ax_fm_fake = self.discrim_enc(im_recon_s)
                        with tf.variable_scope('Ah'):
                            Ah_dis_real, Ah_fm_real = self.discrim_patch_classify(Ax_feats_real)
                            Ah_dis_fake, Ah_fm_fake = self.discrim_patch_classify(Ax_feats_fake)
                        discrim_out.append((Ah_dis_real, Ah_dis_fake))
                        # Feature maps are collected for the feature-matching loss.
                        discrim_fm_real += Ax_fm_real + Ah_fm_real
                        discrim_fm_fake += Ax_fm_fake + Ah_fm_fake

            # Pass 2: same scope with reuse=True, scoring the z3 sample.
            with tf.variable_scope('discrim_im', reuse=True):
                D_input_real = tf.concat([img, mask], axis=-1)
                D_input_fake = tf.concat([gen_im_z3, mask], axis=-1)
                D_inputs = [D_input_real, D_input_fake]
                for s in range(DIS_SCALE):
                    with tf.variable_scope('s%d'%s):
                        if s != 0:
                            D_inputs = [downsample(im) for im in D_inputs]
                        im_s, im_recon_s = D_inputs
                        with tf.variable_scope('Ax'):
                            Ax_feats_real, _ = self.discrim_enc(im_s)
                            Ax_feats_fake, _ = self.discrim_enc(im_recon_s)
                        with tf.variable_scope('Ah'):
                            Ah_dis_real, _ = self.discrim_patch_classify(Ax_feats_real)
                            Ah_dis_fake, _ = self.discrim_patch_classify(Ax_feats_fake)
                        discrim_out_z3.append((Ah_dis_real, Ah_dis_fake))

    vizN('A_recon', [img, gen_im_gt, gen_im_z3, gen_im])

    def LSGAN_hinge_loss(real, fake):
        """Return ``(g_loss, d_loss)``: hinge terms for D, ``-mean(fake)`` for G."""
        d_real = tf.reduce_mean(-tf.minimum(0., tf.subtract(real, 1.)), name='d_real')
        d_fake = tf.reduce_mean(-tf.minimum(0., tf.add(-fake,-1.)), name='d_fake')
        d_loss = tf.multiply(d_real + d_fake, 0.5, name='d_loss')
        g_loss = tf.reduce_mean(-fake, name='g_loss')
        # add_moving_summary(g_loss)
        return g_loss, d_loss

    # Per-sample reconstruction weight: SHAPE*SHAPE divided by the mask sum.
    # An all-zero mask is replaced by SHAPE*SHAPE, which gives weight 1 and
    # avoids a division by zero.
    numelmask = tf.reduce_sum(bin_gen_mask_gt, axis=[1, 2, 3])
    numelall = tf.ones_like(numelmask) * SHAPE * SHAPE
    numelmask = tf.where(tf.equal(numelmask, 0), numelall, numelmask)
    weight_recon_loss = numelall / numelmask

    with tf.name_scope('losses'):
        with tf.name_scope('RGB_losses'):
            with tf.name_scope('GAN_loss'):
                # gan loss, summed over all discriminator scales
                G_loss, D_loss = zip(*[LSGAN_hinge_loss(real, fake) for real, fake in discrim_out])
                G_loss = tf.add_n(G_loss, name='lsgan_loss')
                D_loss = tf.add_n(D_loss, name='Disc_loss')
            with tf.name_scope('GAN_loss_z3'):
                # gan loss for the z3 sample
                G_loss_z3, D_loss_z3 = zip(*[LSGAN_hinge_loss(real, fake) for real, fake in discrim_out_z3])
                G_loss_z3 = tf.add_n(G_loss_z3, name='lsgan_loss')
                D_loss_z3 = tf.add_n(D_loss_z3, name='Disc_loss')
            with tf.name_scope('z_recon_loss'):
                z3_recon_loss = tf.reduce_mean(tf.abs(z3 - z3_recon), name='z3_recon_loss')
            with tf.name_scope('FM_loss'):
                # Mean L1 distance between matching real/fake feature maps.
                FM_loss = [tf.reduce_mean(tf.abs(j - k))for j,k in zip(discrim_fm_real, discrim_fm_fake)]
                FM_loss = tf.add_n(FM_loss)/len(FM_loss)
            with tf.name_scope('im_recon_loss'):
                im_recon_loss = tf.reduce_mean(tf.reduce_mean(tf.abs(img - gen_im_gt), axis=[1,2,3])*weight_recon_loss)
            with tf.name_scope('kl_loss'):
                KLloss = kl_loss(zgt_mean, zgt_var)
            with tf.name_scope('perceptualLoss'):
                # Undo the batch concatenation fed to vgg_16 above.
                f3_1, f3_2 = tf.split(f3, 2, 0)
                # perceptual_loss = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(f3_1, f3_2), axis=[1,2,3])*weight_recon_loss)
                perceptual_loss = tf.nn.l2_loss(f3_1-f3_2)/tf.to_float(tf.size(f3_1))
                # perceptual_loss = percep_loss([f1, f2, f3, f4], [1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0])

    LAMBDA = 10.0
    LAMBDA_KL = 0.05
    # The two GAN terms are divided by the number of scales; the other
    # generator terms are weighted by LAMBDA / LAMBDA_KL.
    self.g_loss = G_loss/DIS_SCALE + G_loss_z3/DIS_SCALE + LAMBDA*FM_loss + LAMBDA*im_recon_loss + LAMBDA_KL*KLloss \
        + LAMBDA*perceptual_loss
    self.d_loss = D_loss + D_loss_z3
    self.z_loss = LAMBDA * z3_recon_loss
    self.collect_variables('gen', 'discrim')

    tf.summary.histogram('z_var', zgt_var)
    tf.summary.histogram('z_mean', zgt_mean)
    add_moving_summary(G_loss, D_loss, FM_loss, im_recon_loss, KLloss, z3_recon_loss, perceptual_loss)
def _build_graph(self, inputs):
    """Build the three-stage pre-activation ResNet and store its loss in ``self.cost``.

    ``inputs`` unpacks to ``(image, label)``. The image is divided by 128 and
    transposed to channel-first layout before the network is applied. Each
    stage holds ``self.n`` residual blocks (at least one is always built).
    The classifier has 10 outputs.
    """
    image, label = inputs
    image = image / 128.0
    assert tf.test.is_gpu_available()
    image = tf.transpose(image, [0, 3, 1, 2])

    def residual(name, l, increase_dim=False, first=False):
        """Two-conv residual block; ``increase_dim`` halves H/W and doubles channels."""
        in_channel = l.get_shape().as_list()[1]
        if increase_dim:
            out_channel, stride1 = in_channel * 2, 2
        else:
            out_channel, stride1 = in_channel, 1

        with tf.variable_scope(name):
            pre_act = l if first else BNReLU(l)
            conv_a = Conv2D('conv1', pre_act, out_channel, stride=stride1, nl=BNReLU)
            conv_b = Conv2D('conv2', conv_a, out_channel)
            shortcut = l
            if increase_dim:
                # Match the main branch: pool spatially, then zero-pad the
                # channel axis on both sides.
                shortcut = AvgPooling('pool', shortcut, 2)
                half = in_channel // 2
                shortcut = tf.pad(
                    shortcut, [[0, 0], [half, half], [0, 0], [0, 0]])
            return conv_b + shortcut

    with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling],
                  data_format='NCHW'), \
            argscope(Conv2D, nl=tf.identity, use_bias=False, kernel_shape=3,
                     W_init=variance_scaling_initializer(mode='FAN_OUT')):
        net = Conv2D('conv0', image, 16, nl=BNReLU)
        # Stage comments from the original: 32,c=16 / 16,c=32 / 8,c=64
        for stage in (1, 2, 3):
            head_name = 'res{}.0'.format(stage)
            if stage == 1:
                net = residual(head_name, net, first=True)
            else:
                net = residual(head_name, net, increase_dim=True)
            for k in range(1, self.n):
                net = residual('res{}.{}'.format(stage, k), net)
        net = BNReLU('bnlast', net)
        net = GlobalAvgPooling('gap', net)

    logits = FullyConnected('linear', net, out_dim=10, nl=tf.identity)
    # Created only so the graph contains an op named 'output'.
    tf.nn.softmax(logits, name='output')

    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    xent = tf.reduce_mean(xent, name='cross_entropy_loss')

    top1_hit = tf.nn.in_top_k(logits, label, 1)
    wrong = tf.to_float(tf.logical_not(top1_hit), name='wrong_vector')
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    # L2 penalty on every variable matched by '.*/W'; the coefficient
    # follows a staircase exponential decay over the global step.
    decay_coeff = tf.train.exponential_decay(
        0.0002, get_global_step_var(), 480000, 0.2, True)
    wd_cost = tf.multiply(decay_coeff,
                          regularize_cost('.*/W', tf.nn.l2_loss),
                          name='wd_cost')
    add_moving_summary(xent, wd_cost)

    add_param_summary(('.*/W', ['histogram']))  # monitor W
    self.cost = tf.add_n([xent, wd_cost], name='cost')
def _build_graph(self, inputs):
    """Build the InfoGAN graph: generator, discriminator and MI-regularised losses.

    ``inputs[0]`` is the real sample batch; a trailing axis is appended to it
    before it is fed to the discriminator. The latent code is the
    concatenation of a sample from ``self.factors`` and uniform noise. After
    ``self.build_losses`` has created ``self.g_loss`` / ``self.d_loss``, the
    mutual-information estimate is subtracted from both.

    Side effects: sets ``self.factors``, ``self.g_loss`` and ``self.d_loss``,
    registers summaries, and calls ``self.collect_variables()``.
    """
    real_sample = inputs[0]
    real_sample = tf.expand_dims(real_sample, -1)

    # latent space is cat(10) x uni(1) x uni(1) x noise(NOISE_DIM)
    self.factors = ProductDistribution("factors", [
        CategoricalDistribution("cat", 10),
        GaussianWithUniformSample("uni_a", 1),
        GaussianWithUniformSample("uni_b", 1),
    ])
    # prior: the assumption how the factors are presented in the dataset
    prior = tf.constant([0.1] * 10 + [0, 0], tf.float32, [12], name='prior')
    batch_prior = tf.tile(tf.expand_dims(prior, 0), [BATCH, 1], name='batch_prior')

    # sample the latent code:
    zc = symbf.shapeless_placeholder(
        self.factors.sample(BATCH, prior), 0, name='z_code')
    z_noise = symbf.shapeless_placeholder(
        tf.random_uniform([BATCH, NOISE_DIM], -1, 1), 0, name='z_noise')
    z = tf.concat([zc, z_noise], 1, name='z')

    with argscope([Conv2D, Deconv2D, FullyConnected],
                  W_init=tf.truncated_normal_initializer(stddev=0.02)):
        with tf.variable_scope('gen'):
            fake_sample = self.generator(z)
            fake_sample_viz = tf.cast((fake_sample) * 255.0, tf.uint8, name='viz')
            tf.summary.image('gen', fake_sample_viz, max_outputs=30)

        # may need to investigate how bn stats should be updated across two discrim
        with tf.variable_scope('discrim'):
            real_pred, _ = self.discriminator(real_sample)
            fake_pred, dist_param = self.discriminator(fake_sample)

    # Mutual information between x (i.e. zc in this case) and some
    # information s (the generated samples in this case):
    #
    #     I(x;s) = H(x) - H(x|s)
    #            = H(x) + E[\log P(x|s)]
    #
    # The distribution from which zc is sampled, in this case, is set to a
    # fixed prior already. For the second term, we can maximize its
    # variational lower bound:
    #
    #     E_{x \sim P(x|s)}[\log Q(x|s)]
    #
    # where Q(x|s) is a proposal distribution to approximate P(x|s).
    #
    # Here, Q(x|s) is assumed to be a distribution which shares the form of
    # self.factors, and whose parameters are predicted by the discriminator
    # network.
    #
    # (This text used to be a bare, non-raw string literal; its "\l" and
    # "\s" sequences are invalid escapes that trigger a compile-time
    # warning, so it is kept as a comment instead.)
    with tf.name_scope("mutual_information"):
        ents = self.factors.entropy(zc, batch_prior)
        entropy = tf.add_n(ents, name='total_entropy')
        # Note that dropping this term has no effect because the entropy
        # of prior is a constant. The paper mentioned it but didn't use it.
        # Adding this term may make the curve less stable because the
        # entropy estimated from the samples is not the true value.

        # post-process output vector from discriminator to obtain valid
        # distribution parameters
        encoder_activation = self.factors.encoder_activation(dist_param)
        cond_ents = self.factors.entropy(zc, encoder_activation)
        cond_entropy = tf.add_n(cond_ents, name="total_conditional_entropy")

        MI = tf.subtract(entropy, cond_entropy, name='mutual_information')
        summary.add_moving_summary(entropy, cond_entropy, MI, *ents)

    # default GAN objective
    self.build_losses(real_pred, fake_pred)

    # subtract mutual information for latent factors (we want to maximize them)
    self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
    self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')

    summary.add_moving_summary(self.g_loss, self.d_loss)

    # distinguish between variables of generator and discriminator updates
    self.collect_variables()
def build_graph(self, x, bboxes_xyz, bboxes_lwh, box3d_pts_label, semantic_labels, heading_labels, heading_residuals,
                size_labels, size_residuals):
    """Build the point-cloud detection graph (backbone, voting, proposals, losses).

    In inference mode (``get_current_tower_context().is_training`` is false)
    the function decodes boxes, runs ``NMS3D``, creates the named tensors
    ``bboxes_pred``, ``class_scores_pred`` and ``batch_idx`` and returns
    ``None``. In training mode it returns the total loss
    ``vote_reg_loss + 0.5 * obj_cls_loss + 1. * box_loss + 0.1 * sem_cls_loss``.

    ``bboxes_lwh`` is accepted but not read in this function.

    Channel layout of ``proposals_output`` as sliced below (NH/NS/NC come from
    ``config``): ``[0:2]`` objectness logits, ``[2:5]`` centre offset,
    ``[5:5+NH]`` heading class logits, ``[5+NH:5+2NH]`` heading residuals,
    ``[5+2NH:5+2NH+NS]`` size class logits, ``[5+2NH+NS:5+2NH+4NS]`` size
    residuals (NS x 3), last ``NC`` channels semantic class logits.

    NOTE(review): block nesting was reconstructed from a whitespace-stripped
    copy of this function -- confirm against the original file.
    """
    # def build_graph(self, x, bboxes_xyz, bboxes_lwh, semantic_labels, heading_labels, heading_residuals, size_labels, size_residuals):
    l0_xyz = x
    l0_points = None

    # Set Abstraction layers
    l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=2048, radius=0.2, nsample=64,
                                                       mlp=[64, 64, 128], mlp2=None, group_all=False, scope='sa1')
    l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=1024, radius=0.4, nsample=64,
                                                       mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa2')
    l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=512, radius=0.8, nsample=64,
                                                       mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa3')
    l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz, l3_points, npoint=256, radius=1.2, nsample=64,
                                                       mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa4')

    # Feature Propagation layers
    l3_points = pointnet_fp_module(l3_xyz, l4_xyz, l3_points, l4_points, [256, 256], scope='fp1')
    seeds_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256, 256], scope='fp2')
    seeds_xyz = l2_xyz

    # Voting Module layers
    # The MLP output is added to [xyz, features] jointly, then split back
    # into the first 3 channels (vote position) and the rest (vote features).
    offset = self.hough_voting_mlp(seeds_points)
    votes_xyz_points = tf.concat([seeds_xyz, seeds_points], 2) + offset
    votes_xyz, votes_points = tf.slice(votes_xyz_points, (0, 0, 0), (-1, -1, 3)), \
        tf.slice(votes_xyz_points, (0, 0, 3), (-1, -1, -1))
    # Local name shadows the method of the same name from here on.
    vote_reg_loss = self.vote_reg_loss(seeds_xyz, votes_xyz, bboxes_xyz, box3d_pts_label)

    # Proposal Module layers
    # Farthest point sampling on seeds
    proposals_xyz, proposals_output, _ = pointnet_sa_module(
        votes_xyz, votes_points, npoint=config.PROPOSAL_NUM, radius=0.3, nsample=64, mlp=[128, 128, 128],
        mlp2=[128, 128, 5 + 2 * config.NH + 4 * config.NS + config.NC], group_all=False, scope='proposal')

    # Non-trainable scalar variable holding the NMS IoU threshold (initial value 0.25).
    nms_iou = tf.get_variable('nms_iou', shape=[], initializer=tf.constant_initializer(0.25), trainable=False)

    if not get_current_tower_context().is_training:

        def get_3d_bbox(box_size, heading_angle, center):
            """Return the 8 corners (B * N * 8 * 3) of boxes rotated by ``heading_angle``."""
            batch_size = tf.shape(heading_angle)[0]
            c = tf.cos(heading_angle)
            s = tf.sin(heading_angle)
            zeros = tf.zeros_like(c)
            ones = tf.ones_like(c)
            # Row-major 3x3 matrix [[c, 0, s], [0, 1, 0], [-s, 0, c]]
            # (rotation about the second axis).
            rotation = tf.reshape(
                tf.stack([c, zeros, s, zeros, ones, zeros, -s, zeros, c], -1),
                tf.stack([batch_size, -1, 3, 3]))
            l, w, h = box_size[..., 0], box_size[..., 1], box_size[
                ..., 2]  # lwh(xzy) order!!!
            corners = tf.reshape(
                tf.stack([
                    l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2,
                    h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2, -h / 2, -h / 2,
                    w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2
                ], -1),
                tf.stack([batch_size, -1, 3, 8]))
            # Rotate the corners, transpose to (..., 8, 3), then translate.
            return tf.einsum('ijkl,ijlm->ijmk', rotation, corners) + tf.expand_dims(
                center, 2)  # B * N * 8 * 3

        class_mean_size_tf = tf.constant(class_mean_size)
        size_cls_pred = tf.argmax(
            proposals_output[..., 5 + 2 * config.NH:5 + 2 * config.NH + config.NS], axis=-1)
        size_cls_pred_onehot = tf.one_hot(size_cls_pred, depth=config.NS, axis=-1)  # B * N * NS
        # Pick the residual belonging to the predicted size class.
        size_residual_pred = tf.reduce_sum(
            tf.expand_dims(size_cls_pred_onehot, -1) * tf.reshape(
                proposals_output[..., 5 + 2 * config.NH + config.NS:5 + 2 * config.NH + 4 * config.NS],
                (-1, config.PROPOSAL_NUM, config.NS, 3)), axis=2)
        # size = class mean size * (1 + residual), with the factor floored at 1e-6.
        size_pred = tf.gather_nd(
            class_mean_size_tf, tf.expand_dims(size_cls_pred, -1)) * tf.maximum(
                1 + size_residual_pred, 1e-6)  # B * N * 3: size
        # with tf.control_dependencies([tf.print(size_pred[0, 0, 2])]):
        center_pred = proposals_xyz + proposals_output[..., 2:5]  # B * N * 3
        heading_cls_pred = tf.argmax(proposals_output[..., 5:5 + config.NH], axis=-1)
        heading_cls_pred_onehot = tf.one_hot(heading_cls_pred, depth=config.NH, axis=-1)
        heading_residual_pred = tf.reduce_sum(
            heading_cls_pred_onehot * proposals_output[..., 5 + config.NH:5 + 2 * config.NH], axis=2)
        # angle = (2 * class + residual) * pi / NH, wrapped into [0, 2*pi)
        heading_pred = tf.floormod(
            (tf.cast(heading_cls_pred, tf.float32) * 2 + heading_residual_pred) * np.pi / config.NH, 2 * np.pi)
        # with tf.control_dependencies([tf.print(size_residual_pred[0, :10, :]), tf.print(size_pred[0, :10, :])]):
        bboxes = get_3d_bbox(
            size_pred, heading_pred, center_pred)  # B * N * 8 * 3, lhw(xyz) order!!!
        # bbox_corners = tf.concat([bboxes[:, :, 6, :], bboxes[:, :, 0, :]], axis=-1)  # B * N * 6, lhw(xyz) order!!!
        # with tf.control_dependencies([tf.print(bboxes[0, 0])]):
        nms_idx = NMS3D(bboxes, tf.reduce_max(proposals_output[..., -config.NC:], axis=-1),
                        proposals_output[..., :2], nms_iou)  # Nnms * 2
        # The three tensors below are created for their graph names; the
        # Python bindings are not used again.
        bboxes_pred = tf.gather_nd(bboxes, nms_idx, name='bboxes_pred')  # Nnms * 8 * 3
        class_scores_pred = tf.gather_nd(
            proposals_output[..., -config.NC:], nms_idx, name='class_scores_pred')  # Nnms * C
        batch_idx = tf.identity(
            nms_idx[:, 0], name='batch_idx'
        )  # Nnms, this is used to identify between batches
        return

    # calculate positive and negative proposal idxes
    bboxes_xyz_gt = bboxes_xyz  # B * BB * 3
    bboxes_labels_gt = semantic_labels  # B * BB
    bboxes_heading_labels_gt = heading_labels
    bboxes_heading_residuals_gt = heading_residuals
    bboxes_size_labels_gt = size_labels
    bboxes_size_residuals_gt = size_residuals

    # Pairwise distance between every proposal centre and every GT centre.
    dist_mat = tf.norm(tf.expand_dims(proposals_xyz, 2) - tf.expand_dims(bboxes_xyz_gt, 1), axis=-1)  # B * PR * BB
    bboxes_assignment = tf.argmin(dist_mat, axis=-1)  # B * PR
    min_dist = tf.reduce_min(dist_mat, axis=-1)

    # Thresholds are computed per batch element from the distribution of
    # nearest-GT distances rather than fixed constants.
    thres_mid = tf.reduce_mean(min_dist, axis=-1, keepdims=True)
    thres_min = tf.reduce_min(min_dist, axis=-1, keepdims=True)
    thres_max = tf.reduce_max(min_dist, axis=-1, keepdims=True)
    POSITIVE_THRES, NEGATIVE_THRES = (thres_mid + thres_min) / 2.0, (
        thres_mid + thres_max) / 2.0

    positive_idxes = tf.where(min_dist < POSITIVE_THRES)
    negative_idxes = tf.where(min_dist > NEGATIVE_THRES)
    # (batch index, assigned GT index) for every positive proposal.
    positive_gt_idxes = tf.stack([
        positive_idxes[:, 0],
        tf.gather_nd(bboxes_assignment, positive_idxes)
    ], axis=1)

    # objectiveness loss: label 1 for positives, 0 for negatives; the two
    # mean cross-entropies are averaged.
    pos_obj_cls_score = tf.gather_nd(proposals_output[..., :2], positive_idxes)
    pos_obj_cls_gt = tf.ones([tf.shape(positive_idxes)[0]], dtype=tf.int32)
    neg_obj_cls_score = tf.gather_nd(proposals_output[..., :2], negative_idxes)
    neg_obj_cls_gt = tf.zeros([tf.shape(negative_idxes)[0]], dtype=tf.int32)
    obj_cls_loss = tf.identity(
        (tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=pos_obj_cls_score, labels=pos_obj_cls_gt)) +
         tf.reduce_mean(
             tf.nn.sparse_softmax_cross_entropy_with_logits(
                 logits=neg_obj_cls_score, labels=neg_obj_cls_gt))) / 2.0,
        name='obj_cls_loss')
    obj_correct = tf.concat([
        tf.cast(tf.nn.in_top_k(pos_obj_cls_score, pos_obj_cls_gt, 1), tf.float32),
        tf.cast(tf.nn.in_top_k(neg_obj_cls_score, neg_obj_cls_gt, 1), tf.float32)
    ], axis=0, name='obj_correct')
    obj_accuracy = tf.reduce_mean(obj_correct, name='obj_accuracy')

    # center regression losses (positive proposals -> their assigned GT)
    center_gt = tf.gather_nd(bboxes_xyz_gt, positive_gt_idxes)
    delta_predicted = tf.gather_nd(proposals_output[..., 2:5], positive_idxes)
    delta_gt = center_gt - tf.gather_nd(proposals_xyz, positive_idxes)
    center_loss = tf.reduce_mean(
        tf.reduce_sum(tf.losses.huber_loss(
            labels=delta_gt, predictions=delta_predicted,
            reduction=tf.losses.Reduction.NONE), axis=-1))

    # Appendix A1: chamfer loss, assignment at one bbox to each gt bbox
    bboxes_assignment_dual = tf.argmin(dist_mat, axis=1)  # B * BB
    batch_idx = tf.tile(
        tf.expand_dims(tf.range(
            tf.shape(bboxes_assignment_dual, out_type=tf.int64)[0]), axis=-1),
        [1, tf.shape(bboxes_assignment_dual)[1]])  # B * BB
    delta_gt_dual = bboxes_xyz_gt - tf.gather_nd(
        proposals_xyz,
        tf.stack([batch_idx, bboxes_assignment_dual], axis=-1))  # B * BB * 3
    delta_predicted_dual = tf.gather_nd(
        proposals_output[..., 2:5],
        tf.stack([batch_idx, bboxes_assignment_dual], axis=-1))  # B * BB * 3
    center_loss_dual = tf.reduce_mean(
        tf.reduce_sum(tf.losses.huber_loss(
            labels=delta_gt_dual, predictions=delta_predicted_dual,
            reduction=tf.losses.Reduction.NONE), axis=-1))

    # add up
    center_loss += center_loss_dual
    center_loss = tf.identity(center_loss, 'center_loss')

    # heading classification loss
    heading_cls_gt = tf.gather_nd(bboxes_heading_labels_gt, positive_gt_idxes)
    heading_cls_score = tf.gather_nd(
        proposals_output[..., 5:5 + config.NH], positive_idxes)
    heading_cls_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=heading_cls_score, labels=heading_cls_gt),
        name='heading_cls_loss')

    # heading residual loss: the GT residual is divided by pi / NH and only
    # the prediction for the GT heading class contributes.
    heading_cls_gt_onehot = tf.one_hot(heading_cls_gt, depth=config.NH,
                                       on_value=1, off_value=0, axis=-1)  # Np * NH
    heading_residual_gt = tf.gather_nd(bboxes_heading_residuals_gt, positive_gt_idxes) / (
        np.pi / config.NH)  # Np
    heading_residual_predicted = tf.gather_nd(
        proposals_output[..., 5 + config.NH:5 + 2 * config.NH], positive_idxes)  # Np * NH
    heading_residual_loss = tf.losses.huber_loss(labels=heading_residual_gt,
                                                 predictions=tf.reduce_sum(heading_residual_predicted * \
                                                                           tf.to_float(heading_cls_gt_onehot), axis=1),
                                                 reduction=tf.losses.Reduction.MEAN)
    heading_residual_loss = tf.identity(heading_residual_loss, name='heading_residual_loss')

    # Size loss
    size_cls_gt = tf.gather_nd(bboxes_size_labels_gt, positive_gt_idxes)
    size_cls_score = tf.gather_nd(
        proposals_output[..., 5 + 2 * config.NH:5 + 2 * config.NH + config.NS], positive_idxes)
    size_cls_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=size_cls_score, labels=size_cls_gt),
        name='size_cls_loss')

    # size residual loss: the GT residual is divided by the mean size of its
    # GT class; only the prediction for the GT size class contributes.
    size_cls_gt_onehot = tf.one_hot(size_cls_gt, depth=config.NS,
                                    on_value=1, off_value=0, axis=-1)  # Np * NS
    size_cls_gt_onehot = tf.tile(
        tf.expand_dims(tf.to_float(size_cls_gt_onehot), -1), [1, 1, 3])  # Np * NS * 3
    mean_size_arr_expand = tf.expand_dims(
        tf.constant(class_mean_size, dtype=tf.float32), 0)  # (1, NS, 3)
    mean_size_label = tf.reduce_sum(size_cls_gt_onehot * mean_size_arr_expand, axis=[1])  # (P, 3)
    size_residual_gt = tf.gather_nd(
        bboxes_size_residuals_gt, positive_gt_idxes) / mean_size_label  # Np * 3
    size_residual_predicted = tf.reshape(
        tf.gather_nd(
            proposals_output[..., 5 + 2 * config.NH + config.NS:5 + 2 * config.NH + 4 * config.NS],
            positive_idxes), (-1, config.NS, 3))  # Np * NS * 3
    size_residual_loss = tf.reduce_mean(tf.reduce_sum(tf.losses.huber_loss(
        labels=size_residual_gt,
        predictions=tf.reduce_sum(size_residual_predicted * tf.to_float(size_cls_gt_onehot), axis=1),
        reduction=tf.losses.Reduction.NONE), axis=-1), name='size_residual_loss')

    box_loss = tf.identity(center_loss + 0.1 * heading_cls_loss + heading_residual_loss
                           + 0.1 * size_cls_loss + size_residual_loss, name='box_loss')

    # semantic loss
    sem_cls_score = tf.gather_nd(proposals_output[..., -config.NC:], positive_idxes)
    sem_cls_gt = tf.gather_nd(bboxes_labels_gt, positive_gt_idxes)  # Np
    sem_cls_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=sem_cls_score, labels=sem_cls_gt),
        name='sem_cls_loss')
    sem_correct = tf.cast(tf.nn.in_top_k(sem_cls_score, sem_cls_gt, 1), tf.float32, name='sem_correct')
    sem_accuracy = tf.reduce_mean(sem_correct, name='sem_accuracy')

    # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
    # 1. written to tensorboard
    # 2. written to stat.json
    # 3. printed after each epoch
    # summary.add_moving_summary(obj_accuracy, sem_accuracy)

    # Use a regex to find parameters to apply weight decay.
    # Here the pattern '.*/W' selects the variables fed to the L2 penalty.
    # If you don't like regex, you can certainly define the cost in any other methods.
    # no weight decay: wd_cost is only reported in the summary below and is
    # NOT added to total_cost.
    wd_cost = tf.multiply(1e-5,
                          regularize_cost('.*/W', tf.nn.l2_loss),
                          name='regularize_loss')
    total_cost = vote_reg_loss + 0.5 * obj_cls_loss + 1. * box_loss + 0.1 * sem_cls_loss
    total_cost = tf.identity(total_cost, name='total_loss')
    summary.add_moving_summary(total_cost,
                               vote_reg_loss,
                               obj_cls_loss,
                               box_loss,
                               center_loss,
                               heading_cls_loss,
                               heading_residual_loss,
                               size_cls_loss,
                               size_residual_loss,
                               sem_cls_loss,
                               wd_cost,
                               obj_accuracy,
                               sem_accuracy,
                               decay=0)

    # monitor histogram of all weight (of conv and fc layers) in tensorboard
    summary.add_param_summary(('.*/W', ['histogram', 'rms']))
    # the function should return the total cost to be optimized
    return total_cost
def _build_graph(self, inputs):
    """Build the volume/image auto-encoding graph, its losses and a summary image.

    ``inputs`` unpacks into ``(vol3d, img2d, condition)``. The volume is
    projected by the 3D encoder, and both that projection and the 2D image
    are encoded with the VGG19 encoder. When ``condition > 0`` the code
    produced by ``self._build_adain_layers`` is decoded, otherwise the plain
    volume code is decoded; the image code is always decoded unchanged. Both
    decoded 2D results are then passed through the 3D decoder.

    Side effects: sets ``self.cost`` to
    ``2 * mean|vol3d - vol3d_decoded| + 1 * mean|img2d - img2d_decoded|``,
    creates the named tensors ``out_vol3d``, ``out_vol3d_decoded`` and
    ``viz``, and registers the ``colorized`` image summary.
    """
    vol3d, img2d, condition = inputs

    with tf.variable_scope('gen'):
        # 3D volume -> 2D projection.
        with tf.variable_scope('encoder_3d'):
            vol2d = self.vol3d_encoder(vol3d)

        # Encode the projection and the reference image.
        with tf.variable_scope('encoder_vgg19'):
            vol2d_encoded = self.vgg19_encoder(vol2d)
            img2d_encoded = self.vgg19_encoder(img2d)

        # t = AdaIN(f(c), f(s)), used only when `condition` is positive.
        with tf.variable_scope('style_transfer'):
            merge_encoded = self._build_adain_layers(vol2d_encoded,
                                                     img2d_encoded)
            # tf.cond needs a rank-0 predicate.
            condition = tf.reshape(condition, [])

            def _pick_merged():
                return tf.identity(merge_encoded)

            def _pick_volume():
                return tf.identity(vol2d_encoded)

            chose_encoded = tf.cond(condition > 0, _pick_merged, _pick_volume)
            img2d_encoded = tf.identity(img2d_encoded)

        # Decode back to 2D images.
        with tf.variable_scope('decoder_vgg19'):
            vol2d_decoded = self.vgg19_decoder(chose_encoded)
            img2d_decoded = self.vgg19_decoder(img2d_encoded)

        # Lift the decoded 2D images back to volumes.
        with tf.variable_scope('decoder_3d'):
            vol3d_decoded = self.vol3d_decoder(vol2d_decoded)
            img3d_decoded = self.vol3d_decoder(img2d_decoded)

    # NOTE(review): the original file lost its indentation; the nesting of
    # the sections below relative to the 'gen' scope should be confirmed.
    with tf.name_scope('losses'):
        # L1 reconstruction errors.
        loss_vol3d = tf.reduce_mean(tf.abs(vol3d - vol3d_decoded),
                                    name='loss_vol3d')
        loss_img2d = tf.reduce_mean(tf.abs(img2d - img2d_decoded),
                                    name='loss_img2d')
        add_moving_summary(loss_vol3d)
        add_moving_summary(loss_img2d)

        # The volume reconstruction is weighted twice as much as the image.
        weighted_terms = [2e0 * loss_vol3d, 1e0 * loss_img2d]
        self.cost = tf.reduce_sum(weighted_terms, name='self.cost')
        add_moving_summary(self.cost)

    # Named handles so the tensors can be looked up by name.
    tf.identity(vol3d, name='out_vol3d')
    tf.identity(vol3d_decoded, name='out_vol3d_decoded')

    with tf.name_scope('visualization'):

        def _as_frame(tensor):
            """Squeeze a one-voxel-thick slab and view it as a 1 x H x W x 3 image."""
            return tf.reshape(tf.squeeze(tensor), [1, DIMY, DIMX, 3])

        mid = 128  # index of the slab taken along each of the first three axes

        # Three orthogonal cuts of the input volume.
        vol_cut_a = _as_frame(vol3d[mid:mid + 1, ...])
        vol_cut_b = _as_frame(vol3d[:, mid:mid + 1, ...])
        vol_cut_c = _as_frame(vol3d[:, :, mid:mid + 1, ...])
        _vol_blank = _as_frame(tf.zeros_like(vol_cut_a))  # not shown in the mosaic

        # The same cuts of the reconstructed volume.
        rec_cut_a = _as_frame(vol3d_decoded[mid:mid + 1, ...])
        rec_cut_b = _as_frame(vol3d_decoded[:, mid:mid + 1, ...])
        rec_cut_c = _as_frame(vol3d_decoded[:, :, mid:mid + 1, ...])
        _rec_blank = _as_frame(tf.zeros_like(rec_cut_a))  # not shown in the mosaic

        # The same cuts of the volume decoded from the image code.
        img_cut_a = _as_frame(img3d_decoded[mid:mid + 1, ...])
        img_cut_b = _as_frame(img3d_decoded[:, mid:mid + 1, ...])
        img_cut_c = _as_frame(img3d_decoded[:, :, mid:mid + 1, ...])
        _img_blank = _as_frame(tf.zeros_like(img_cut_a))  # not shown in the mosaic

        blank = tf.zeros_like(img2d)

        # Three rows (axis 1), each a horizontal strip of five tiles (axis 2).
        top_row = tf.concat(
            [vol_cut_a, vol_cut_b, vol_cut_c, vol2d, img2d], 2)
        middle_row = tf.concat(
            [rec_cut_a, rec_cut_b, rec_cut_c, blank, blank], 2)
        bottom_row = tf.concat(
            [img_cut_a, img_cut_b, img_cut_c, img2d_decoded, img2d], 2)
        viz = tf.concat([top_row, middle_row, bottom_row], 1)

        viz = tf.cast(tf.clip_by_value(viz, 0, 255), tf.uint8, name='viz')
        tf.summary.image('colorized', viz, max_outputs=50)
def feature_to_prediction_and_loss(scope_name,
                                   l,
                                   label,
                                   num_classes,
                                   prediction_feature,
                                   ch_dim,
                                   label_smoothing=0,
                                   dense_dropout_keep_prob=1.0,
                                   is_last=True):
    """Attach a prediction head to feature ``l`` and compute its loss.

    Args:
        scope_name: variable scope under which the head is created.
        l: feature tensor; the code handles rank-4 and rank-2 inputs.
        label: ground-truth labels (class ids when ``num_classes > 0``,
            regression targets otherwise).
        num_classes: number of classes; a value <= 0 selects the
            single-output regression head.
        prediction_feature: one of ``'1x1'``, ``'msdense'``, ``'bn'``; any
            other value leaves the feature untouched.
        ch_dim: index of the channel axis of ``l``.
        label_smoothing: when > 0, the smoothed one-hot cross entropy is used.
        dense_dropout_keep_prob: keep probability of the dropout applied
            before the final linear layer of the last head.
        is_last: False for an auxiliary head (a rank-4 feature then goes
            through the ``aux_preprocess`` branch).

    Returns:
        ``(cost, variables)`` where ``variables`` holds the W and b of the
        final linear layer.
    """
    with tf.variable_scope(scope_name):
        n_dim = len(l.get_shape().as_list())
        use_aux_preprocess = n_dim == 4 and not is_last

        if use_aux_preprocess:
            # Auxiliary head: pool, project, then collapse the whole spatial
            # extent with a conv whose kernel equals the feature-map size.
            with tf.variable_scope('aux_preprocess'):
                l = tf.nn.relu(l)
                l = AvgPooling('pool', l, pool_size=5, strides=3,
                               padding='valid')
                l = Conv2D('conv_proj', l, 128, 1, strides=1,
                           activation=BNReLU)
                full_shape = l.get_shape().as_list()
                # Spatial dims are axes 2-3 when channels come first.
                spatial = full_shape[2:4] if ch_dim == 1 else full_shape[1:3]
                l = Conv2D('conv_flat', l, 768, spatial, strides=1,
                           padding='valid', activation=BNReLU)
                l = tf.layers.flatten(l)
        else:
            l = BNReLU('bnrelu_pred', l)
            ch_in = _get_dim(l, ch_dim)

            if prediction_feature == '1x1':
                ch_out = ch_in
                if n_dim == 4:
                    l = Conv2D('conv1x1', l, ch_out, 1)
                else:
                    assert n_dim == 2, n_dim
                    l = FullyConnected('fc1x1', l, ch_out,
                                       activation=tf.identity)
                l = BNReLU('bnrelu1x1', l)
            elif prediction_feature == 'msdense':
                # NOTE(review): this asserts a rank-2 input but then applies
                # Conv2D layers -- confirm the intended rank.
                assert n_dim == 2, n_dim
                ch_inter = ch_in
                for idx in range(2):
                    l = Conv2D('conv1x1_{}'.format(idx), l, ch_inter, 3,
                               strides=2)
                    l = BNReLU('bnrelu1x1_{}'.format(idx), l)
            elif prediction_feature == 'bn':
                l = BatchNorm('bn', l)
            # Any other value: use the feature as is.

            if n_dim > 2:
                l = GlobalAvgPooling('gap', l)

        variables = []
        if num_classes > 0:
            # Classification head.
            if is_last:
                l = Dropout('drop_pre_fc', l,
                            keep_prob=dense_dropout_keep_prob)
            logits = FullyConnected('linear', l, num_classes,
                                    activation=tf.identity)
            variables.extend([logits.variables.W, logits.variables.b])
            tf.nn.softmax(logits, name='preds')

            # Local cost.
            if label_smoothing > 0:
                cost = tf.losses.softmax_cross_entropy(
                    onehot_labels=tf.one_hot(label, num_classes),
                    logits=logits,
                    label_smoothing=label_smoothing)
            else:
                cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=label)
            cost = tf.reduce_mean(cost, name='cross_entropy_loss')
            add_moving_summary(cost)

            # Local top-1 / top-5 error rates.
            error_specs = (
                (1, 'wrong-top1', 'train_error'),
                (5, 'wrong-top5', 'train-error-top5'),
            )
            for topk, vector_name, summary_name in error_specs:
                missed = tf.cast(
                    tf.logical_not(tf.nn.in_top_k(logits, label, topk)),
                    tf.float32, name=vector_name)
                add_moving_summary(tf.reduce_mean(missed, name=summary_name))
        else:
            # Regression head: one non-negative output, half squared error.
            pred = FullyConnected('linear', l, 1, activation=tf.identity)
            variables.extend([pred.variables.W, pred.variables.b])
            pred = tf.nn.relu(pred)
            tf.identity(pred, name='preds')
            # NOTE(review): `pred` comes from a 1-unit layer; if `label` is
            # rank 1 this difference broadcasts -- confirm label's shape.
            cost = tf.reduce_mean(0.5 * (pred - label)**2,
                                  name='mean_square_error')
            add_moving_summary(cost)

        return cost, variables
def _build_graph(self, inputs):
    """Build the image -> membrane -> label GAN graph and its losses.

    ``inputs`` unpacks into ``(pi, pm, pl, ui, um, ul)``; every tensor is
    first passed through ``cvt2tanh``. The generator is applied under
    ``gen/I2M`` (pi -> pim) and ``gen/M2L`` (pim -> piml, pm -> pml); the
    discriminator under ``discrim/M`` (um, pim) and ``discrim/L``
    (ul, piml).

    Side effects: sets ``self.g_loss`` and ``self.d_loss`` (each including
    an L2 weight-decay term), calls ``self.collect_variables()``, registers
    moving summaries, and creates the ``viz`` tensor together with the
    ``colorized`` image summary.

    The tensor debug output uses ``print(...)`` so that the module parses on
    both Python 2 and Python 3.
    """
    G = tf.get_default_graph()  # used by `rounded` to override the Round gradient
    # Results are discarded; kept as in the original code.
    tf.local_variables_initializer()
    tf.global_variables_initializer()

    pi, pm, pl, ui, um, ul = inputs
    pi = cvt2tanh(pi)
    pm = cvt2tanh(pm)
    pl = cvt2tanh(pl)
    ui = cvt2tanh(ui)
    um = cvt2tanh(um)
    ul = cvt2tanh(ul)

    def tf_rand_score(x1, x2):
        """Return 1 - adjusted Rand score of the two flattened arrays."""
        return 1.0 - adjusted_rand_score(x1.flatten(), x2.flatten())

    def rounded(label, factor=MAX_LABEL, name='quantized'):
        """Round `label` in image range, with Round's gradient mapped to Identity."""
        with G.gradient_override_map({"Round": "Identity"}):
            with freeze_variables():
                with tf.name_scope(name=name):
                    label = cvt2imag(label, maxVal=factor)
                    label = tf.round(label)
                    label = cvt2tanh(label, maxVal=factor)
                return tf.identity(label, name=name)

    with argscope([Conv2D, Deconv2D, FullyConnected],
                  W_init=tf.truncated_normal_initializer(stddev=0.02),
                  use_bias=False), \
            argscope(BatchNorm, gamma_init=tf.random_uniform_initializer()), \
            argscope([Conv2D, Deconv2D, BatchNorm], data_format='NHWC'), \
            argscope(LeakyReLU, alpha=0.2):

        with tf.variable_scope('gen'):
            with tf.variable_scope('I2M'):
                pim, feat_im = self.generator(pi)
            with tf.variable_scope('M2L'):
                piml, feat_iml = self.generator(pim)
                pml, feat_ml = self.generator(pm)

        with tf.variable_scope('discrim'):
            with tf.variable_scope('M'):
                m_dis_real = self.discriminator(um)
                m_dis_fake_from_image = self.discriminator(pim)
            with tf.variable_scope('L'):
                l_dis_real = self.discriminator(ul)
                l_dis_fake_from_image = self.discriminator(piml)

    # NOTE(review): the original file lost its indentation; the nesting of
    # the statements below relative to the scopes above should be confirmed.
    piml = rounded(piml)

    # L1 reconstruction errors (reported as summaries only).
    recon_iml = tf.reduce_mean(tf.abs((pl) - (piml)), name='recon_iml')
    recon_im = tf.reduce_mean(tf.abs((pm) - (pim)), name='recon_im')

    with tf.name_scope('GAN_loss'):
        G_loss_LI, D_loss_LI = self.build_losses(
            l_dis_real, l_dis_fake_from_image, name='LL')
        G_loss_MI, D_loss_MI = self.build_losses(
            m_dis_real, m_dis_fake_from_image, name='MI')

    # Custom loss for the membrane prediction.
    with tf.name_scope('membr_loss'):

        def membr_loss(y_true, y_pred, name='membr_loss'):
            """Mean of (binary cross entropy - jaccard dice coefficient)."""
            return tf.reduce_mean(
                tf.subtract(
                    binary_cross_entropy(cvt2imag(y_true, maxVal=1.0),
                                         cvt2imag(y_pred, maxVal=1.0)),
                    dice_coe(cvt2imag(y_true, maxVal=1.0),
                             cvt2imag(y_pred, maxVal=1.0),
                             axis=[1, 2, 3],
                             loss_type='jaccard')),
                name=name)

        membr_im = membr_loss(pm, pim, name='membr_im')

    # Custom loss for the label prediction.
    with tf.name_scope('label_loss'):

        def label_loss(y_true_L, y_pred_L, y_grad_M, name='label_loss'):
            """Penalize label gradients, weighted by the membrane map.

            ``y_true_L`` is accepted for signature compatibility but unused.
            Returns ``(loss, thresholded label-gradient mask in tanh range)``.
            """
            g_mag_grad_M = cvt2imag(y_grad_M, maxVal=1.0)
            mag_grad_L = magnitute_central_difference(y_pred_L,
                                                      name='mag_grad_L')
            # 1 where the predicted label map changes, 0 elsewhere.
            cond = tf.greater(mag_grad_L, tf.zeros_like(mag_grad_L))
            thresholded_mag_grad_L = tf.where(
                cond,
                tf.ones_like(mag_grad_L),
                tf.zeros_like(mag_grad_L),
                name='thresholded_mag_grad_L')
            gtv_guess = tf.multiply(g_mag_grad_M, thresholded_mag_grad_L,
                                    name='gtv_guess')
            loss_gtv_guess = tf.reduce_mean(gtv_guess, name='loss_gtv_guess')
            thresholded_mag_grad_L = cvt2tanh(thresholded_mag_grad_L,
                                              maxVal=1.0)
            return loss_gtv_guess, thresholded_mag_grad_L

        label_iml, g_iml = label_loss(None, piml, pim, name='label_iml')
        label_ml, g_ml = label_loss(None, pml, pm, name='label_loss_ml')

    # Custom loss based on the adjusted Rand score (computed via py_func).
    with tf.name_scope('rand_loss'):
        rand_iml = tf.reduce_mean(
            tf.cast(tf.py_func(tf_rand_score, [piml, pl], tf.float64),
                    tf.float32))
        # Built but not part of g_loss, as in the original code.
        rand_ml = tf.reduce_mean(
            tf.cast(tf.py_func(tf_rand_score, [pml, pl], tf.float64),
                    tf.float32))

    with tf.name_scope('discrim_loss'):

        def regDLF(y_true, y_pred, alpha=1, beta=1, gamma=0.01,
                   delta_v=0.5, delta_d=1.5, name='loss_discrim'):
            """Return alpha * Lvar + beta * Ldist + gamma * Lreg for features `y_pred`.

            Features are grouped by the unique values of the flattened
            ``y_true``. ``Lvar`` is built from distances to the group means,
            ``Ldist`` from distances between group means and ``Lreg`` from
            the L1 norms of the group means.
            """
            y_true = tf.reshape(y_true, [DIMZ * DIMY * DIMX])
            nDim = tf.shape(y_pred)[-1]
            X = tf.reshape(y_pred, [DIMZ * DIMY * DIMX, nDim])
            uniqueLabels, uniqueInd = tf.unique(y_true)
            numUnique = tf.size(uniqueLabels)  # number of distinct labels

            # Per-label mean feature vector.
            Sigma = tf.unsorted_segment_sum(X, uniqueInd, numUnique)
            ones_Sigma = tf.ones_like(X)
            ones_Sigma = tf.unsorted_segment_sum(ones_Sigma, uniqueInd,
                                                 numUnique)
            mu = tf.divide(Sigma, ones_Sigma)

            Lreg = tf.reduce_mean(tf.norm(mu, axis=1, ord=1))

            # Hinged, squared distance of every feature to its label mean.
            T = tf.norm(tf.subtract(tf.gather(mu, uniqueInd), X),
                        axis=1, ord=1)
            T = tf.divide(T, Lreg)
            T = tf.subtract(T, delta_v)
            T = tf.clip_by_value(T, 0, T)
            T = tf.square(T)

            ones_Sigma = tf.ones_like(uniqueInd, dtype=tf.float32)
            ones_Sigma = tf.unsorted_segment_sum(ones_Sigma, uniqueInd,
                                                 numUnique)
            clusterSigma = tf.unsorted_segment_sum(T, uniqueInd, numUnique)
            clusterSigma = tf.divide(clusterSigma, ones_Sigma)
            Lvar = tf.reduce_mean(clusterSigma)

            # All pairwise differences between label means.
            mu_interleaved_rep = tf.tile(mu, [numUnique, 1])
            mu_band_rep = tf.tile(mu, [1, numUnique])
            mu_band_rep = tf.reshape(mu_band_rep,
                                     (numUnique * numUnique, nDim))
            mu_diff = tf.subtract(mu_band_rep, mu_interleaved_rep)

            # Drop the all-zero rows.
            # NOTE(review): `zero_vector` has shape (1, 1) while
            # `intermediate_tensor` is rank 1 -- confirm the mask shape
            # that reaches tf.boolean_mask.
            intermediate_tensor = tf.reduce_sum(tf.abs(mu_diff), 1)
            zero_vector = tf.zeros(shape=(1, 1), dtype=tf.float32)
            bool_mask = tf.not_equal(intermediate_tensor, zero_vector)
            omit_zeros = tf.boolean_mask(mu_diff, bool_mask)
            mu_diff = tf.expand_dims(omit_zeros, axis=1)
            print(mu_diff)
            mu_diff = tf.norm(mu_diff, ord=1)
            mu_diff = tf.divide(mu_diff, Lreg)
            mu_diff = tf.subtract(2 * delta_d, mu_diff)
            mu_diff = tf.clip_by_value(mu_diff, 0, mu_diff)
            mu_diff = tf.square(mu_diff)
            Ldist = tf.reduce_mean(mu_diff)

            L = alpha * Lvar + beta * Ldist + gamma * Lreg
            print(L)
            print(Ldist)
            print(Lvar)
            print(Lreg)
            return tf.squeeze(L, name=name)

        discrim_im = regDLF(cvt2imag(pm, maxVal=1.0), feat_im,
                            name='discrim_im')
        discrim_iml = regDLF(cvt2imag(pl, maxVal=MAX_LABEL), feat_iml,
                             name='discrim_iml')
        discrim_ml = regDLF(cvt2imag(pl, maxVal=MAX_LABEL), feat_ml,
                            name='discrim_ml')

    self.g_loss = tf.add_n(
        [
            (rand_iml),
            (G_loss_LI + G_loss_MI),
            (discrim_im + discrim_iml + discrim_ml),
            (membr_im),
            (label_iml + label_ml),
        ],
        name='G_loss_total')
    self.d_loss = tf.add_n(
        [
            (D_loss_LI + D_loss_MI),
        ],
        name='D_loss_total')

    # L2 weight decay, applied separately to generator and discriminator.
    wd_g = regularize_cost('gen/.*/W', l2_regularizer(1e-5),
                           name='G_regularize')
    wd_d = regularize_cost('discrim/.*/W', l2_regularizer(1e-5),
                           name='D_regularize')

    self.g_loss = tf.add(self.g_loss, wd_g, name='g_loss')
    self.d_loss = tf.add(self.d_loss, wd_d, name='d_loss')

    self.collect_variables()

    add_moving_summary(self.d_loss, self.g_loss)
    add_moving_summary(
        recon_iml,
        recon_im,
        label_iml,
        label_ml,
    )

    # Two rows (axis 1), each a strip of five tiles (axis 2).
    viz = tf.concat(
        [
            tf.concat([ui, pi, pim, piml, g_iml], 2),
            tf.concat([um, pl, pm, pml, g_ml], 2),
        ], 1)
    viz = cvt2imag(viz)
    viz = tf.cast(tf.clip_by_value(viz, 0, 255), tf.uint8, name='viz')
    tf.summary.image('colorized', viz, max_outputs=50)
def rpn_losses_iou(anchor_labels, anchor_boxes, gt_boxes, rpn_boxes,
                   label_logits, box_logits, iou_logits):
    """Compute the RPN label, box and IoU losses with hard-example selection.

    Args:
        anchor_labels: fHxfWxNA; -1 marks anchors that are ignored and 1
            marks positive anchors.
        anchor_boxes: fHxfWxNAx4, encoded
        gt_boxes: ground-truth boxes; reshaped to (-1, 4) before the IoU
            computation.
        rpn_boxes: decoded predicted boxes; reshaped to (-1, 4).
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4
        iou_logits: fHxfWxNA

    Returns:
        label_loss, box_loss, iou_loss

    The label and IoU losses are averaged over the elements whose
    per-element loss is at least the k-th largest, where k is
    ``cfg.FRCNN.BATCH_PER_IM`` clamped to the number of available elements.
    An empty selection yields the placeholder value 0 instead of failing at
    run time.
    """
    with tf.device('/cpu:0'):
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(
            tf.count_nonzero(valid_mask, dtype=tf.int32),
            name='num_valid_anchor')
        nr_pos = tf.identity(
            tf.count_nonzero(pos_mask, dtype=tf.int32),
            name='num_pos_anchor')
        # nr_pos is guaranteed >0 in C4. But in FPN. even nr_valid could be 0.

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        # A small value will make summaries appear lower.
        metric_placeholder = 0.5
        with tf.device('/cpu:0'):
            for prob_th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > prob_th,
                                           tf.int32)
                nr_pos_prediction = tf.reduce_sum(valid_prediction,
                                                  name='num_pos_prediction')
                pos_prediction_corr = tf.count_nonzero(
                    tf.logical_and(
                        valid_label_prob > prob_th,
                        tf.equal(valid_prediction, valid_anchor_labels)),
                    dtype=tf.int32)
                recall = tf.to_float(
                    tf.truediv(pos_prediction_corr, nr_pos))
                recall = tf.where(tf.equal(nr_pos, 0),
                                  metric_placeholder, recall,
                                  name='recall_th{}'.format(prob_th))
                precision = tf.to_float(
                    tf.truediv(pos_prediction_corr, nr_pos_prediction))
                precision = tf.where(tf.equal(nr_pos_prediction, 0),
                                     metric_placeholder, precision,
                                     name='precision_th{}'.format(prob_th))
                summaries.extend([precision, recall])
        add_moving_summary(*summaries)

    # Per-level loss summaries in FPN may appear lower due to the use of a
    # small placeholder. But the total RPN loss will be fine.
    # TODO make the summary op smarter
    placeholder = 0.

    ce_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)

    # Keep only the most wrongly classified anchors.
    n_selected = tf.cast(cfg.FRCNN.BATCH_PER_IM, tf.int32)
    n_selected = tf.minimum(n_selected, tf.size(valid_anchor_labels))
    vals, _ = tf.nn.top_k(ce_loss, k=n_selected)
    # top_k returns values in descending order, so the smallest kept value is
    # the selection threshold. Unlike `vals[-1]`, reduce_min does not fail at
    # run time when `vals` is empty (nr_valid == 0).
    ce_threshold = tf.reduce_min(vals)
    loss_weight = tf.cast(ce_loss >= ce_threshold, tf.float32)
    label_loss = tf.reduce_sum(
        ce_loss * loss_weight) * 1. / tf.reduce_sum(loss_weight)
    label_loss = tf.where(tf.equal(nr_valid, 0), placeholder, label_loss,
                          name='label_loss')

    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    # huber_loss is called without an explicit `delta` here.
    box_loss = tf.losses.huber_loss(pos_anchor_boxes, pos_box_logits,
                                    reduction=tf.losses.Reduction.SUM)
    box_loss = box_loss * (50. / cfg.RPN.BATCH_PER_IM)
    box_loss = tf.where(tf.equal(nr_pos, 0), placeholder, box_loss,
                        name='box_loss')

    # iou loss: regress sigmoid(iou_logits) towards each predicted box's
    # best IoU with any ground-truth box.
    rpn_boxes = tf.reshape(rpn_boxes, [-1, 4])
    gt_boxes = tf.reshape(gt_boxes, [-1, 4])
    iou = pairwise_iou(rpn_boxes, gt_boxes)  # nxm
    max_iou = tf.reduce_max(iou, axis=1)
    # if only bg gt_boxes, all ious are 0.
    max_iou = tf.where(tf.equal(nr_pos, 0), tf.zeros_like(max_iou), max_iou)
    max_iou = tf.stop_gradient(tf.reshape(max_iou, [-1]),
                               name='rpn_box_gt_iou')
    iou_logits = tf.nn.sigmoid(iou_logits)
    iou_logits = tf.reshape(iou_logits, [-1])
    iou_loss = tf.losses.huber_loss(max_iou, iou_logits,
                                    reduction=tf.losses.Reduction.NONE)

    # Same hard-example selection as for the label loss; k is clamped so
    # top_k never asks for more elements than exist.
    nr_iou = tf.size(iou_loss)
    n_selected = tf.minimum(tf.cast(cfg.FRCNN.BATCH_PER_IM, tf.int32),
                            nr_iou)
    vals, _ = tf.nn.top_k(iou_loss, k=n_selected)
    iou_threshold = tf.reduce_min(vals)
    loss_weight = tf.cast(iou_loss >= iou_threshold, tf.float32)
    iou_loss = tf.reduce_sum(
        iou_loss * loss_weight) * 1. / tf.reduce_sum(loss_weight)
    iou_loss = tf.where(tf.equal(nr_iou, 0), placeholder, iou_loss,
                        name='iou_loss')

    add_moving_summary(label_loss, box_loss, iou_loss, nr_valid, nr_pos)
    return label_loss, box_loss, iou_loss
def _build_graph(self, inputs):
    """Build a stacked-LSTM language model whose state persists across steps.

    ``inputs`` unpacks into ``(tokens, next_tokens)``. The LSTM state is
    kept in non-trainable variables (exposed as ``self.state``) and is
    overwritten with the final state of each unrolled pass, because the cost
    is computed under a control dependency on those assignments.

    Sets ``self.cost`` (summed cross entropy divided by ``BATCH``) and adds
    moving summaries for the cost and for ``exp(cost / SEQ_LEN)``.
    """
    is_training = get_current_tower_context().is_training
    tokens, next_tokens = inputs
    initializer = tf.random_uniform_initializer(-0.05, 0.05)

    def make_cell():
        lstm = rnn.BasicLSTMCell(num_units=HIDDEN_SIZE,
                                 forget_bias=0.0,
                                 reuse=tf.get_variable_scope().reuse)
        if not is_training:
            return lstm
        # NOTE(review): DROPOUT is used as a keep probability here but as a
        # drop rate for the embedding dropout below -- the two only agree
        # when DROPOUT == 0.5; confirm the intended meaning.
        return rnn.DropoutWrapper(lstm, output_keep_prob=DROPOUT)

    cell = rnn.MultiRNNCell([make_cell() for _ in range(NUM_LAYER)])

    def zero_state_var(var_name):
        return tf.get_variable(var_name, [BATCH, HIDDEN_SIZE],
                               trainable=False,
                               initializer=tf.constant_initializer())

    # One (c, h) pair of persistent state variables per layer.
    state_var = tuple(
        rnn.LSTMStateTuple(zero_state_var('c{}'.format(layer)),
                           zero_state_var('h{}'.format(layer)))
        for layer in range(NUM_LAYER))
    self.state = state_var

    embeddingW = tf.get_variable('embedding', [VOCAB_SIZE, HIDDEN_SIZE],
                                 initializer=initializer)
    # B x seqlen x hiddensize
    input_feature = tf.nn.embedding_lookup(embeddingW, tokens)
    input_feature = Dropout(input_feature, rate=DROPOUT)

    with tf.variable_scope('LSTM', initializer=initializer):
        # seqlen x (Bxhidden)
        step_inputs = tf.unstack(input_feature, num=SEQ_LEN, axis=1)
        outputs, last_state = rnn.static_rnn(cell, step_inputs, state_var,
                                             scope='rnn')

    # Write the final state back into the variables once the unrolled loop
    # completes (c first, then h, for every layer).
    update_state_ops = [
        tf.assign(target, value)
        for layer in range(NUM_LAYER)
        for target, value in (
            (state_var[layer].c, last_state[layer].c),
            (state_var[layer].h, last_state[layer].h),
        )
    ]

    # seqlen x (Bxrnnsize) -> (Bxseqlen) x hidden
    flat_outputs = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN_SIZE])
    logits = FullyConnected('fc', flat_outputs, VOCAB_SIZE,
                            activation=tf.identity,
                            kernel_initializer=initializer,
                            bias_initializer=initializer)
    xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.reshape(next_tokens, [-1]))

    # Evaluating the cost forces the state update to run.
    with tf.control_dependencies(update_state_ops):
        total_xent = tf.reduce_sum(xent_loss)
        self.cost = tf.truediv(total_xent, tf.cast(BATCH, tf.float32),
                               name='cost')

    # cost / SEQ_LEN is the log-perplexity.
    perpl = tf.exp(self.cost / SEQ_LEN, name='perplexity')
    summary.add_moving_summary(perpl, self.cost)
def build_graph(self, *inputs):
    """Build the FPN (Mask) R-CNN graph for training or inference.

    Positional layout of ``inputs``:
        0: image
        1 .. 2 * L: per-level anchor labels and anchor boxes, alternating
            (L = ``len(config.ANCHOR_STRIDES_FPN)``)
        2 * L + 1: gt_boxes
        2 * L + 2: gt_labels
        last: gt_masks (read only when ``config.MODE_MASK`` is set)

    Returns:
        The total cost tensor when training. In inference mode nothing is
        returned; the outputs are the named tensors created by
        ``self.fastrcnn_inference`` and, with masks enabled, ``final_masks``.
    """
    num_fpn_level = len(config.ANCHOR_STRIDES_FPN)
    assert len(config.ANCHOR_SIZES) == num_fpn_level
    is_training = get_current_tower_context().is_training

    image = inputs[0]
    num_anchor_inputs = 2 * num_fpn_level
    input_anchors = inputs[1:1 + num_anchor_inputs]
    multilevel_anchor_labels = input_anchors[0::2]
    multilevel_anchor_boxes = input_anchors[1::2]
    # The ground truth follows the anchor inputs. Deriving the indices from
    # the level count (rather than hard-coding 11 and 12) keeps this correct
    # for any number of FPN levels; for 5 levels the indices are unchanged.
    gt_boxes = inputs[1 + num_anchor_inputs]
    gt_labels = inputs[2 + num_anchor_inputs]
    if config.MODE_MASK:
        gt_masks = inputs[-1]

    image = self.preprocess(image)  # 1CHW
    image_shape2d = tf.shape(image)[2:]  # h,w

    c2345 = resnet_fpn_backbone(image, config.RESNET_NUM_BLOCK)
    p23456 = fpn_model('fpn', c2345)

    nms_topk = (config.TRAIN_FPN_NMS_TOPK
                if is_training else config.TEST_FPN_NMS_TOPK)

    # Multi-Level RPN Proposals
    multilevel_proposals = []
    rpn_loss_collection = []
    all_anchors_fpn = get_all_anchors_fpn()  # loop-invariant
    for lvl in range(num_fpn_level):
        rpn_label_logits, rpn_box_logits = rpn_head(
            'rpn', p23456[lvl], config.FPN_NUM_CHANNEL,
            len(config.ANCHOR_RATIOS))
        with tf.name_scope('FPN_lvl{}'.format(lvl + 2)):
            anchors = tf.constant(all_anchors_fpn[lvl],
                                  name='rpn_anchor_lvl{}'.format(lvl + 2))
            anchors, anchor_labels, anchor_boxes = \
                self.narrow_to_featuremap(p23456[lvl], anchors,
                                          multilevel_anchor_labels[lvl],
                                          multilevel_anchor_boxes[lvl])
            anchor_boxes_encoded = encode_bbox_target(anchor_boxes, anchors)
            pred_boxes_decoded = decode_bbox_target(rpn_box_logits, anchors)
            proposal_boxes, proposal_scores = generate_rpn_proposals(
                tf.reshape(pred_boxes_decoded, [-1, 4]),
                tf.reshape(rpn_label_logits, [-1]),
                image_shape2d,
                nms_topk)
            multilevel_proposals.append((proposal_boxes, proposal_scores))
            if is_training:
                label_loss, box_loss = rpn_losses(
                    anchor_labels, anchor_boxes_encoded,
                    rpn_label_logits, rpn_box_logits)
                # Stored interleaved: label losses at even positions, box
                # losses at odd positions.
                rpn_loss_collection.extend([label_loss, box_loss])

    # Merge proposals from multi levels, pick top K
    proposal_boxes = tf.concat([x[0] for x in multilevel_proposals],
                               axis=0)  # nx4
    proposal_scores = tf.concat([x[1] for x in multilevel_proposals],
                                axis=0)  # n
    proposal_topk = tf.minimum(tf.size(proposal_scores), nms_topk)
    proposal_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                k=proposal_topk,
                                                sorted=False)
    proposal_boxes = tf.gather(proposal_boxes, topk_indices)

    if is_training:
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        rcnn_boxes = proposal_boxes

    roi_feature_fastrcnn = multilevel_roi_align(p23456[:4], rcnn_boxes, 7)
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_2fc_head(
        'fastrcnn', roi_feature_fastrcnn, config.NUM_CLASS)

    if is_training:
        # rpn loss is already defined above
        with tf.name_scope('rpn_losses'):
            rpn_total_label_loss = tf.add_n(rpn_loss_collection[::2],
                                            name='label_loss')
            rpn_total_box_loss = tf.add_n(rpn_loss_collection[1::2],
                                          name='box_loss')
            add_moving_summary(rpn_total_box_loss, rpn_total_label_loss)

        # fastrcnn loss:
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        # fg inds w.r.t all samples
        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits,
                                           fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes, matched_gt_boxes,
            fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if config.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], fg_sampled_boxes, 14)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', roi_feature_maskrcnn, config.NUM_CLASS,
                4)  # #fg x #cat x 28 x 28

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                fg_sampled_boxes,
                fg_inds_wrt_gt,
                28,
                pad_border=False)  # fg x 1x28x28
            target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                             'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels,
                                       target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(1e-4),
            name='wd_cost')

        total_cost = tf.add_n(
            rpn_loss_collection +
            [fastrcnn_label_loss, fastrcnn_box_loss, mrcnn_loss, wd_cost],
            'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        final_boxes, final_labels = self.fastrcnn_inference(
            image_shape2d, rcnn_boxes, fastrcnn_label_logits,
            fastrcnn_box_logits)
        if config.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], final_boxes, 14)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', roi_feature_maskrcnn, config.NUM_CLASS,
                4)  # #fg x #cat x 28 x 28
            # Select, for every detection, the mask of its predicted class
            # (labels are shifted by one to index the mask channels).
            indices = tf.stack([
                tf.range(tf.size(final_labels)),
                tf.to_int32(final_labels) - 1
            ], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits,
                                             indices)  # #resultx28x28
            tf.sigmoid(final_mask_logits, name='final_masks')
def build_graph(self, image, edgemap):
    """Build the multi-scale edge-prediction graph and, when training, its cost.

    Five stages of 3x3 ReLU convolutions are applied to ``image``, with a
    2x max-pooling between consecutive stages. After every stage a 1x1
    convolution produces a single-channel side map that is bilinearly
    upsampled by that stage's cumulative pooling factor (1, 2, 4, 8, 16).
    The five side maps are concatenated on axis 3 and fused by a bias-free
    1x1 convolution whose kernel is initialised to 0.2.

    For each of the six maps (five side maps plus the fused one) the graph
    contains a sigmoid tensor named ``output1`` .. ``output6`` and a
    class-balanced sigmoid cross-entropy named ``xentropy1`` ..
    ``xentropy6``.

    Args:
        image: input image tensor. A 3-element constant is subtracted from
            it, so its last axis is expected to hold 3 channels.
        edgemap: ground-truth edge map. A new axis is inserted at index 3.
            It is compared against an int32 prediction, so presumably it is
            an int32 tensor -- TODO confirm against the input signature.

    Returns:
        The tensor named ``cost`` (sum of the six cross-entropies and the
        weight-decay term) when the current tower is training, otherwise
        ``None``.
    """
    # The 3-element constant broadcasts over the last axis of ``image``.
    image = image - tf.constant([104, 116, 122], dtype='float32')
    edgemap = tf.expand_dims(edgemap, 3, name='edgemap4d')

    def branch(name, l, up):
        """Project ``l`` to one channel and upsample it by a total factor ``up``.

        ``up`` is halved after every 2x upsampling step until it reaches 1,
        so it is expected to be a power of two.
        """
        with tf.variable_scope(name):
            l = Conv2D('convfc', l, 1, kernel_size=1, activation=tf.identity,
                       use_bias=True,
                       kernel_initializer=tf.constant_initializer())
            while up != 1:
                l = BilinearUpSample('upsample{}'.format(up), l, 2)
                # Floor division keeps ``up`` an int. With true division
                # (Python 3) it would turn into a float and the next layer
                # would be named e.g. 'upsample2.0' instead of 'upsample2'.
                up = up // 2
            return l

    with argscope(Conv2D, kernel_size=3, activation=tf.nn.relu):
        l = Conv2D('conv1_1', image, 64)
        l = Conv2D('conv1_2', l, 64)
        b1 = branch('branch1', l, 1)
        l = MaxPooling('pool1', l, 2)

        l = Conv2D('conv2_1', l, 128)
        l = Conv2D('conv2_2', l, 128)
        b2 = branch('branch2', l, 2)
        l = MaxPooling('pool2', l, 2)

        l = Conv2D('conv3_1', l, 256)
        l = Conv2D('conv3_2', l, 256)
        l = Conv2D('conv3_3', l, 256)
        b3 = branch('branch3', l, 4)
        l = MaxPooling('pool3', l, 2)

        l = Conv2D('conv4_1', l, 512)
        l = Conv2D('conv4_2', l, 512)
        l = Conv2D('conv4_3', l, 512)
        b4 = branch('branch4', l, 8)
        l = MaxPooling('pool4', l, 2)

        l = Conv2D('conv5_1', l, 512)
        l = Conv2D('conv5_2', l, 512)
        l = Conv2D('conv5_3', l, 512)
        b5 = branch('branch5', l, 16)

        # Learned weighted fusion of the five side maps.
        final_map = Conv2D('convfcweight',
                           tf.concat([b1, b2, b3, b4, b5], 3), 1, kernel_size=1,
                           kernel_initializer=tf.constant_initializer(0.2),
                           use_bias=False, activation=tf.identity)

    costs = []
    for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]):
        output = tf.nn.sigmoid(b, name='output{}'.format(idx + 1))
        xentropy = class_balanced_sigmoid_cross_entropy(
            b, edgemap,
            name='xentropy{}'.format(idx + 1))
        costs.append(xentropy)

    # ``output`` is the last loop value, i.e. the sigmoid of the fused map.
    # some magic threshold
    pred = tf.cast(tf.greater(output, 0.5), tf.int32, name='prediction')
    wrong = tf.cast(tf.not_equal(pred, edgemap), tf.float32)
    wrong = tf.reduce_mean(wrong, name='train_error')

    if get_current_tower_context().is_training:
        # Weight-decay coefficient decays by 0.7 every 80000 steps (staircase).
        wd_w = tf.train.exponential_decay(2e-4, get_global_step_var(),
                                          80000, 0.7, True)
        wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
        costs.append(wd_cost)

        add_param_summary(('.*/W', ['histogram']))   # monitor W
        total_cost = tf.add_n(costs, name='cost')
        add_moving_summary(wrong, total_cost, *costs)
        return total_cost
def build_graph(self, image, label):
    """Build a width-reduced, quantized conv-net classifier and return its cost.

    The three callables returned by ``get_dorefa(BITW, BITA, BITG)`` are
    applied to variables (``fw``, via ``remap_variables``), to clipped
    activations (``fa``) and directly after convolutions (``fg``);
    presumably these are the weight / activation / gradient quantizers --
    confirm against ``get_dorefa``.

    Every convolution width is a base width scaled by ``zp = 0.25``.

    Args:
        image: input image tensor; it is divided by 256.0 before use.
        label: integer class labels, used for top-1 error and the sparse
            softmax cross-entropy.

    Returns:
        The tensor named ``cost``: mean cross-entropy plus the L2 penalty
        on variables matching ``fc.*/W``.
    """
    is_training = get_current_tower_context().is_training

    fw, fa, fg = get_dorefa(BITW, BITA, BITG)

    # monkey-patch tf.get_variable to apply fw
    def binarize_weight(v):
        name = v.op.name
        # don't binarize first and last layer
        if not name.endswith('W') or 'conv0' in name or 'fc' in name:
            return v
        else:
            logger.info("Binarizing weight {}".format(v.op.name))
            # The author left ``ternarize(v)`` as a commented-out alternative.
            return fw(v)

    def cabs(x):
        # Absolute value clipped to at most 1.
        return tf.minimum(1.0, tf.abs(x), name='cabs')

    def activate(x):
        return fa(cabs(x))

    def width(base):
        # Channel counts must be plain ints. ``np.round`` returns a NumPy
        # float, which not every layer implementation accepts as a size.
        return int(np.round(base * zp))

    image = image / 256.0
    zp = 0.25  # width multiplier applied to every convolution below

    with remap_variables(binarize_weight), \
            argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \
            argscope(Conv2D, use_bias=False):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', width(48), 5, padding='VALID', use_bias=True)
                  .MaxPooling('pool0', 2, padding='SAME')
                  .apply(activate)
                  # 18
                  .Conv2D('conv1', width(64), 3, padding='SAME')
                  .apply(fg)
                  .BatchNorm('bn1').apply(activate)

                  .Conv2D('conv2', width(64), 3, padding='SAME')
                  .apply(fg)
                  .BatchNorm('bn2')
                  .MaxPooling('pool1', 2, padding='SAME')
                  .apply(activate)
                  # 9
                  .Conv2D('conv3', width(128), 3, padding='VALID')
                  .apply(fg)
                  .BatchNorm('bn3').apply(activate)
                  # 7

                  .Conv2D('conv4', width(128), 3, padding='SAME')
                  .apply(fg)
                  .BatchNorm('bn4').apply(activate)

                  .Conv2D('conv5', width(128), 3, padding='VALID')
                  .apply(fg)
                  .BatchNorm('bn5').apply(activate)
                  # 5
                  # Second positional argument of tf.nn.dropout (keep_prob in
                  # TF 1.x): 0.5 while training, 1.0 (no dropout) otherwise.
                  .tf.nn.dropout(0.5 if is_training else 1.0)
                  .Conv2D('conv6', width(512), 5, padding='VALID')
                  .apply(fg).BatchNorm('bn6')
                  .apply(cabs)
                  .FullyConnected('fc1', 10)())
    tf.nn.softmax(logits, name='output')

    # compute the number of failed samples
    wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), tf.float32, name='wrong_tensor')
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')
    # weight decay on all W of fc layers
    wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

    add_param_summary(('.*/W', ['histogram', 'rms']))
    total_cost = tf.add_n([cost, wd_cost], name='cost')
    add_moving_summary(cost, wd_cost, total_cost)
    return total_cost
def build_losses(self, logits_real, logits_fake, logits_s_pred, logits_s_true, extra_g=0, l2_norm=0.00001):
    r"""Build discriminator, "fair" and generator losses and store them on ``self``.

    D and G play the two-player minimax game with value function
    :math:`V(G,D)`:

    .. math::

        \min_G \max_D V(D, G) = \mathbb{E}_{x \sim p_{data}} [\log D(x)] +
            \mathbb{E}_{z \sim p_{fake}} [\log (1 - D(G(z)))]

    On top of the GAN terms, a third loss (``f_loss``) is built from
    ``logits_s_pred`` / ``logits_s_true`` and stored negated.

    Args:
        logits_real (tensorflow.Tensor): discrim logits from real samples.
        logits_fake (tensorflow.Tensor): discrim logits from fake samples from generator.
        logits_s_pred (tensorflow.Tensor): logits passed as ``logits`` to the
            cross-entropy of the "fair" loss.
        logits_s_true (tensorflow.Tensor): passed as ``labels`` to that same
            cross-entropy (despite the ``logits_`` prefix).
        extra_g (float): extra term added to the generator loss; it is
            summarised under the name ``klloss``.
        l2_norm (float): scale to apply L2 regularization.

    Returns:
        None. The losses are exposed as ``self.d_loss``, ``self.f_loss`` and
        ``self.g_loss``.
    """
    with tf.name_scope("GAN_loss"):
        score_real = tf.sigmoid(logits_real)
        score_fake = tf.sigmoid(logits_fake)
        tf.summary.histogram('score-real', score_real)
        tf.summary.histogram('score-fake', score_fake)

        score_s_pred = tf.sigmoid(logits_s_pred)
        tf.summary.histogram('score-s-pred', score_s_pred)

        with tf.name_scope("discrim"):
            # As written, the cross-entropy (against all-ones labels) is
            # scaled by 0.7 and uniform noise in [0, 0.3) is added to the
            # *loss values*, not to the labels.
            # NOTE(review): this looks like it may have been intended as
            # noisy label smoothing (labels = ones * 0.7 + noise) -- confirm
            # the parenthesisation with the author before changing it.
            d_loss_pos = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits( \
                logits=logits_real, \
                labels=tf.ones_like(logits_real)) * 0.7 + tf.random_uniform(tf.shape(logits_real), maxval=0.3), \
                name='loss_real'
            )
            d_loss_neg = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits( \
                logits=logits_fake, \
                labels=tf.zeros_like(logits_fake)), \
                name='loss_fake'
            )

            # Fraction of real samples scored above 0.5 / fake samples below 0.5.
            d_pos_acc = tf.reduce_mean(tf.cast(score_real > 0.5, tf.float32), name='accuracy_real')
            d_neg_acc = tf.reduce_mean(tf.cast(score_fake < 0.5, tf.float32), name='accuracy_fake')

            # Equal-weight average of both terms plus L2 on the trainable
            # variables collected under the "discrim" scope.
            d_loss = 0.5 * d_loss_pos + 0.5 * d_loss_neg + \
                tf.contrib.layers.apply_regularization(
                    tf.contrib.layers.l2_regularizer(l2_norm),
                    tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discrim"))

            self.d_loss = tf.identity(d_loss, name='loss')

        with tf.name_scope("fair"):
            f_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits( \
                logits=logits_s_pred, \
                labels=logits_s_true), \
                name='loss_fair'
            )
            # Fraction of predictions scored below 0.5.
            # NOTE(review): named "accuracy" but never compared with
            # logits_s_true -- confirm this is the intended metric.
            s_pred_acc = tf.reduce_mean(tf.cast(score_s_pred < 0.5, tf.float32), name='accuracy_s')

            # The stored loss is the negated cross-entropy.
            f_loss = - f_loss
            # f_loss = f_loss + tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(l2_norm), tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "fair"))

            self.f_loss = tf.identity(f_loss, name='loss')

        with tf.name_scope("gen"):
            # Cross-entropy of the fake logits against all-ones labels,
            # plus L2 on the trainable variables collected under the 'gen' scope.
            g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits( \
                logits=logits_fake, \
                labels=tf.ones_like(logits_fake))) + \
                tf.contrib.layers.apply_regularization(
                    tf.contrib.layers.l2_regularizer(l2_norm),
                    tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'gen'))
            g_loss = tf.identity(g_loss, name='loss')
            extra_g = tf.identity(extra_g, name='klloss')
            self.g_loss = tf.identity(g_loss + extra_g, name='final-g-loss')

        # decay=0. is forwarded to add_moving_summary unchanged.
        add_moving_summary(g_loss, extra_g, self.g_loss, self.d_loss, d_pos_acc, d_neg_acc, self.f_loss, s_pred_acc, decay=0.)
def _build_graph(self, inputs):
    """Build the quantized classifier graph, its summaries and ``self.cost``.

    Variables whose op name ends in ``W`` or ``b`` are passed through the
    first callable returned by ``get_dorefa(BITW, BITA)``, except those whose
    name contains ``linear0`` or ``last_linear``. The network body comes
    from ``self.net_fn`` and is followed by a ``last_linear`` layer with
    ``self.n_spks`` outputs.

    Besides the training cost, the graph exposes a softmax tensor named
    ``output`` and an ``utt-wrong`` tensor: a majority vote over the
    per-row argmax predictions compared against ``label[0]``.

    Args:
        inputs: a pair ``(inp, label)``.

    Returns:
        None. The cost is stored in ``self.cost`` (named ``cost``).
    """
    inp, label = inputs
    is_training = get_current_tower_context().is_training

    quantize_w, quantize_a = get_dorefa(BITW, BITA)

    # monkey-patch tf.get_variable to apply the weight quantizer
    def binarize_weight(v):
        name = v.op.name
        is_param = name.endswith('W') or name.endswith('b')
        if is_param and 'linear0' not in name and 'last_linear' not in name:
            logger.info("Quantizing weight {}".format(name))
            return quantize_w(v)
        print("Not quantizing", name)
        return v

    def nonlin(x, name="activate"):
        # batch-norm -> relu -> activation quantizer; ``name`` is unused.
        normalized = BNWithTrackedMults(x)
        return quantize_a(tf.nn.relu(normalized))

    tracked_layers = [
        FullyConnectedWithTrackedMults,
        BNReLUWithTrackedMults,
        BNWithTrackedMults,
    ]
    with remap_variables(binarize_weight), \
            argscope(tracked_layers, network_complexity=self.network_complexity), \
            argscope(BatchNorm, decay=0.9, epsilon=1e-4):
        hidden = self.net_fn(inp, nonlin, self.n_context)
        logits = FullyConnectedWithTrackedMults(
            'last_linear', hidden, out_dim=self.n_spks, nl=tf.identity)

    prob = tf.nn.softmax(logits, name='output')

    # used for validation accuracy of utterance: majority vote over rows
    guesses = flatten(tf.argmax(prob, axis=1))
    candidates, _, votes = tf.unique_with_counts(guesses)
    winner_idx = tf.argmax(votes)
    winner = tf.gather(candidates, winner_idx)
    first_label = tf.cast(label[0], tf.int64)
    # Created only for its name; it is looked up in the graph as 'utt-wrong'.
    tf.expand_dims(tf.not_equal(winner, first_label), axis=0, name='utt-wrong')

    per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(per_sample, name='cross_entropy_loss')
    add_moving_summary(cost)

    wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))

    with tf.name_scope('original-weight-summaries'):
        for pattern in ('.*/W', '.*/b'):
            add_param_summary((pattern, ['rms', 'histogram']))

    with tf.name_scope('activation-summaries'):
        def wanted(name):
            if not (name.endswith('output') or name.endswith('output:0')):
                return False
            return "Inference" not in name and 'quantized' not in name

        tensors = get_tensors_from_graph(tf.get_default_graph(), wanted)
        print("Adding activation tensors to summary:", tensors)
        for tensor in tensors:
            add_tensor_summary(tensor, ['rms', 'histogram'])

    if not self.regularize:
        self.cost = tf.identity(cost, name='cost')
    else:
        # decreasing regularization on all W of fc layers
        wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                          480000, 0.2, True)
        wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
        add_moving_summary(wd_cost)
        self.cost = tf.add_n([cost, wd_cost], name='cost')

    # Expose the complexity counters as named constants in the graph.
    complexity = self.network_complexity
    tf.constant([complexity['mults']], name='TotalMults')
    tf.constant([complexity['weights']], name='TotalWeights')
    logger.info("Parameter count: {}".format(self.network_complexity))
def _build_graph(self, inputs):
    """Build the RPN + Fast R-CNN graph for one image.

    The image gets a leading batch axis, is preprocessed and transposed
    with permutation ``[0, 3, 1, 2]``, then fed through the conv4 backbone
    and the RPN head. RPN box logits are decoded against the anchors that
    cover the feature map and turned into proposals.

    * Training: proposals are sampled against the ground truth, RoI
      features go through conv5 and the Fast R-CNN head, and ``self.cost``
      (named ``total_cost``) is the sum of both RPN losses, both Fast R-CNN
      losses and the weight-decay term.
    * Inference: every proposal is classified; the graph exposes
      ``fastrcnn_all_probs``, ``fastrcnn_fg_probs`` and ``fastrcnn_fg_boxes``.

    Args:
        inputs: the 5-tuple ``(image, anchor_labels, anchor_boxes, gt_boxes,
            gt_labels)``.

    Returns:
        None.
    """
    training = get_current_tower_context().is_training
    image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    batched = tf.expand_dims(image, 0)
    stride = config.ANCHOR_STRIDE

    # FSxFSxNAx4 (FS=MAX_SIZE//ANCHOR_STRIDE)
    with tf.name_scope('anchors'):
        all_anchors = tf.constant(get_all_anchors(), name='all_anchors', dtype=tf.float32)
        # Keep only the anchors that fall on this image's feature map.
        fm_rows = tf.shape(batched)[1] // stride
        fm_cols = tf.shape(batched)[2] // stride
        extent = tf.stack([fm_rows, fm_cols, -1, -1])
        fm_anchors = tf.slice(all_anchors, [0, 0, 0, 0], extent, name='fm_anchors')
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

    preprocessed = image_preprocess(batched, bgr=True)
    net_input = tf.transpose(preprocessed, [0, 3, 1, 2])

    # resnet50
    featuremap = pretrained_resnet_conv4(net_input, [3, 4, 6])
    rpn_label_logits, rpn_box_logits = rpn_head(featuremap)
    rpn_label_loss, rpn_box_loss = rpn_losses(
        anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

    # (fHxfWxNA)x4, floatbox
    rpn_boxes = decode_bbox_target(rpn_box_logits, fm_anchors)
    flat_scores = tf.reshape(rpn_label_logits, [-1])
    spatial_shape = tf.shape(net_input)[2:]
    proposal_boxes, _proposal_scores = generate_rpn_proposals(
        rpn_boxes, flat_scores, spatial_shape)

    if training:
        rcnn_sampled_boxes, rcnn_encoded_boxes, rcnn_labels = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
        # Scale boxes from image coordinates down to feature-map coordinates.
        boxes_on_featuremap = rcnn_sampled_boxes * (1.0 / stride)
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        feature_fastrcnn = resnet_conv5(roi_resized)    # nxc
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(
            feature_fastrcnn, config.NUM_CLASS)
        fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
            rcnn_labels, rcnn_encoded_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn)/.*W',
            l2_regularizer(1e-4), name='wd_cost')

        loss_terms = [
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            wd_cost,
        ]
        self.cost = tf.add_n(loss_terms, 'total_cost')

        add_moving_summary(self.cost)
        add_moving_summary(wd_cost)
        return

    # ---- inference ----
    rois_on_featuremap = proposal_boxes * (1.0 / stride)
    roi_resized = roi_align(featuremap, rois_on_featuremap, 14)
    feature_fastrcnn = resnet_conv5(roi_resized)    # nxc
    label_logits, fastrcnn_box_logits = fastrcnn_head(
        feature_fastrcnn, config.NUM_CLASS)
    label_probs = tf.nn.softmax(label_logits, name='fastrcnn_all_probs')

    # NP,
    labels = tf.argmax(label_logits, axis=1)
    fg_ind, fg_box_logits = fastrcnn_predict_boxes(labels, fastrcnn_box_logits)
    # Created only for its name; it is fetched from the graph by name.
    tf.gather(label_probs, fg_ind, name='fastrcnn_fg_probs')

    fg_boxes = tf.gather(proposal_boxes, fg_ind)
    reg_weights = tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
    fg_box_logits = fg_box_logits / reg_weights
    # Nfx4, floatbox
    fg_decoded = decode_bbox_target(fg_box_logits, fg_boxes)
    tf.identity(fg_decoded, name='fastrcnn_fg_boxes')
def _build_graph(self, inputs):
    """Build a U-Net that regresses a distance map (and optionally a type map).

    The encoder has five levels of two unpadded 3x3 convolutions each
    (32 to 512 channels), with 2x max-pooling before every level but the
    first. The decoder mirrors it: each level applies a 2x transposed
    convolution, concatenates the centre-cropped encoder feature on axis 1
    and runs two more unpadded 3x3 convolutions.

    Graph outputs: ``predmap-dst`` (distance prediction) and
    ``predmap-coded`` (type softmax concatenated with the distance
    prediction when ``self.type_classification`` is set, otherwise the
    distance prediction alone).

    When training, ``self.cost`` is the mean squared distance error, plus
    the type cross-entropy if enabled, plus the L2 penalty on ``.*/W``;
    image summaries ``input`` and ``output`` are added too.

    Args:
        inputs: a pair ``(images, truemap_coded)``. The last channel of
            ``truemap_coded`` is read as the distance target and, when type
            classification is on, channel 1 as the type target.

    Returns:
        None.
    """
    ####
    def down_conv_block(name, x, channel, nr_blks, stride=1):
        """Optionally max-pool by 2, then apply ``nr_blks`` 3x3 BN-ReLU convs."""
        with tf.variable_scope(name):
            if stride != 1:
                assert stride == 2, 'U-Net supports stride 2 down-sample only'
                x = MaxPooling('max_pool', x, 2, strides=2)
            for k in range(nr_blks):
                x = Conv2D('conv_%d' % k, x, channel, 3,
                           padding='valid', strides=1, activation=BNReLU)
            return x

    ####
    def up_conv_block(name, x, skip, channel, nr_blks, stride=2):
        """Upsample by 2, concatenate ``skip`` on axis 1, then apply the convs."""
        with tf.variable_scope(name):
            if stride != 1:
                in_channels = x.get_shape().as_list()[1]  # NCHW
                assert stride == 2, 'U-Net supports stride 2 up-sample only'
                x = Conv2DTranspose('deconv', x, in_channels, 2, strides=2)
                x = tf.concat([x, skip], axis=1)
            for k in range(nr_blks):
                x = Conv2D('conv_%d' % k, x, channel, 3,
                           padding='valid', strides=1, activation=BNReLU)
            return x

    ####
    is_training = get_current_tower_context().is_training

    images, truemap_coded = inputs
    orig_imgs = images

    if self.type_classification:
        type_ids = tf.cast(truemap_coded[..., 1], tf.int32)
        type_ids = tf.identity(type_ids, name='truemap-type')
        one_type = tf.one_hot(type_ids, self.nr_types, axis=-1)
        true_type = tf.expand_dims(type_ids, axis=-1)

    dst_target = tf.expand_dims(truemap_coded[..., -1], axis=-1)
    true_dst = tf.identity(dst_target, name='truemap-dst')

    #### Xavier initializer
    conv_defaults = argscope(
        Conv2D, activation=tf.identity, use_bias=True,
        kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
        bias_initializer=tf.constant_initializer(0.1))
    layout_defaults = argscope(
        [Conv2D, Conv2DTranspose, MaxPooling, BatchNorm],
        data_format=self.data_format)

    with conv_defaults, layout_defaults:
        # Scale pixel values and move channels to axis 1.
        net_in = tf.transpose(images / 255.0, [0, 3, 1, 2])

        ####
        with tf.variable_scope('encoder'):
            e0 = down_conv_block('e0', net_in, 32, nr_blks=2, stride=1)
            e1 = down_conv_block('e1', e0, 64, nr_blks=2, stride=2)
            e2 = down_conv_block('e2', e1, 128, nr_blks=2, stride=2)
            e3 = down_conv_block('e3', e2, 256, nr_blks=2, stride=2)
            e4 = down_conv_block('e4', e3, 512, nr_blks=2, stride=2)

            # Crop each skip feature before it is concatenated in the decoder.
            c0 = crop_op(e0, (176, 176))
            c1 = crop_op(e1, (80, 80))
            c2 = crop_op(e2, (32, 32))
            c3 = crop_op(e3, (8, 8))

        with tf.variable_scope('decoder'):
            d3 = up_conv_block('d3', e4, c3, 256, nr_blks=2, stride=2)
            d2 = up_conv_block('d2', d3, c2, 128, nr_blks=2, stride=2)
            d1 = up_conv_block('d1', d2, c1, 64, nr_blks=2, stride=2)
            d0 = up_conv_block('d0', d1, c0, 32, nr_blks=2, stride=2)

        ####
        logi_dst = Conv2D('conv_out_dst', d0, 1, 1, activation=tf.identity)
        logi_dst = tf.transpose(logi_dst, [0, 2, 3, 1])
        pred_dst = tf.identity(logi_dst, name='predmap-dst')

        if not self.type_classification:
            predmap_coded = pred_dst
        else:
            logi_type = Conv2D('conv_out_type', d0, self.nr_types, 1, activation=tf.identity)
            logi_type = tf.transpose(logi_type, [0, 2, 3, 1])
            soft_type = tf.nn.softmax(logi_type, axis=-1)
            # encoded so that inference can extract all output at once
            predmap_coded = tf.concat([soft_type, pred_dst], axis=-1)

        # * channel ordering: type-map, segmentation map
        # encoded so that inference can extract all output at once
        predmap_coded = tf.identity(predmap_coded, name='predmap-coded')

    ####
    if is_training:
        ######## LOSS
        loss = 0
        ### regression loss
        residual = pred_dst - true_dst
        squared = residual * residual
        loss_mse = tf.reduce_mean(squared, name='loss_mse')
        loss += loss_mse

        if self.type_classification:
            type_xent = categorical_crossentropy(soft_type, one_type)
            loss_type = tf.reduce_mean(type_xent, name='loss-xentropy-class')
            add_moving_summary(loss_type)
            loss += loss_type

        wd_loss = regularize_cost('.*/W', l2_regularizer(5.0e-6), name='l2_regularize_loss')
        loss += wd_loss

        self.cost = tf.identity(loss, name='cost')
        add_moving_summary(self.cost)
        ####

        add_param_summary(('.*/W', ['histogram']))   # monitor W

        #### logging visual sthg
        orig_imgs = tf.cast(orig_imgs, tf.uint8)
        tf.summary.image('input', orig_imgs, max_outputs=1)

        # Crop the input so it can be concatenated with the colourised maps.
        cropped_imgs = crop_op(orig_imgs, (184, 184), "NHWC")

        pred_viz = colorize(pred_dst[..., 0], cmap='jet')
        true_viz = colorize(true_dst[..., 0], cmap='jet')

        viz = tf.concat([cropped_imgs, true_viz, pred_viz], 2)
        tf.summary.image('output', viz, max_outputs=1)

    return None