def _build_graph(self, inputs):
    input, nextinput = inputs

    cell = rnn.MultiRNNCell([rnn.LSTMBlockCell(num_units=param.rnn_size)
                             for _ in range(param.num_rnn_layer)])

    def get_v(n):
        ret = tf.get_variable(n + '_unused', [param.batch_size, param.rnn_size],
                              trainable=False,
                              initializer=tf.constant_initializer())
        ret = tf.placeholder_with_default(ret, shape=[None, param.rnn_size], name=n)
        return ret

    initial = (rnn.LSTMStateTuple(get_v('c0'), get_v('h0')),
               rnn.LSTMStateTuple(get_v('c1'), get_v('h1')))

    embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
    input_feature = tf.nn.embedding_lookup(embeddingW, input)  # B x seqlen x rnnsize
    input_list = tf.unstack(input_feature, axis=1)  # seqlen x (Bxrnnsize)

    outputs, last_state = rnn.static_rnn(cell, input_list, initial, scope='rnnlm')
    last_state = tf.identity(last_state, 'last_state')

    # seqlen x (Bxrnnsize)
    output = tf.reshape(tf.concat(outputs, 1), [-1, param.rnn_size])  # (Bxseqlen) x rnnsize
    logits = FullyConnected('fc', output, param.vocab_size, nl=tf.identity)
    tf.nn.softmax(logits / param.softmax_temprature, name='prob')

    xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.reshape(nextinput, [-1]))
    self.cost = tf.reduce_mean(xent_loss, name='cost')
    summary.add_param_summary(('.*/W', ['histogram']))  # monitor histogram of all W
    summary.add_moving_summary(self.cost)
def _build_graph(self, inputs): """This function should build the model which takes the input variables and define self.cost at the end""" # inputs contains a list of input variables defined above image, label = inputs # In tensorflow, inputs to convolution function are assumed to be # NHWC. Add a single channel here. image = tf.expand_dims(image, 3) image = image * 2 - 1 # center the pixels values at zero # The context manager `argscope` sets the default option for all the layers under # this context. Here we use 32 channel convolution with shape 3x3 with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32): logits = (LinearWrap(image) .Conv2D('conv0') .MaxPooling('pool0', 2) .Conv2D('conv1') .Conv2D('conv2') .MaxPooling('pool1', 2) .Conv2D('conv3') .FullyConnected('fc0', 512, nl=tf.nn.relu) .Dropout('dropout', 0.5) .FullyConnected('fc1', out_dim=10, nl=tf.identity)()) tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities # a vector of length B with loss of each sample cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct') accuracy = tf.reduce_mean(correct, name='accuracy') # This will monitor training error (in a moving_average fashion): # 1. write the value to tensosrboard # 2. write the value to stat.json # 3. print the value after each epoch train_error = tf.reduce_mean(1 - correct, name='train_error') summary.add_moving_summary(train_error, accuracy) # Use a regex to find parameters to apply weight decay. # Here we apply a weight decay on all W (weight matrix) of all fc layers wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss), name='regularize_loss') self.cost = tf.add_n([wd_cost, cost], name='total_cost') summary.add_moving_summary(cost, wd_cost, self.cost) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('.*/W', ['histogram', 'rms']))
def build_graph(self, image, label): """This function should build the model which takes the input variables and return cost at the end""" # In tensorflow, inputs to convolution function are assumed to be # NHWC. Add a single channel here. image = tf.expand_dims(image, 3) image = image * 2 - 1 # center the pixels values at zero # The context manager `argscope` sets the default option for all the layers under # this context. Here we use 32 channel convolution with shape 3x3 with argscope([tf.layers.conv2d], padding='same', activation=tf.nn.relu): l = tf.layers.conv2d(image, 32, 3, name='conv0') l = tf.layers.max_pooling2d(l, 2, 2, padding='valid') l = tf.layers.conv2d(l, 32, 3, name='conv1') l = tf.layers.conv2d(l, 32, 3, name='conv2') l = tf.layers.max_pooling2d(l, 2, 2, padding='valid') l = tf.layers.conv2d(l, 32, 3, name='conv3') l = tf.layers.flatten(l) l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc0') l = tf.layers.dropout(l, rate=0.5, training=get_current_tower_context().is_training) logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1') # a vector of length B with loss of each sample cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct') accuracy = tf.reduce_mean(correct, name='accuracy') # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically # 1. written to tensosrboard # 2. written to stat.json # 3. printed after each epoch train_error = tf.reduce_mean(1 - correct, name='train_error') summary.add_moving_summary(train_error, accuracy) # Use a regex to find parameters to apply weight decay. # Here we apply a weight decay on all W (weight matrix) of all fc layers # If you don't like regex, you can certainly define the cost in any other methods. wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/kernel', tf.nn.l2_loss), name='regularize_loss') total_cost = tf.add_n([wd_cost, cost], name='total_cost') summary.add_moving_summary(cost, wd_cost, total_cost) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('.*/kernel', ['histogram', 'rms'])) # the function should return the total cost to be optimized return total_cost
def _build_graph(self, inputs): """This function should build the model which takes the input variables and define self.cost at the end""" # inputs contains a list of input variables defined above image, label = inputs # In tensorflow, inputs to convolution function are assumed to be # NHWC. Add a single channel here. image = tf.expand_dims(image, 3) image = image * 2 - 1 # center the pixels values at zero l = tf.layers.conv2d(image, 32, 3, padding='same', activation=tf.nn.relu, name='conv0') l = tf.layers.max_pooling2d(l, 2, 2, padding='valid') l = tf.layers.conv2d(l, 32, 3, padding='same', activation=tf.nn.relu, name='conv1') l = tf.layers.conv2d(l, 32, 3, padding='same', activation=tf.nn.relu, name='conv2') l = tf.layers.max_pooling2d(l, 2, 2, padding='valid') l = tf.layers.conv2d(l, 32, 3, padding='same', activation=tf.nn.relu, name='conv3') l = tf.layers.flatten(l) l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc0') l = tf.layers.dropout(l, rate=0.5, training=get_current_tower_context().is_training) logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1') tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities # a vector of length B with loss of each sample cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct') accuracy = tf.reduce_mean(correct, name='accuracy') # This will monitor training error (in a moving_average fashion): # 1. write the value to tensosrboard # 2. write the value to stat.json # 3. print the value after each epoch train_error = tf.reduce_mean(1 - correct, name='train_error') summary.add_moving_summary(train_error, accuracy) # Use a regex to find parameters to apply weight decay. # Here we apply a weight decay on all W (weight matrix) of all fc layers wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/kernel', tf.nn.l2_loss), name='regularize_loss') self.cost = tf.add_n([wd_cost, cost], name='total_cost') summary.add_moving_summary(cost, wd_cost, self.cost) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
def build_graph(self, comb_state, action, reward, isOver):
    comb_state = tf.cast(comb_state, tf.float32)
    comb_state = tf.reshape(
        comb_state, [-1] + list(self._shape2d) + [self.history + 1, self.channel])

    state = tf.slice(comb_state, [0, 0, 0, 0, 0], [-1, -1, -1, self.history, -1])
    state = tf.reshape(state, self._shape4d_for_prediction, name='state')
    self.predict_value = self.get_DQN_prediction(state)
    if not get_current_tower_context().is_training:
        return

    reward = tf.clip_by_value(reward, -1, 1)
    next_state = tf.slice(comb_state, [0, 0, 0, 1, 0],
                          [-1, -1, -1, self.history, -1], name='next_state')
    next_state = tf.reshape(next_state, self._shape4d_for_prediction)
    action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

    pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
    max_pred_reward = tf.reduce_mean(tf.reduce_max(self.predict_value, 1),
                                     name='predict_reward')
    summary.add_moving_summary(max_pred_reward)

    with tf.variable_scope('target'), varreplace.freeze_variables(skip_collection=True):
        targetQ_predict_value = self.get_DQN_prediction(next_state)    # NxA

    if self.method != 'Double':
        # DQN
        best_v = tf.reduce_max(targetQ_predict_value, 1)    # N,
    else:
        # Double-DQN
        next_predict_value = self.get_DQN_prediction(next_state)
        self.greedy_choice = tf.argmax(next_predict_value, 1)   # N,
        predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
        best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

    target = reward + (1.0 - tf.cast(isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)

    cost = tf.losses.huber_loss(
        target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
    summary.add_param_summary(('conv.*/W', ['histogram', 'rms']),
                              ('fc.*/W', ['histogram', 'rms']))   # monitor all W
    summary.add_moving_summary(cost)
    return cost
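# --- Added illustrative sketch (not part of the original snippets) ---------------------------
# The 'target' tensor in the function above is the one-step Bellman target
# r + gamma * max_a' Q_target(s', a'), with the bootstrap term dropped on terminal transitions.
# A plain-numpy scalar example of the same arithmetic, with an assumed gamma of 0.99:
import numpy as np

reward, is_over, gamma = 1.0, False, 0.99
target_q_next = np.array([0.4, 1.5, -0.2])   # Q_target(s', .) from the frozen target network
target = reward + (1.0 - float(is_over)) * gamma * target_q_next.max()
print(target)  # 1.0 + 0.99 * 1.5 = 2.485; would be 1.0 if is_over were True
# ---------------------------------------------------------------------------------------------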
def _build_graph(self, inputs): images, truemap_coded = inputs orig_imgs = images if hasattr(self, 'type_classification') and self.type_classification: true_type = truemap_coded[...,1] true_type = tf.cast(true_type, tf.int32) true_type = tf.identity(true_type, name='truemap-type') one_type = tf.one_hot(true_type, self.nr_types, axis=-1) true_type = tf.expand_dims(true_type, axis=-1) true_np = tf.cast(true_type > 0, tf.int32) # ? sanity this true_np = tf.identity(true_np, name='truemap-np') one_np = tf.one_hot(tf.squeeze(true_np), 2, axis=-1) else: true_np = truemap_coded[...,0] true_np = tf.cast(true_np, tf.int32) true_np = tf.identity(true_np, name='truemap-np') one_np = tf.one_hot(true_np, 2, axis=-1) true_np = tf.expand_dims(true_np, axis=-1) true_hv = truemap_coded[...,-2:] true_hv = tf.identity(true_hv, name='truemap-hv') #### with argscope(Conv2D, activation=tf.identity, use_bias=False, # K.he initializer W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \ argscope([Conv2D, BatchNorm], data_format=self.data_format): i = tf.transpose(images, [0, 3, 1, 2]) i = i if not self.input_norm else i / 255.0 #### d = encoder(i, self.freeze) d[0] = crop_op(d[0], (184, 184)) d[1] = crop_op(d[1], (72, 72)) #### np_feat = decoder('np', d) npx = BNReLU('preact_out_np', np_feat[-1]) hv_feat = decoder('hv', d) hv = BNReLU('preact_out_hv', hv_feat[-1]) if self.type_classification: tp_feat = decoder('tp', d) tp = BNReLU('preact_out_tp', tp_feat[-1]) # Nuclei Type Pixels (TP) logi_class = Conv2D('conv_out_tp', tp, self.nr_types, 1, use_bias=True, activation=tf.identity) logi_class = tf.transpose(logi_class, [0, 2, 3, 1]) soft_class = tf.nn.softmax(logi_class, axis=-1) #### Nuclei Pixels (NP) logi_np = Conv2D('conv_out_np', npx, 2, 1, use_bias=True, activation=tf.identity) logi_np = tf.transpose(logi_np, [0, 2, 3, 1]) soft_np = tf.nn.softmax(logi_np, axis=-1) prob_np = tf.identity(soft_np[...,1], name='predmap-prob-np') prob_np = tf.expand_dims(prob_np, axis=-1) #### Horizontal-Vertival (HV) logi_hv = Conv2D('conv_out_hv', hv, 2, 1, use_bias=True, activation=tf.identity) logi_hv = tf.transpose(logi_hv, [0, 2, 3, 1]) prob_hv = tf.identity(logi_hv, name='predmap-prob-hv') pred_hv = tf.identity(logi_hv, name='predmap-hv') # * channel ordering: type-map, segmentation map # encoded so that inference can extract all output at once if self.type_classification: predmap_coded = tf.concat([soft_class, prob_np, pred_hv], axis=-1, name='predmap-coded') else: predmap_coded = tf.concat([prob_np, pred_hv], axis=-1, name='predmap-coded') #### def get_gradient_hv(l, h_ch, v_ch): """ Calculate the horizontal partial differentiation for horizontal channel and the vertical partial differentiation for vertical channel. The partial differentiation is approximated by calculating the central differnce which is obtained by using Sobel kernel of size 5x5. The boundary is zero-padded when channel is convolved with the Sobel kernel. 
Args: l (tensor): tensor of shape NHWC with C should be 2 (1 channel for horizonal and 1 channel for vertical) h_ch(int) : index within C axis of `l` that corresponds to horizontal channel v_ch(int) : index within C axis of `l` that corresponds to vertical channel """ def get_sobel_kernel(size): assert size % 2 == 1, 'Must be odd, get size=%d' % size h_range = np.arange(-size//2+1, size//2+1, dtype=np.float32) v_range = np.arange(-size//2+1, size//2+1, dtype=np.float32) h, v = np.meshgrid(h_range, v_range) kernel_h = h / (h * h + v * v + 1.0e-15) kernel_v = v / (h * h + v * v + 1.0e-15) return kernel_h, kernel_v mh, mv = get_sobel_kernel(5) mh = tf.constant(mh, dtype=tf.float32) mv = tf.constant(mv, dtype=tf.float32) mh = tf.reshape(mh, [5, 5, 1, 1]) mv = tf.reshape(mv, [5, 5, 1, 1]) # central difference to get gradient, ignore the boundary problem h = tf.expand_dims(l[...,h_ch], axis=-1) v = tf.expand_dims(l[...,v_ch], axis=-1) dh = tf.nn.conv2d(h, mh, strides=[1, 1, 1, 1], padding='SAME') dv = tf.nn.conv2d(v, mv, strides=[1, 1, 1, 1], padding='SAME') output = tf.concat([dh, dv], axis=-1) return output def loss_mse(true, pred, name=None): ### regression loss loss = pred - true loss = tf.reduce_mean(loss * loss, name=name) return loss def loss_msge(true, pred, focus, name=None): focus = tf.stack([focus, focus], axis=-1) pred_grad = get_gradient_hv(pred, 1, 0) true_grad = get_gradient_hv(true, 1, 0) loss = pred_grad - true_grad loss = focus * (loss * loss) # artificial reduce_mean with focus region loss = tf.reduce_sum(loss) / (tf.reduce_sum(focus) + 1.0e-8) loss = tf.identity(loss, name=name) return loss #### if get_current_tower_context().is_training: #---- LOSS ----# loss = 0 for term, weight in self.loss_term.items(): if term == 'mse': term_loss = loss_mse(true_hv, pred_hv, name='loss-mse') elif term == 'msge': focus = truemap_coded[...,0] term_loss = loss_msge(true_hv, pred_hv, focus, name='loss-msge') elif term == 'bce': term_loss = categorical_crossentropy(soft_np, one_np) term_loss = tf.reduce_mean(term_loss, name='loss-bce') elif 'dice' in self.loss_term: term_loss = dice_loss(soft_np[...,0], one_np[...,0]) \ + dice_loss(soft_np[...,1], one_np[...,1]) term_loss = tf.identity(term_loss, name='loss-dice') else: assert False, 'Not support loss term: %s' % term add_moving_summary(term_loss) loss += term_loss * weight if self.type_classification: term_loss = categorical_crossentropy(soft_class, one_type) term_loss = tf.reduce_mean(term_loss, name='loss-xentropy-class') add_moving_summary(term_loss) loss = loss + term_loss term_loss = 0 for type_id in range(self.nr_types): term_loss += dice_loss(soft_class[...,type_id], one_type[...,type_id]) term_loss = tf.identity(term_loss, name='loss-dice-class') add_moving_summary(term_loss) loss = loss + term_loss ### combine the loss into single cost function self.cost = tf.identity(loss, name='overall-loss') add_moving_summary(self.cost) #### add_param_summary(('.*/W', ['histogram'])) # monitor W ### logging visual sthg orig_imgs = tf.cast(orig_imgs , tf.uint8) tf.summary.image('input', orig_imgs, max_outputs=1) orig_imgs = crop_op(orig_imgs, (190, 190), "NHWC") pred_np = colorize(prob_np[...,0], cmap='jet') true_np = colorize(true_np[...,0], cmap='jet') pred_h = colorize(prob_hv[...,0], vmin=-1, vmax=1, cmap='jet') pred_v = colorize(prob_hv[...,1], vmin=-1, vmax=1, cmap='jet') true_h = colorize(true_hv[...,0], vmin=-1, vmax=1, cmap='jet') true_v = colorize(true_hv[...,1], vmin=-1, vmax=1, cmap='jet') if not self.type_classification: viz = 
tf.concat([orig_imgs, pred_h, pred_v, pred_np, true_h, true_v, true_np], 2) else: pred_type = tf.transpose(soft_class, (0, 1, 3, 2)) pred_type = tf.reshape(pred_type, [-1, 80, 80 * self.nr_types]) true_type = tf.cast(true_type[...,0] / self.nr_classes, tf.float32) true_type = colorize(true_type, vmin=0, vmax=1, cmap='jet') pred_type = colorize(pred_type, vmin=0, vmax=1, cmap='jet') viz = tf.concat([orig_imgs, pred_h, pred_v, pred_np, pred_type, true_h, true_v, true_np, true_type,], 2) viz = tf.concat([viz[0], viz[-1]], axis=0) viz = tf.expand_dims(viz, axis=0) tf.summary.image('output', viz, max_outputs=1) return
def build_graph(self, _, x, bboxes_xyz, bboxes_lwh, semantic_labels, heading_labels, heading_residuals, size_labels, size_residuals): l0_xyz = x l0_points = x # Set Abstraction layers l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=2048, radius=0.2, nsample=64, mlp=[64, 64, 128], mlp2=None, group_all=False, scope='sa1') l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=1024, radius=0.4, nsample=64, mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa2') l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=512, radius=0.8, nsample=64, mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa3') l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz, l3_points, npoint=256, radius=1.2, nsample=64, mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa4') # Feature Propagation layers l3_points = pointnet_fp_module(l3_xyz, l4_xyz, l3_points, l4_points, [256, 256], scope='fp1') seeds_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256, 256], scope='fp2') seeds_xyz = l2_xyz # Voting Module layers offset = tf.reshape(tf.concat([seeds_xyz, seeds_points], 2), [-1, 256 + 3]) units = [256, 256, 256 + 3] for i in range(len(units)): offset = FullyConnected( 'voting%d' % i, offset, units[i], activation=BNReLU if i < len(units) - 1 else None) offset = tf.reshape(offset, [-1, 1024, 256 + 3]) # B * N * 3 votes = tf.concat([seeds_xyz, seeds_points], 2) + offset votes_xyz = votes[:, :, :3] dist2center = tf.abs( tf.expand_dims(seeds_xyz, 2) - tf.expand_dims(bboxes_xyz, 1)) surface_ind = tf.less(dist2center, tf.expand_dims(bboxes_lwh, 1) / 2.) # B * N * BB * 3, bool surface_ind = tf.equal(tf.count_nonzero(surface_ind, -1), 3) # B * N * BB surface_ind = tf.greater_equal(tf.count_nonzero( surface_ind, -1), 1) # B * N, should be in at least one bbox dist2center_norm = tf.norm(dist2center, axis=-1) # B * N * BB votes_assignment = tf.argmin(dist2center_norm, -1, output_type=tf.int32) # B * N, int bboxes_xyz_votes_gt = tf.gather_nd( bboxes_xyz, tf.stack([ tf.tile( tf.expand_dims(tf.range(tf.shape(votes_assignment)[0]), -1), [1, tf.shape(votes_assignment)[1]]), votes_assignment ], 2)) # B * N * 3 vote_reg_loss = tf.reduce_mean( tf.norm(votes_xyz - bboxes_xyz_votes_gt, ord=1, axis=-1) * tf.cast(surface_ind, tf.float32), name='vote_reg_loss') votes_points = votes[:, :, 3:] # Proposal Module layers # Farthest point sampling on seeds proposals_xyz, proposals_output, _ = pointnet_sa_module( votes_xyz, votes_points, npoint=config.PROPOSAL_NUM, radius=0.3, nsample=64, mlp=[128, 128, 128], mlp2=[128, 128, 5 + 2 * config.NH + 4 * config.NS + config.NC], group_all=False, scope='proposal', sample_xyz=seeds_xyz) obj_cls_score = tf.identity(proposals_output[..., :2], 'obj_scores') nms_iou = tf.get_variable('nms_iou', shape=[], initializer=tf.constant_initializer(0.25), trainable=False) if not get_current_tower_context().is_training: def get_3d_bbox(box_size, heading_angle, center): batch_size = tf.shape(heading_angle)[0] c = tf.cos(heading_angle) s = tf.sin(heading_angle) zeros = tf.zeros_like(c) ones = tf.ones_like(c) rotation = tf.reshape( tf.stack([c, zeros, s, zeros, ones, zeros, -s, zeros, c], -1), tf.stack([batch_size, -1, 3, 3])) l, w, h = box_size[..., 0], box_size[..., 1], box_size[ ..., 2] # lwh(xzy) order!!! 
corners = tf.reshape( tf.stack([ l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2, -h / 2, -h / 2, w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2 ], -1), tf.stack([batch_size, -1, 3, 8])) return tf.einsum('ijkl,ijlm->ijmk', rotation, corners) + tf.expand_dims( center, 2) # B * N * 8 * 3 class_mean_size_tf = tf.constant(class_mean_size) size_cls_pred = tf.argmax( proposals_output[..., 5 + 2 * config.NH:5 + 2 * config.NH + config.NS], axis=-1) size_cls_pred_onehot = tf.one_hot(size_cls_pred, depth=config.NS, axis=-1) # B * N * NS size_residual_pred = tf.reduce_sum( tf.expand_dims(size_cls_pred_onehot, -1) * tf.reshape( proposals_output[..., 5 + 2 * config.NH + config.NS:5 + 2 * config.NH + 4 * config.NS], (-1, config.PROPOSAL_NUM, config.NS, 3)), axis=2) size_pred = tf.gather_nd( class_mean_size_tf, tf.expand_dims(size_cls_pred, -1)) * tf.maximum( 1 + size_residual_pred, 1e-6) # B * N * 3: size # with tf.control_dependencies([tf.print(size_pred[0, 0, 2])]): center_pred = proposals_xyz + proposals_output[..., 2:5] # B * N * 3 heading_cls_pred = tf.argmax(proposals_output[..., 5:5 + config.NH], axis=-1) heading_cls_pred_onehot = tf.one_hot(heading_cls_pred, depth=config.NH, axis=-1) heading_residual_pred = tf.reduce_sum( heading_cls_pred_onehot * proposals_output[..., 5 + config.NH:5 + 2 * config.NH], axis=2) heading_pred = tf.floormod( (tf.cast(heading_cls_pred, tf.float32) * 2 + heading_residual_pred) * np.pi / config.NH, 2 * np.pi) # with tf.control_dependencies([tf.print(size_residual_pred[0, :10, :]), tf.print(size_pred[0, :10, :])]): bboxes = get_3d_bbox( size_pred, heading_pred, center_pred) # B * N * 8 * 3, lhw(xyz) order!!! # bbox_corners = tf.concat([bboxes[:, :, 6, :], bboxes[:, :, 0, :]], axis=-1) # B * N * 6, lhw(xyz) order!!! 
# with tf.control_dependencies([tf.print(bboxes[0, 0])]): nms_idx = NMS3D(bboxes, tf.reduce_max(proposals_output[..., -config.NC:], axis=-1), proposals_output[..., :2], nms_iou) # Nnms * 2 bboxes_pred = tf.gather_nd(bboxes, nms_idx, name='bboxes_pred') # Nnms * 8 * 3 class_scores_pred = tf.gather_nd( proposals_output[..., -config.NC:], nms_idx, name='class_scores_pred') # Nnms * C batch_idx = tf.identity( nms_idx[:, 0], name='batch_idx' ) # Nnms, this is used to identify between batches return # calculate positive and negative proposal idxes bboxes_xyz_gt = bboxes_xyz # B * BB * 3 bboxes_labels_gt = semantic_labels # B * BB bboxes_heading_labels_gt = heading_labels bboxes_heading_residuals_gt = heading_residuals bboxes_size_labels_gt = size_labels bboxes_size_residuals_gt = size_residuals dist_mat = tf.norm(tf.expand_dims(proposals_xyz, 2) - tf.expand_dims(bboxes_xyz_gt, 1), axis=-1) # B * PR * BB bboxes_assignment = tf.argmin(dist_mat, axis=-1) # B * PR min_dist = tf.reduce_min(dist_mat, axis=-1) positive_idxes = tf.where(min_dist < config.POSITIVE_THRES) # Np * 2 # with tf.control_dependencies([tf.print(tf.shape(positive_idxes))]): negative_idxes = tf.where(min_dist > config.NEGATIVE_THRES) # Nn * 2 positive_gt_idxes = tf.stack([ positive_idxes[:, 0], tf.gather_nd(bboxes_assignment, positive_idxes) ], axis=1) # objectiveness loss pos_obj_cls_score = tf.gather_nd(obj_cls_score, positive_idxes) pos_obj_cls_gt = tf.ones([tf.shape(positive_idxes)[0]], dtype=tf.int32) neg_obj_cls_score = tf.gather_nd(obj_cls_score, negative_idxes) neg_obj_cls_gt = tf.zeros([tf.shape(negative_idxes)[0]], dtype=tf.int32) obj_cls_loss = tf.identity( tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=pos_obj_cls_score, labels=pos_obj_cls_gt)) + tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=neg_obj_cls_score, labels=neg_obj_cls_gt)), name='obj_cls_loss') obj_correct = tf.concat([ tf.cast(tf.nn.in_top_k(pos_obj_cls_score, pos_obj_cls_gt, 1), tf.float32), tf.cast(tf.nn.in_top_k(neg_obj_cls_score, neg_obj_cls_gt, 1), tf.float32) ], axis=0, name='obj_correct') obj_accuracy = tf.reduce_mean(obj_correct, name='obj_accuracy') # center regression losses center_gt = tf.gather_nd(bboxes_xyz_gt, positive_gt_idxes) delta_predicted = tf.gather_nd(proposals_output[..., 2:5], positive_idxes) delta_gt = center_gt - tf.gather_nd(proposals_xyz, positive_idxes) center_loss = tf.reduce_mean( tf.reduce_sum(tf.losses.huber_loss( labels=delta_gt, predictions=delta_predicted, reduction=tf.losses.Reduction.NONE), axis=-1)) # Appendix A1: chamfer loss, assignment at least one bbox to each gt bbox bboxes_assignment_dual = tf.argmin(dist_mat, axis=1) # B * BB batch_idx = tf.tile( tf.expand_dims(tf.range( tf.shape(bboxes_assignment_dual, out_type=tf.int64)[0]), axis=-1), [1, tf.shape(bboxes_assignment_dual)[1]]) # B * BB delta_gt_dual = bboxes_xyz_gt - tf.gather_nd( proposals_xyz, tf.stack([batch_idx, bboxes_assignment_dual], axis=-1)) # B * BB * 3 delta_predicted_dual = tf.gather_nd( proposals_output[..., 2:5], tf.stack([batch_idx, bboxes_assignment_dual], axis=-1)) # B * BB * 3 center_loss_dual = tf.reduce_mean( tf.reduce_sum(tf.losses.huber_loss( labels=delta_gt_dual, predictions=delta_predicted_dual, reduction=tf.losses.Reduction.NONE), axis=-1)) # add up center_loss += center_loss_dual # Heading loss heading_cls_gt = tf.gather_nd(bboxes_heading_labels_gt, positive_gt_idxes) heading_cls_score = tf.gather_nd( proposals_output[..., 5:5 + config.NH], positive_idxes) heading_cls_loss = 
tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=heading_cls_score, labels=heading_cls_gt)) heading_cls_gt_onehot = tf.one_hot(heading_cls_gt, depth=config.NH, on_value=1, off_value=0, axis=-1) # Np * NH heading_residual_gt = tf.gather_nd(bboxes_heading_residuals_gt, positive_gt_idxes) # Np heading_residual_predicted = tf.gather_nd( proposals_output[..., 5 + config.NH:5 + 2 * config.NH], positive_idxes) # Np * NH heading_residual_loss = tf.losses.huber_loss( labels=heading_residual_gt, predictions=tf.reduce_sum(heading_residual_predicted * tf.to_float(heading_cls_gt_onehot), axis=1), reduction=tf.losses.Reduction.MEAN) # Size loss size_cls_gt = tf.gather_nd(bboxes_size_labels_gt, positive_gt_idxes) size_cls_score = tf.gather_nd( proposals_output[..., 5 + 2 * config.NH:5 + 2 * config.NH + config.NS], positive_idxes) size_cls_loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=size_cls_score, labels=size_cls_gt)) size_cls_gt_onehot = tf.one_hot(size_cls_gt, depth=config.NS, on_value=1, off_value=0, axis=-1) # Np * NS size_cls_gt_onehot = tf.tile( tf.expand_dims(tf.to_float(size_cls_gt_onehot), -1), [1, 1, 3]) # Np * NS * 3 size_residual_gt = tf.gather_nd(bboxes_size_residuals_gt, positive_gt_idxes) # Np * 3 size_residual_predicted = tf.reshape( tf.gather_nd( proposals_output[..., 5 + 2 * config.NH + config.NS:5 + 2 * config.NH + 4 * config.NS], positive_idxes), (-1, config.NS, 3)) # Np * NS * 3 size_residual_loss = tf.reduce_mean( tf.reduce_sum(tf.losses.huber_loss( labels=size_residual_gt, predictions=tf.reduce_sum(size_residual_predicted * tf.to_float(size_cls_gt_onehot), axis=1), reduction=tf.losses.Reduction.NONE), axis=-1)) box_loss = center_loss + 0.1 * heading_cls_loss + heading_residual_loss + 0.1 * size_cls_loss + size_residual_loss # semantic loss sem_cls_score = tf.gather_nd(proposals_output[..., -config.NC:], positive_idxes) sem_cls_gt = tf.gather_nd(bboxes_labels_gt, positive_gt_idxes) # Np sem_cls_loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=sem_cls_score, labels=sem_cls_gt), name='sem_cls_loss') sem_correct = tf.cast(tf.nn.in_top_k(sem_cls_score, sem_cls_gt, 1), tf.float32, name='sem_correct') sem_accuracy = tf.reduce_mean(sem_correct, name='sem_accuracy') # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically # 1. written to tensosrboard # 2. written to stat.json # 3. printed after each epoch summary.add_moving_summary(obj_accuracy, sem_accuracy) # Use a regex to find parameters to apply weight decay. # Here we apply a weight decay on all W (weight matrix) of all fc layers # If you don't like regex, you can certainly define the cost in any other methods. # no weight decay # wd_cost = tf.multiply(1e-5, # regularize_cost('.*/W', tf.nn.l2_loss), # name='regularize_loss') total_cost = vote_reg_loss + 0.5 * obj_cls_loss + 1. * box_loss + 0.1 * sem_cls_loss total_cost = tf.identity(total_cost, name='total_cost') summary.add_moving_summary(total_cost) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('.*/W', ['histogram', 'rms'])) # the function should return the total cost to be optimized return total_cost
def build_graph(self, image, label):
    is_training = get_current_tower_context().is_training

    fw, fa, fg = get_dorefa(BITW, BITA, BITG)

    # monkey-patch tf.get_variable to apply fw
    def binarize_weight(v):
        name = v.op.name
        # don't binarize first and last layer
        if not name.endswith('W') or 'conv0' in name or 'fc' in name:
            return v
        else:
            logger.info("Binarizing weight {}".format(v.op.name))
            return fw(v)

    def nonlin(x):
        if BITA == 32:
            return tf.nn.relu(x)
        return tf.clip_by_value(x, 0.0, 1.0)

    def activate(x):
        return fa(nonlin(x))

    image = image / 256.0

    with remap_variables(binarize_weight), \
            argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \
            argscope(Conv2D, use_bias=False):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 48, 5, padding='VALID', use_bias=True)
                  .MaxPooling('pool0', 2, padding='SAME')
                  .apply(activate)
                  # 18
                  .Conv2D('conv1', 64, 3, padding='SAME')
                  .apply(fg)
                  .BatchNorm('bn1').apply(activate)
                  .Conv2D('conv2', 64, 3, padding='SAME')
                  .apply(fg)
                  .BatchNorm('bn2')
                  .MaxPooling('pool1', 2, padding='SAME')
                  .apply(activate)
                  # 9
                  .Conv2D('conv3', 128, 3, padding='VALID')
                  .apply(fg)
                  .BatchNorm('bn3')
                  .apply(activate)
                  # 7
                  .Conv2D('conv4', 128, 3, padding='SAME')
                  .apply(fg)
                  .BatchNorm('bn4')
                  .apply(activate)
                  .Conv2D('conv5', 128, 3, padding='VALID')
                  .apply(fg)
                  .BatchNorm('bn5')
                  .apply(activate)
                  # 5
                  .Dropout(rate=0.5 if is_training else 0.0)
                  .Conv2D('conv6', 512, 5, padding='VALID')
                  .apply(fg)
                  .BatchNorm('bn6')
                  .apply(nonlin)
                  .FullyConnected('fc1', 10)())
    tf.nn.softmax(logits, name='output')

    # compute the number of failed samples
    wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), tf.float32, name='wrong_tensor')
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')
    # weight decay on all W of fc layers
    wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

    add_param_summary(('.*/W', ['histogram', 'rms']))
    total_cost = tf.add_n([cost, wd_cost], name='cost')
    add_moving_summary(cost, wd_cost, total_cost)
    return total_cost
def build_graph(self, image, label):
    image = image / 128.0
    assert tf.test.is_gpu_available()
    image = tf.transpose(image, [0, 3, 1, 2])

    def residual(name, l, increase_dim=False, first=False):
        shape = l.get_shape().as_list()
        in_channel = shape[1]

        if increase_dim:
            out_channel = in_channel * 2
            stride1 = 2
        else:
            out_channel = in_channel
            stride1 = 1

        with tf.variable_scope(name):
            b1 = l if first else BNReLU(l)
            c1 = Conv2D('conv1', b1, out_channel, strides=stride1, activation=BNReLU)
            c2 = Conv2D('conv2', c1, out_channel)
            if increase_dim:
                l = AvgPooling('pool', l, 2)
                l = tf.pad(l, [[0, 0], [in_channel // 2, in_channel // 2], [0, 0], [0, 0]])

            l = c2 + l
            return l

    with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False, kernel_size=3,
                     kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        l = Conv2D('conv0', image, 16, activation=BNReLU)
        l = residual('res1.0', l, first=True)
        for k in range(1, self.n):
            l = residual('res1.{}'.format(k), l)
        # 32,c=16

        l = residual('res2.0', l, increase_dim=True)
        for k in range(1, self.n):
            l = residual('res2.{}'.format(k), l)
        # 16,c=32

        l = residual('res3.0', l, increase_dim=True)
        for k in range(1, self.n):
            l = residual('res3.' + str(k), l)
        l = BNReLU('bnlast', l)
        # 8,c=64
        l = GlobalAvgPooling('gap', l)

    logits = FullyConnected('linear', l, 10)
    tf.nn.softmax(logits, name='output')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), name='wrong_vector')
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    # weight decay on all W of fc layers
    wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(), 480000, 0.2, True)
    wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
    add_moving_summary(cost, wd_cost)

    add_param_summary(('.*/W', ['histogram']))   # monitor W
    return tf.add_n([cost, wd_cost], name='cost')
def get_logits(self, image, label=None): if self.bit_w == 't': fw, fa, fg = get_dorefa(32, 32, 32) fw = ternarize else: fw, fa, fg = get_dorefa(self.bit_w, self.bit_a, self.bit_g) # monkey-patch tf.get_variable to apply fw def new_get_variable(v): if self.float_train: return v else: name = v.op.name # don't binarize first and last layer if model_name == 'alexnet': if not name.endswith( 'W') or 'conv0' in name or 'fct' in name: return v else: logger.info("Quantizing weight {}".format(v.op.name)) return fw(v) elif model_name == 'resnet18' and dataset_name == 'cifar': if not name.endswith( 'kernel') or 'conv1_1' in name or 'dense' in name: return v else: logger.info("Quantizing weight {}".format(v.op.name)) return fw(v) elif model_name == 'resnet18' and dataset_name == 'ImageNet': if not name.endswith( 'W') or 'conv0' in name or 'linear' in name: return v else: logger.info("Quantizing weight {}".format(v.op.name)) return fw(v) def nonlin(x): if BITA == 32: return tf.nn.relu(x) # still use relu for 32-bit cases return tf.clip_by_value(x, 0.0, 1.0) def activate(x): if self.float_train: return x else: return fa(nonlin(x)) def identity_block2d(input_tensor, kernel_size, filters, stage, block, is_training, reuse, kernel_initializer=tf.contrib.layers. variance_scaling_initializer()): filters1, filters2, filters3 = filters conv_name_2 = 'conv' + str(stage) + '_' + str(block) + '_3x3' bn_name_2 = 'bn' + str(stage) + '_' + str(block) + '_3x3' x = tf.layers.conv2d(input_tensor, filters2, kernel_size, use_bias=False, padding='SAME', kernel_initializer=kernel_initializer, name=conv_name_2, reuse=reuse) x = tf.layers.batch_normalization(x, training=is_training, name=bn_name_2, reuse=reuse) x = activate(x) conv_name_3 = 'conv' + str(stage) + '_' + str( block) + '_1x1_increase' bn_name_3 = 'bn' + str(stage) + '_' + str(block) + '_1x1_increase' x = tf.layers.conv2d(x, filters3, (kernel_size, kernel_size), use_bias=False, padding='SAME', kernel_initializer=kernel_initializer, name=conv_name_3, reuse=reuse) x = tf.layers.batch_normalization(x, training=is_training, name=bn_name_3, reuse=reuse) x = tf.add(input_tensor, x) if block != '4b': x = activate(x) return x def conv_block_2d(input_tensor, kernel_size, filters, stage, block, is_training, reuse, strides=(2, 2), kernel_initializer=tf.contrib.layers. 
variance_scaling_initializer()): filters1, filters2, filters3 = filters conv_name_2 = 'conv' + str(stage) + '_' + str(block) + '_3x3' bn_name_2 = 'bn' + str(stage) + '_' + str(block) + '_3x3' x = tf.layers.conv2d(input_tensor, filters2, (kernel_size, kernel_size), use_bias=False, strides=strides, padding='SAME', kernel_initializer=kernel_initializer, name=conv_name_2, reuse=reuse) x = tf.layers.batch_normalization(x, training=is_training, name=bn_name_2, reuse=reuse) x = tf.nn.relu(x) conv_name_3 = 'conv' + str(stage) + '_' + str( block) + '_1x1_increase' bn_name_3 = 'bn' + str(stage) + '_' + str(block) + '_1x1_increase' x = tf.layers.conv2d(x, filters3, (kernel_size, kernel_size), use_bias=False, padding='SAME', kernel_initializer=kernel_initializer, name=conv_name_3, reuse=reuse) x = tf.layers.batch_normalization(x, training=is_training, name=bn_name_3, reuse=reuse) conv_name_4 = 'conv' + str(stage) + '_' + str( block) + '_1x1_shortcut' bn_name_4 = 'bn' + str(stage) + '_' + str(block) + '_1x1_shortcut' shortcut = tf.layers.conv2d(input_tensor, filters3, (kernel_size, kernel_size), use_bias=False, strides=strides, padding='SAME', kernel_initializer=kernel_initializer, name=conv_name_4, reuse=reuse) shortcut = tf.layers.batch_normalization(shortcut, training=is_training, name=bn_name_4, reuse=reuse) x = tf.add(shortcut, x) x = tf.nn.relu(x) return x def resnet18_cifar(input_tensor, is_training=True, pooling_and_fc=True, reuse=False, kernel_initializer=tf.contrib.layers. variance_scaling_initializer()): with remap_variables(new_get_variable): x = tf.layers.conv2d(input_tensor, 64, (3, 3), strides=(1, 1), kernel_initializer=kernel_initializer, use_bias=False, padding='SAME', name='conv1_1/3x3_s1', reuse=reuse) x = tf.layers.batch_normalization(x, training=is_training, name='bn1_1/3x3_s1', reuse=reuse) x = tf.nn.relu(x) x1 = identity_block2d(x, 3, [48, 64, 64], stage=2, block='1b', is_training=is_training, reuse=reuse, kernel_initializer=kernel_initializer) x1 = identity_block2d(x1, 3, [48, 64, 64], stage=3, block='1c', is_training=is_training, reuse=reuse, kernel_initializer=kernel_initializer) x2 = conv_block_2d(x1, 3, [96, 128, 128], stage=3, block='2a', strides=(2, 2), is_training=is_training, reuse=reuse, kernel_initializer=kernel_initializer) x2 = activate(x2) x2 = identity_block2d(x2, 3, [96, 128, 128], stage=3, block='2b', is_training=is_training, reuse=reuse, kernel_initializer=kernel_initializer) x3 = conv_block_2d(x2, 3, [128, 256, 256], stage=4, block='3a', strides=(2, 2), is_training=is_training, reuse=reuse, kernel_initializer=kernel_initializer) x3 = activate(x3) x3 = identity_block2d(x3, 3, [128, 256, 256], stage=4, block='3b', is_training=is_training, reuse=reuse, kernel_initializer=kernel_initializer) x4 = conv_block_2d(x3, 3, [256, 512, 512], stage=5, block='4a', strides=(2, 2), is_training=is_training, reuse=reuse, kernel_initializer=kernel_initializer) x4 = activate(x4) x4 = identity_block2d(x4, 3, [256, 512, 512], stage=5, block='4b', is_training=is_training, reuse=reuse, kernel_initializer=kernel_initializer) print('before gap: ', x4) x4 = tf.reduce_mean(x4, [1, 2]) print('after gap: ', x4) # flatten = tf.contrib.layers.flatten(x4) prob = tf.layers.dense( x4, self.class_num, reuse=reuse, kernel_initializer=tf.contrib.layers.xavier_initializer()) # tmp = tf.trainable_variables() # prob = tf.layers.batch_normalization(prob, training=is_training, name='fbn', reuse=reuse) print('prob', prob) return prob def resnet_group(name, l, block_func, features, count, stride): with 
tf.variable_scope(name): for i in range(0, count): with tf.variable_scope('block{}'.format(i)): l = block_func(l, features, stride if i == 0 else 1) return l def resnet_shortcut(l, n_out, stride, activation=tf.identity): # data_format = get_arg_scope()['Conv2D']['data_format'] n_in = l.get_shape().as_list()[1 if self.data_format in ['NCHW', 'channels_first'] else 3] if n_in != n_out: # change dimension when channel is not the same return activate( Conv2D('convshortcut', l, n_out, 1, strides=stride, activation=activation)) else: return l def get_bn(zero_init=False): """ Zero init gamma is good for resnet. See https://arxiv.org/abs/1706.02677. """ if zero_init: return lambda x, name=None: BatchNorm( 'bn', x, gamma_initializer=tf.zeros_initializer()) else: return lambda x, name=None: BatchNorm('bn', x) def resnet_basicblock(l, ch_out, stride): shortcut = l l = Conv2D('conv1', l, ch_out, 3, strides=stride, activation=BNReLU) l = activate(l) l = Conv2D('conv2', l, ch_out, 3, activation=get_bn(zero_init=True)) l = activate(l) out = l + resnet_shortcut( shortcut, ch_out, stride, activation=get_bn(zero_init=False)) return tf.nn.relu(out) def resnet18_imagenet(image): with remap_variables(new_get_variable), \ argscope(Conv2D, use_bias=False, kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')): # Note that this pads the image by [2, 3] instead of [3, 2]. # Similar things happen in later stride=2 layers as well. l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU) l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME') l = resnet_group('group0', l, resnet_basicblock, 64, 2, 1) l = activate(l) l = resnet_group('group1', l, resnet_basicblock, 128, 2, 2) l = activate(l) l = resnet_group('group2', l, resnet_basicblock, 256, 2, 2) l = activate(l) l = resnet_group('group3', l, resnet_basicblock, 512, 2, 2) l = GlobalAvgPooling('gap', l) logits = FullyConnected( 'linear', l, 1000, kernel_initializer=tf.random_normal_initializer( stddev=0.01)) # tmp = tf.trainable_variables() return logits def alexnet(image): with remap_variables(new_get_variable), \ argscope([Conv2D, BatchNorm, MaxPooling], data_format='channels_first'), \ argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \ argscope(Conv2D, use_bias=False): logits = ( LinearWrap(image).Conv2D( 'conv0', 96, 12, strides=4, padding='VALID', use_bias=True).apply(fg).Conv2D( 'conv1', 256, 5, padding='SAME', split=2).apply(fg).BatchNorm('bn1').MaxPooling( 'pool1', 3, 2, padding='SAME').apply(activate).Conv2D( 'conv2', 384, 3).apply(fg).BatchNorm('bn2').MaxPooling( 'pool2', 3, 2, padding='SAME').apply(activate).Conv2D( 'conv3', 384, 3, split=2). apply(fg).BatchNorm('bn3').apply(activate).Conv2D( 'conv4', 256, 3, split=2).apply(fg).BatchNorm('bn4').MaxPooling( 'pool4', 3, 2, padding='VALID').apply(activate).FullyConnected( 'fc0', 4096).apply(fg).BatchNorm('bnfc0').apply( activate).FullyConnected('fc1', 4096, use_bias=False). apply(fg).BatchNorm('bnfc1').apply(nonlin).FullyConnected( 'fct', self.class_num, use_bias=True)()) return logits logits = None if self.model_name == 'alexnet': logits = alexnet(image) elif self.model_name == 'resnet18': if dataset_name == 'cifar': logits = resnet18_cifar(image, reuse=tf.AUTO_REUSE) elif dataset_name == 'ImageNet': logits = resnet18_imagenet(image) add_param_summary(('.*/W', ['histogram', 'rms'])) tf.nn.softmax(logits, name='output') # for prediction return logits
def _build_graph(self, inputs):
    inp, label = inputs
    is_training = get_current_tower_context().is_training

    tw = get_tw(self.tw_thres)

    def ternarize_weight(v):
        name = v.op.name
        if not (name.endswith('W')):
            logger.info("Not ternarizing {}".format(name))
            return v
        elif not self.quant_ends and 'conv0' in name:
            logger.info("Not ternarizing {}".format(name))
            return v
        elif not self.quant_ends and 'last_linear' in name:
            logger.info("Not ternarizing {}".format(name))
            return v
        elif not self.quant_ends and (self.net_fn == fcn1_net or self.net_fn == fcn2_net) and 'linear0' in name:
            logger.info("Not ternarizing {}".format(name))
            return v
        else:
            logger.info("Ternarizing weight {}".format(name))
            return tw(v)

    def nonlin(x, name="activate"):
        if self.bita == 32:
            return BNReLUWithTrackedMults(x)
        else:
            assert False

    with remap_variables(ternarize_weight), \
            argscope([FullyConnectedWithTrackedMults], network_complexity=self.network_complexity), \
            argscope([Conv2DWithTrackedMults], network_complexity=self.network_complexity), \
            argscope([BNReLUWithTrackedMults], network_complexity=self.network_complexity), \
            argscope([BNWithTrackedMults], network_complexity=self.network_complexity), \
            argscope(BatchNorm, decay=0.9, epsilon=1e-4):
        l = self.net_fn(inp, nonlin, self.n_context)
        logits = FullyConnectedWithTrackedMults('last_linear', l, out_dim=self.n_spks, nl=tf.identity)

    prob = tf.nn.softmax(logits, name='output')

    # used for validation accuracy of utterance
    identity_guesses = flatten(tf.argmax(prob, axis=1))
    uniq_identities, _, count = tf.unique_with_counts(identity_guesses)
    idx_to_identity_with_most_votes = tf.argmax(count)
    chosen_identity = tf.gather(uniq_identities, idx_to_identity_with_most_votes)
    wrong = tf.expand_dims(tf.not_equal(chosen_identity, tf.cast(label[0], tf.int64)),
                           axis=0, name='utt-wrong')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')
    add_moving_summary(cost)

    wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))

    with tf.name_scope('original-weight-summaries'):
        add_param_summary(('.*/W', ['rms', 'histogram']))
        add_param_summary(('.*/b', ['rms', 'histogram']))

    with tf.name_scope('activation-summaries'):
        def fn(name):
            return (name.endswith('output') or name.endswith('output:0')) \
                and "Inference" not in name and 'quantized' not in name
        tensors = get_tensors_from_graph(tf.get_default_graph(), fn)
        logger.info("Adding activation tensors to summary: {}".format(tensors))
        for tensor in tensors:
            add_tensor_summary(tensor, ['rms', 'histogram'])

    wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(), 480000, 0.2, True)
    wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
    add_moving_summary(wd_cost)

    self.cost = tf.add_n([cost, wd_cost], name='cost')

    tf.constant([self.network_complexity['mults']], name='TotalMults')
    tf.constant([self.network_complexity['weights']], name='TotalWeights')
    logger.info("Parameter count: {}".format(self.network_complexity))
def _build_graph(self, inputs): is_training = get_current_tower_context().is_training images, truemap_coded = inputs orig_imgs = images pen_map = truemap_coded[..., -1] if hasattr(self, 'type_classification') and self.type_classification: true = truemap_coded[..., 1] else: true = truemap_coded[..., 0] true = tf.cast(true, tf.int32) true = tf.identity(true, name='truemap') one = tf.one_hot( true, self.nr_types if self.type_classification else self.nr_classes, axis=-1) true = tf.expand_dims(true, axis=-1) def encoder_blk(name, feat_in, num_feats, has_down=False): with tf.variable_scope(name): feat = feat_in if not has_down else MaxPooling( 'pool1', feat_in, 2, strides=2, padding='same') feat = Conv2D('conv_1', feat, num_feats, 3, padding='valid', strides=1, activation=tf.nn.relu) feat = Conv2D('conv_2', feat, num_feats, 3, padding='valid', strides=1, activation=tf.nn.relu) return feat def decoder_blk(name, feat_in, num_feats, shorcut): with tf.variable_scope(name): in_ch = feat_in.get_shape().as_list()[1] feat = Conv2DTranspose('us', feat_in, in_ch, 2, strides=(2, 2), padding='same', activation=tf.identity) feat = tf.concat([feat, shorcut], axis=1) feat = Conv2D('conv_1', feat, num_feats, 3, padding='valid', strides=1, activation=tf.nn.relu) feat = Conv2D('conv_2', feat, num_feats, 3, padding='valid', strides=1, activation=tf.nn.relu) return feat #### Xavier initializer with argscope([Conv2D, Conv2DTranspose], activation=tf.identity, use_bias=True, kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d()), \ argscope([Conv2D, MaxPooling, Conv2DTranspose], data_format=self.data_format): i = tf.transpose(images, [0, 3, 1, 2]) i = i if not self.input_norm else i / 255.0 d1 = encoder_blk('d1', i, 64, has_down=False) d2 = encoder_blk('d2', d1, 128, has_down=True) d3 = encoder_blk('d3', d2, 256, has_down=True) d4 = encoder_blk('d4', d3, 512, has_down=True) d4 = tf.layers.dropout(d4, rate=0.5, seed=5, training=is_training) d5 = encoder_blk('d5', d4, 1024, has_down=True) d5 = tf.layers.dropout(d5, rate=0.5, seed=5, training=is_training) d1 = crop_op(d1, (176, 176)) d2 = crop_op(d2, (80, 80)) d3 = crop_op(d3, (32, 32)) d4 = crop_op(d4, (8, 8)) feat = decoder_blk('u4', d5, 512, d4) feat = decoder_blk('u3', feat, 256, d3) feat = decoder_blk('u2', feat, 128, d2) feat = decoder_blk('u1', feat, 64, d1) logi = Conv2D( 'conv_out', feat, self.nr_types if self.type_classification else self.nr_classes, 1, use_bias=True, activation=tf.identity) logi = tf.transpose(logi, [0, 2, 3, 1]) soft = tf.nn.softmax(logi, axis=-1) if self.type_classification: prob_np = tf.reduce_sum(soft[..., 1:], axis=-1, keepdims=True) prob_np = tf.identity(prob_np, name='predmap-prob-np') predmap_coded = tf.concat([soft, prob_np], axis=-1) else: prob_np = tf.identity(soft[..., 1], name='predmap-prob') prob_np = tf.expand_dims(prob_np, axis=-1) predmap_coded = prob_np # * channel ordering: type-map, segmentation map # encoded so that inference can extract all output at once predmap_coded = tf.identity(predmap_coded, name='predmap-coded') #### if is_training: ######## LOSS ### classification loss loss_bce = categorical_crossentropy(soft, one) loss_bce = tf.reduce_mean(loss_bce * pen_map, name='loss-bce') add_moving_summary(loss_bce) wd_loss = regularize_cost('.*/W', l2_regularizer(1.0e-5), name='l2_wd_loss') add_moving_summary(wd_loss) self.cost = loss_bce + wd_loss add_param_summary(('.*/W', ['histogram'])) # monitor W #### logging visual sthg orig_imgs = tf.cast(orig_imgs, tf.uint8) orig_imgs = crop_op(orig_imgs, (184, 184), 
"channels_last") tf.summary.image('input', orig_imgs, max_outputs=1) pred = colorize(prob_np[..., 0], cmap='jet') true = colorize(true[..., 0], cmap='jet') pen_map = colorize(pen_map, cmap='jet') viz = tf.concat([orig_imgs, pred, true, pen_map], 2) tf.summary.image('output', viz, max_outputs=1) return
def build_graph(self, image, label): """This function should build the model which takes the input variables and return cost at the end""" # In tensorflow, inputs to convolution function are assumed to be # NHWC. Add a single channel here. image = tf.layers.flatten(image) # image = image * 2 - 1 # center the pixels values at zero # The context manager `argscope` sets the default option for all the layers under # this context. Here we use 32 channel convolution with shape 3x3 with tf.variable_scope('encoder'): x = FullyConnected('fc1', image, 1000, activation=tf.nn.relu) x = FullyConnected('fc2', x, 1000, activation=tf.nn.relu) mu = tf.identity(FullyConnected('fc_mu', x, 2, activation=None), 'mu') logvar = FullyConnected('fc_var', x, 2, activation=None) eps = tf.random_normal((tf.shape(x)[0], 2)) z = tf.identity(eps * tf.exp(0.5 * logvar) + mu, name='z') with tf.variable_scope('decoder'): x = FullyConnected('fc1', z, 1000, activation=tf.nn.relu) x = FullyConnected('fc2', x, 1000, activation=tf.nn.relu) rec = tf.identity( FullyConnected('fc_rec', x, IMAGE_SIZE * IMAGE_SIZE, activation=tf.nn.sigmoid), 'rec') kl_loss = -tf.reduce_sum(1 + logvar - mu * mu - tf.exp(logvar), -1) kl_loss = tf.reduce_mean(kl_loss, name='kl_loss') rec_loss = tf.reduce_mean(tf.reduce_sum(tf.square(rec - image), -1), name='rec_loss') total_cost = rec_loss + kl_loss # a vector of length B with loss of each sample # cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) # cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss # # correct = tf.cast(tf.nn.in_top_k(predictions=logits, targets=label, k=1), tf.float32, name='correct') # accuracy = tf.reduce_mean(correct, name='accuracy') # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically # 1. written to tensosrboard # 2. written to stat.json # 3. printed after each epoch # train_error = tf.reduce_mean(1 - correct, name='train_error') # summary.add_moving_summary(train_error, accuracy) # Use a regex to find parameters to apply weight decay. # Here we apply a weight decay on all W (weight matrix) of all fc layers # If you don't like regex, you can certainly define the cost in any other methods. # wd_cost = tf.multiply(1e-5, # regularize_cost('fc.*/W', tf.nn.l2_loss), # name='regularize_loss') # total_cost = tf.add_n([wd_cost, cost], name='total_cost') # summary.add_moving_summary(cost, wd_cost, total_cost) summary.add_moving_summary(rec_loss, kl_loss) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('.*/W', ['histogram', 'rms'])) # the function should return the total cost to be optimized return total_cost
def build_graph(self, *inputs):
    comb_state, self.action, reward, isOver, human = inputs
    comb_state = tf.cast(comb_state, tf.float32)
    state = tf.slice(comb_state, [0, 0, 0, 0, 0], [-1, -1, -1, -1, self.channel], name='state')

    # Standard DQN loss
    self.predict_value = self.get_DQN_prediction(state)
    if not get_current_tower_context().is_training:
        return

    reward = tf.clip_by_value(reward, -1, 1)
    next_state = tf.slice(comb_state, [0, 0, 0, 0, 1], [-1, -1, -1, -1, self.channel], name='next_state')
    self.action_onehot = tf.one_hot(self.action, self.num_actions, 1.0, 0.0)

    pred_action_value = tf.reduce_sum(self.predict_value * self.action_onehot, 1)  # N,
    max_pred_reward = tf.reduce_mean(tf.reduce_max(self.predict_value, 1), name='predict_reward')
    summary.add_moving_summary(max_pred_reward)

    with tf.variable_scope('target'):
        targetQ_predict_value = self.get_DQN_prediction(next_state)    # NxA

    if 'Double' not in self.method:
        # DQN or Dueling
        best_v = tf.reduce_max(targetQ_predict_value, 1)    # N,
    else:
        # Double-DQN or DuelingDouble
        next_predict_value = self.get_DQN_prediction(next_state)
        self.greedy_choice = tf.argmax(next_predict_value, 1)   # N,
        predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
        best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

    target = reward + (1.0 - tf.cast(isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)

    cost = tf.losses.huber_loss(target, pred_action_value,
                                reduction=tf.losses.Reduction.MEAN)

    ###############################################################################
    # HITL UPDATE: Margin classification loss
    # This can only be calculated on the human-generated samples.
    # Q(s,A_E): the Q value of the action that was taken by the human in that state
    action_value_1 = tf.multiply(self.predict_value, self.action_onehot, name='action_value_1')
    tar = tf.reduce_sum(action_value_1, 1)

    # l(a_E,a): penalise every action by 0.8 except the action that the
    # human took, which gets 0
    mar = tf.one_hot(self.action, self.num_actions, 0.0, 0.8)
    # max[Q(s,a) + l(a_E,a)]
    # Q(s,a) = self.predict_value
    mar_1 = tf.add(mar, self.predict_value)
    margin = tf.reduce_max(mar_1, 1)
    margin_loss = tf.subtract(margin, tar)

    # this has been applied to all the transitions; now set the
    # margin classification loss to 0 for the transitions that were
    # not generated by a human
    margin_loss = tf.where(human, margin_loss, tf.zeros_like(margin_loss, dtype=tf.float32))
    margin_loss = tf.reduce_mean(margin_loss)

    cost = tf.add(cost, margin_loss)
    ###############################################################################

    summary.add_param_summary(('conv.*/W', ['histogram', 'rms']),
                              ('fc.*/W', ['histogram', 'rms']))   # monitor all W
    summary.add_moving_summary(cost)
    logger.info("Cost: {}".format(cost))
    return cost
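# --- Added illustrative sketch (not part of the original snippets) ---------------------------
# The margin term above is the large-margin classification loss used for learning from
# demonstrations: max_a[Q(s,a) + l(a_E,a)] - Q(s,a_E). A plain-numpy example for one
# human-generated transition (all numbers are made up) showing what the graph ops compute:
import numpy as np

q_values = np.array([1.2, 0.7, 2.0, 0.5])   # Q(s, a) for each of 4 actions
human_action = 1                             # a_E, the action the demonstrator took
margin = np.where(np.arange(4) == human_action, 0.0, 0.8)   # l(a_E, a)
loss = np.max(q_values + margin) - q_values[human_action]   # max_a[Q + l] - Q(s, a_E)
print(loss)  # 2.8 - 0.7 = 2.1; it is 0 only when Q(s, a_E) exceeds all other Q values by the margin
# ---------------------------------------------------------------------------------------------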
def build_graph(self, imgs, cams, gt_depth): # preprocess imgs, gt_depth, ref_img = self._preprocess(imgs, gt_depth) with argscope([tf.layers.conv3d, tf.layers.conv3d_transpose, mvsnet_gn, Conv2D, Conv2DTranspose, MaxPooling, AvgPooling, BatchNorm], data_format=self.data_format),\ argscope(tf.layers.batch_normalization, axis=-1): # feature extraction # shape: b, view_num, h/4, w/4, c feature_maps = feature_extraction_net(imgs, self.branch_function) # get depth_start and depth_interval batch-wise depth_start, depth_interval, depth_end = get_depth_meta( cams, depth_num=self.depth_num) # warping layer # shape of cost_volume: b, depth_num, h/4, w/4, c cost_volume = warping_layer('warping', feature_maps, cams, depth_start, depth_interval, self.depth_num) # cost_volume = tf.get_variable('fake_cost_volume', (1, 32, 192, 128, 160)) if self.regularize_type == '3DCNN': # cost volume regularization # regularized_cost_volume: b, d, h/4, w/4 regularized_cost_volume = cost_volume_regularization( cost_volume, self.bn_training, self.bn_trainable) # regularized_cost_volume = simple_cost_volume_regularization(cost_volume, self.bn_training, self.bn_trainable) # shape of coarse_depth: b, 1, h/4, w/4 # shape of prob_map: b, h/4, w/4, 1 # TODO: no need to pass batch_size as param, actually, it is needed, because it is needed in the graph buiding coarse_depth, prob_map = soft_argmin( 'soft_argmin', regularized_cost_volume, depth_start, depth_end, self.depth_num, depth_interval, self.batch_size) # shape of refine_depth: b, 1, h/4, w/4 if self.is_refine: refine_depth = depth_refinement(coarse_depth, ref_img, depth_start, depth_end) loss_coarse, *_ = mvsnet_regression_loss( gt_depth, coarse_depth, depth_interval, 'coarse_loss') loss_refine, less_one_accuracy, less_three_accuracy = mvsnet_regression_loss( gt_depth, refine_depth, depth_interval, 'refine_loss') else: refine_depth = coarse_depth # loss_coarse, *_ = mvsnet_regression_loss(gt_depth, coarse_depth, depth_interval, 'coarse_loss') loss_refine, less_one_accuracy, less_three_accuracy = mvsnet_regression_loss( gt_depth, refine_depth, depth_interval, 'refine_loss') loss_coarse = tf.identity(loss_refine, name='loss_coarse') # FIXME: it is weried because I never use refine part coarse_depth = tf.identity(coarse_depth, 'coarse_depth') refine_depth = tf.identity(refine_depth, 'refine_depth') prob_map = tf.identity(prob_map, 'prob_map') loss = tf.add(loss_refine / 2, loss_coarse * self.lambda_ / 2, name='loss') less_one_accuracy = tf.identity(less_one_accuracy, name='less_one_accuracy') less_three_accuracy = tf.identity(less_three_accuracy, name='less_three_accuracy') else: prob_volume = gru_regularization(cost_volume, self.bn_training, self.bn_trainable) loss, mae, less_one_accuracy, less_three_accuracy, coarse_depth = \ mvsnet_classification_loss( prob_volume, gt_depth, self.depth_num, depth_start, depth_interval) coarse_depth = tf.identity(coarse_depth, 'coarse_depth') refine_depth = tf.identity(coarse_depth, 'refine_depth') # prob_map = get_propability_map(prob_volume, coarse_depth, depth_start, depth_interval) with tf.variable_scope('summaries'): with tf.device('/cpu:0'): if self.regularize_type == '3DCNN': add_moving_summary(loss, loss_coarse, loss_refine, less_one_accuracy, less_three_accuracy) else: add_moving_summary(loss, less_one_accuracy, less_three_accuracy) if self.regularize_type == '3DCNN': add_image_summary(prob_map, name='prob_map') add_image_summary(coarse_depth, name='coarse_depth') add_image_summary(refine_depth, name='refine_depth') 
add_image_summary(ref_img, name='rgb') add_image_summary(gt_depth, name='gt_depth') if self.debug_param_summary: with tf.device('/gpu:0'): add_param_summary(['.*/W', ['histogram', 'rms']], ['.*/gamma', ['histogram', 'mean']], ['.*/beta', ['histogram', 'mean']]) # all_vars = [var for var in tf.trainable_variables() if "gamma" in var.name or 'beta' in var.name] # grad_vars = tf.gradients(loss, all_vars) # for var, grad in zip(all_vars, grad_vars): # add_tensor_summary(grad, ['histogram', 'rms'], name=var.name + '-grad') # all_vars = [var for var in tf.trainable_variables()] # grad_vars = tf.gradients(loss, all_vars) # for var, grad in zip(all_vars, grad_vars): # add_tensor_summary(grad, ['histogram'], name=var.name + '-grad') return loss
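# --- Hedged sketch: what soft_argmin over a regularized cost volume typically computes. ---
# The real soft_argmin used above is defined elsewhere in this project; this sketch only shows
# the usual MVSNet-style formulation (softmax over depth hypotheses, then an expectation),
# assuming a cost volume laid out as [B, D, H, W] and scalar depth_start / depth_interval.
import tensorflow as tf

def soft_argmin_sketch(cost_volume, depth_start, depth_interval, depth_num):
    """cost_volume: [B, D, H, W]; returns expected depth [B, 1, H, W] and a confidence map."""
    # lower cost -> higher probability for that depth hypothesis
    prob_volume = tf.nn.softmax(-cost_volume, axis=1)
    # depth value of hypothesis d is depth_start + d * depth_interval
    d = tf.reshape(tf.cast(tf.range(depth_num), tf.float32), [1, -1, 1, 1])
    depth_values = depth_start + d * depth_interval
    expected_depth = tf.reduce_sum(prob_volume * depth_values, axis=1, keepdims=True)
    # a simple confidence proxy: probability mass of the most likely hypothesis
    prob_map = tf.reduce_max(prob_volume, axis=1, keepdims=True)
    return expected_depth, prob_map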
def _build_graph(self, inputs): is_training = get_current_tower_context().is_training images, truemap_coded = inputs orig_imgs = images true = truemap_coded[..., 0] true = tf.cast(true, tf.int32) true = tf.identity(true, name="truemap") one_hot = tf.one_hot(true, 2, axis=-1) true = tf.expand_dims(true, axis=-1) #### with argscope( Conv2D, activation=tf.identity, use_bias=False, # K.he initializer W_init=tf.variance_scaling_initializer(scale=2.0, mode="fan_out"), ), argscope([Conv2D], data_format=self.data_format): i = images if not self.input_norm else images / 255.0 #### feat = net( "net", i, self.basis_filter_list, self.rot_matrix_list, self.nr_orients, self.filter_type, is_training, ) #### Prediction o_logi = Conv2D("output", feat, 2, 1, use_bias=True, nl=tf.identity) soft = tf.nn.softmax(o_logi, axis=-1) prob = tf.identity(soft, name="predmap-prob") # encoded so that inference can extract all output at once predmap_coded = tf.concat(prob, axis=-1, name="predmap-coded") #### if get_current_tower_context().is_training: # ---- LOSS ----# loss = 0 for term, weight in self.loss_term.items(): if term == "bce": term_loss = categorical_crossentropy(soft, one_hot) term_loss = tf.reduce_mean(term_loss, name="loss-bce") else: assert False, "Not support loss term: %s" % term add_moving_summary(term_loss) loss += term_loss * weight ### combine the loss into single cost function wd_loss = regularize_cost(".*/W", l2_regularizer(1.0e-7), name="l2_wd_loss") add_moving_summary(wd_loss) self.cost = tf.identity(loss + wd_loss, name="overall-loss") add_moving_summary(self.cost) #### add_param_summary((".*/W", ["histogram"])) # monitor W ### logging visual sthg orig_imgs = tf.cast(orig_imgs, tf.uint8) tf.summary.image("input", orig_imgs, max_outputs=1) return
def _build_graph(self, inputs): is_training = get_current_tower_context().is_training images, truemap_coded = inputs orig_imgs = images true = truemap_coded[..., :3] true = tf.cast(true, tf.int32) true = tf.identity(true, name='truemap') one_hot = tf.cast(true, tf.float32) #### with argscope(Conv2D, activation=tf.identity, use_bias=False, # K.he initializer W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \ argscope([Conv2D], data_format=self.data_format): i = images if not self.input_norm else images / 255.0 #### d = encoder('encoder', i, self.basis_filter_list, self.rot_matrix_list, self.nr_orients, self.filter_type, is_training) #### feat = decoder('decoder', d, self.basis_filter_list, self.rot_matrix_list, self.nr_orients, self.filter_type, is_training) feat1 = Conv2D('feat', feat, 96, 1, use_bias=True, nl=BNReLU) o_logi = Conv2D('output', feat, 3, 1, use_bias=True, nl=tf.identity) soft = tf.nn.softmax(o_logi, axis=-1) prob = tf.identity(soft[..., :2], name='predmap-prob') # encoded so that inference can extract all output at once predmap_coded = tf.concat(prob, axis=-1, name='predmap-coded') #### if get_current_tower_context().is_training: #---- LOSS ----# loss = 0 for term, weight in self.loss_term.items(): if term == 'bce': term_loss = categorical_crossentropy(soft, one_hot) term_loss = tf.reduce_mean(term_loss, name='loss-bce') elif 'dice' in self.loss_term: # branch 1 term_loss = dice_loss(soft[...,0], one_hot[...,0]) \ + dice_loss(soft[...,1], one_hot[...,1]) term_loss = tf.identity(term_loss, name='loss-dice') else: assert False, 'Not support loss term: %s' % term add_moving_summary(term_loss) loss += term_loss ### combine the loss into single cost function wd_loss = regularize_cost('.*/W', l2_regularizer(1.0e-7), name='l2_wd_loss') add_moving_summary(wd_loss) self.cost = tf.identity(loss + wd_loss, name='overall-loss') add_moving_summary(self.cost) #### add_param_summary(('.*/W', ['histogram'])) # monitor W ### logging visual sthg orig_imgs = tf.cast(orig_imgs, tf.uint8) tf.summary.image('input', orig_imgs, max_outputs=1) pred_blb = colorize(prob[..., 0], cmap='jet') true_blb = colorize(true[..., 0], cmap='jet') pred_cnt = colorize(prob[..., 1], cmap='jet') true_cnt = colorize(true[..., 1], cmap='jet') viz = tf.concat( [orig_imgs, pred_blb, pred_cnt, true_blb, true_cnt], 2) viz = tf.concat([viz[0], viz[-1]], axis=0) viz = tf.expand_dims(viz, axis=0) tf.summary.image('output', viz, max_outputs=1) return
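# --- Hedged sketch: a soft Dice loss of the form used by the 'dice' loss term above. ---
# dice_loss itself is defined elsewhere; this is only the standard soft-Dice formulation
# (1 - 2|P.G| / (|P| + |G|)) with a small smoothing term, applied to one class channel at a time.
import tensorflow as tf

def soft_dice_loss(pred, target, smooth=1e-3):
    """pred: [B, H, W] predicted probabilities; target: [B, H, W] binary ground truth."""
    intersection = tf.reduce_sum(pred * target, axis=[1, 2])
    denominator = tf.reduce_sum(pred, axis=[1, 2]) + tf.reduce_sum(target, axis=[1, 2])
    dice = (2.0 * intersection + smooth) / (denominator + smooth)
    return tf.reduce_mean(1.0 - dice)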
def build_graph( self, cam1: tf.Tensor, cam2: tf.Tensor, mask: tf.Tensor, normal: tf.Tensor, depth: tf.Tensor, sgs: tf.Tensor, diffuse_gt: tf.Tensor, specular_gt: tf.Tensor, roughness_gt: tf.Tensor, ): with tf.variable_scope("prepare"): mask = mask[:, :, :, 0:1] repeat = [1 for _ in range(len(mask.shape))] repeat[-1] = 3 mask3 = tf.tile(mask, repeat) two_side_by_side(cam1, cam2, "input", 10) in1 = cam1 in2 = cam2 m3 = mask3 two_side_by_side(normal, tf.tile(depth, repeat), "geom", 10) batch_size = tf.shape(cam1)[0] diffuse, specular, roughness = self.network_architecture( in1, in2, m3, normal, depth) rendered = self.render(diffuse, specular, roughness, normal, depth, sgs, mask3) ## Rerender with tf.variable_scope("viz"): rendered_reinhard = rendered / (1.0 + rendered) loss_img_reinhard = cam1 / (1.0 + cam1) two_side_by_side( tf.clip_by_value(tf.pow(loss_img_reinhard, 1.0 / 2.2), 0.0, 1.0), tf.clip_by_value(tf.pow(rendered_reinhard, 1.0 / 2.2), 0.0, 1.0), "rendered", 10, ) with tf.variable_scope("loss"): with tf.variable_scope("rendering"): rerendered_log = tf.clip_by_value( tf.log(1.0 + tf.nn.relu(rendered)), 0.0, 13.0) rerendered_log = tf.check_numerics( rerendered_log, "Rerendered log image contains NaN or Inf") loss_log = tf.clip_by_value(tf.log(1.0 + tf.nn.relu(cam1)), 0.0, 13.0) loss_log = tf.check_numerics( loss_log, "The Loss log image contains NaN or Inf") l1_err = l1_loss(loss_log, rerendered_log) rerendered_loss = tf.reduce_mean(masked_loss(l1_err, mask3), name="rendering_loss") add_moving_summary(rerendered_loss) tf.losses.add_loss(rerendered_loss, tf.GraphKeys.LOSSES) with tf.variable_scope("diffuse"): diffuse_loss = tf.reduce_mean( masked_loss(l1_loss(diffuse_gt, diffuse), mask3), name="diffuse_loss", ) add_moving_summary(diffuse_loss) tf.losses.add_loss(diffuse_loss, tf.GraphKeys.LOSSES) two_side_by_side(diffuse_gt, diffuse, "diffuse", 10) with tf.variable_scope("specular"): specular_loss = tf.reduce_mean( masked_loss(l1_loss(specular_gt, specular), mask3), name="specular_loss", ) add_moving_summary(specular_loss) tf.losses.add_loss(specular_loss, tf.GraphKeys.LOSSES) two_side_by_side(specular_gt, specular, "specular", 10) with tf.variable_scope("roughness"): roughness_loss = tf.reduce_mean( masked_loss(l1_loss(roughness_gt, roughness), mask), name="roughness_loss", ) add_moving_summary(roughness_loss) tf.losses.add_loss(roughness_loss, tf.GraphKeys.LOSSES) two_side_by_side(roughness_gt, roughness, "roughness", 10) self.cost = tf.losses.get_total_loss(name="total_costs") add_moving_summary(self.cost) if self.training: add_param_summary((".*/W", ["histogram"])) # monitor W return self.cost
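# --- Hedged sketch: the masked, log-space L1 re-rendering loss above, condensed. ---
# l1_loss and masked_loss are project helpers; this sketch assumes masked_loss simply zeroes
# out pixels outside the mask (the real helper might instead normalise by the number of valid
# pixels). The 13.0 cap mirrors the clipping used in the rendering loss block above.
import tensorflow as tf

def masked_log_l1(rendered, target, mask, log_cap=13.0):
    """rendered, target: [B, H, W, 3] linear (HDR) images; mask: [B, H, W, 3] in {0, 1}."""
    rendered_log = tf.clip_by_value(tf.log(1.0 + tf.nn.relu(rendered)), 0.0, log_cap)
    target_log = tf.clip_by_value(tf.log(1.0 + tf.nn.relu(target)), 0.0, log_cap)
    return tf.reduce_mean(tf.abs(rendered_log - target_log) * mask)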
def _build_graph(self, inputs): image, label = inputs image = image / 255.0 fw, fa, fg = get_dorefa(BITW, BITA, BITG) # monkey-patch tf.get_variable to apply fw def new_get_variable(v): name = v.op.name # don't binarize first and last layer if not name.endswith('W') or 'conv0' in name or 'fct' in name: return v else: logger.info("Binarizing weight {}".format(v.op.name)) return fw(v) def nonlin(x): if BITA == 32: return tf.nn.relu(x) # still use relu for 32bit cases return tf.clip_by_value(x, 0.0, 1.0) def activate(x): return fa(nonlin(x)) with remap_variables(new_get_variable), \ argscope(BatchNorm, decay=0.9, epsilon=1e-4), \ argscope([Conv2D, FullyConnected], use_bias=False, nl=tf.identity): logits = (LinearWrap(image) .Conv2D('conv0', 96, 12, stride=4, padding='VALID') .apply(activate) .Conv2D('conv1', 256, 5, padding='SAME', split=2) .apply(fg) .BatchNorm('bn1') .MaxPooling('pool1', 3, 2, padding='SAME') .apply(activate) .Conv2D('conv2', 384, 3) .apply(fg) .BatchNorm('bn2') .MaxPooling('pool2', 3, 2, padding='SAME') .apply(activate) .Conv2D('conv3', 384, 3, split=2) .apply(fg) .BatchNorm('bn3') .apply(activate) .Conv2D('conv4', 256, 3, split=2) .apply(fg) .BatchNorm('bn4') .MaxPooling('pool4', 3, 2, padding='VALID') .apply(activate) .FullyConnected('fc0', 4096) .apply(fg) .BatchNorm('bnfc0') .apply(activate) .FullyConnected('fc1', 4096) .apply(fg) .BatchNorm('bnfc1') .apply(nonlin) .FullyConnected('fct', 1000, use_bias=True)()) tf.nn.softmax(logits, name='output') cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean(cost, name='cross_entropy_loss') wrong = prediction_incorrect(logits, label, 1, name='wrong-top1') add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1')) wrong = prediction_incorrect(logits, label, 5, name='wrong-top5') add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5')) # weight decay on all W of fc layers wd_cost = regularize_cost('fc.*/W', l2_regularizer(5e-6), name='regularize_cost') add_param_summary(('.*/W', ['histogram', 'rms'])) self.cost = tf.add_n([cost, wd_cost], name='cost') add_moving_summary(cost, wd_cost, self.cost)
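# --- Hedged sketch: the k-bit weight quantizer that get_dorefa(BITW, BITA, BITG) returns for weights. ---
# get_dorefa comes from the tensorpack DoReFa-Net example; this sketch follows the published
# DoReFa formulation for BITW > 1 and may differ from that implementation in details.
# Gradients pass straight through the rounding (straight-through estimator).
import tensorflow as tf

def quantize_weight_sketch(w, k):
    """w: float weight tensor; k: number of bits, k > 1."""
    n = float(2 ** k - 1)

    def quantize_k(x):
        # round to k-bit fixed point in [0, 1]; identity gradient via the stop_gradient trick
        return x + tf.stop_gradient(tf.round(x * n) / n - x)

    t = tf.tanh(w)
    x = t / (2.0 * tf.reduce_max(tf.abs(t))) + 0.5   # map weights into [0, 1]
    return 2.0 * quantize_k(x) - 1.0                 # back into [-1, 1]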
def build_graph(self, points, idx, label, *split_axis): """This function should build the model which takes the input variables and return cost at the end""" # add all features in the leaf node batch_idx = tf.expand_dims( tf.tile( tf.reshape(tf.range(tf.shape(points)[0]), (-1, 1, 1)), [1, tf.shape(idx)[1], tf.shape(idx)[2]]), -1) points = tf.gather_nd( points, tf.concat([batch_idx, tf.expand_dims(idx, -1)], -1)) points = tf.transpose(tf.reduce_mean(points, -2), (0, 2, 1)) # B * N * 3 x = tf.transpose( tf.nn.conv1d(points, tf.get_variable('kernel_pre', [1, DIM, 32]), 1, 'SAME', data_format='NCHW'), (0, 2, 1)) x = tf.nn.bias_add(x, tf.get_variable('bias_pre', (32, ))) features = [ 32, 32, 64, 64, 128, 128, 256, 256, 512, 512, 128, N_CLASSES ] Ws = [ tf.get_variable('kernel%d' % i, shape=(DIM, 2 * features[i], features[i + 1])) for i in range(DEPTH) ] Bs = [ tf.get_variable('bias%d' % i, shape=(DIM, features[i + 1])) for i in range(DEPTH) ] for i in range(DEPTH): x = tf.expand_dims( tf.reshape(x, [ tf.shape(x)[0], tf.div(tf.shape(x)[1], 2), 2 * features[i] ]), 2) # B * N/2 * 1 * 2F w = tf.gather_nd(Ws[i], tf.expand_dims(split_axis[i], -1)) # B * N/2 * 2F * F_next # x = tf.Print(x, [tf.shape(x), tf.shape(w)], summarize=100) b = tf.gather_nd(Bs[i], tf.expand_dims(split_axis[i], -1)) # B * N/2 * F_next x = tf.squeeze(tf.matmul(x, w), -2) + b if i < DEPTH - 1: x = tf.nn.relu(x) logits = tf.squeeze(x, 1) # a vector of length B with loss of each sample cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean( cost, name='cross_entropy_loss') # the average cross-entropy loss correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct') accuracy = tf.reduce_mean(correct, name='accuracy') # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically # 1. written to tensosrboard # 2. written to stat.json # 3. printed after each epoch train_error = tf.reduce_mean(1 - correct, name='train_error') summary.add_moving_summary(train_error, accuracy) # Use a regex to find parameters to apply weight decay. # Here we apply a weight decay on all W (weight matrix) of all fc layers # If you don't like regex, you can certainly define the cost in any other methods. wd_cost = tf.multiply(1e-3, regularize_cost('kernel.*', tf.nn.l2_loss), name='regularize_loss') total_cost = tf.add_n([wd_cost, cost], name='total_cost') summary.add_moving_summary(cost, wd_cost, total_cost) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('kernel.*', ['histogram', 'rms'])) # the function should return the total cost to be optimized return total_cost
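# --- Hedged sketch: the batched gather_nd pattern used above to pool points per leaf node. ---
# tf.gather_nd needs an explicit batch index next to the per-batch point indices; this helper
# isolates that pattern (names are illustrative).
import tensorflow as tf

def batched_gather(points, idx):
    """points: [B, P, C]; idx: [B, N, K] int32 indices into P; returns [B, N, K, C]."""
    b = tf.shape(points)[0]
    n, k = tf.shape(idx)[1], tf.shape(idx)[2]
    batch_idx = tf.tile(tf.reshape(tf.range(b), [-1, 1, 1]), [1, n, k])  # [B, N, K]
    full_idx = tf.stack([batch_idx, idx], axis=-1)                       # [B, N, K, 2]
    return tf.gather_nd(points, full_idx)

# The leaf-node averaging above is equivalent, up to the final transpose, to
# tf.reduce_mean(batched_gather(points, idx), -2).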
def build_graph(self, pc, pc_feature): pc_symmetry = tf.stack([-pc[..., 0], pc[..., 1], pc[..., 2]], -1) # -x dist2sym = tf.reduce_sum((pc[:, :, None] - pc_symmetry[:, None])**2, -1) nearest_idx = tf.argmin(dist2sym, -1, output_type=tf.int32) # smoothnet encoder, only local features are used embedding = SmoothNet(pc_feature, self.cfg) with tf.variable_scope('encoder'): z = tf.sigmoid(embedding[:, :, -1], name='z') output_x = tf.nn.l2_normalize(embedding[:, :, :-1], axis=-1, name='feature') gp_loss = 0. loss_d = 0. loss_g = 0. if get_current_tower_context().is_training: beta_dist = tf.distributions.Beta( concentration1=self.cfg.beta.concentration1, concentration0=self.cfg.beta.concentration0) with tf.variable_scope('GAN'): real_z = beta_dist.sample(tf.shape(z)) fake_val = self.discriminator(tf.stop_gradient(z)) real_val = self.discriminator(real_z) loss_d = tf.reduce_mean(fake_val - real_val, name='loss_d') with varreplace.freeze_variables(stop_gradient=True): loss_g = tf.reduce_mean(-self.discriminator(z), name='loss_g') z_interp = z + tf.random_uniform( (tf.shape(fake_val)[0], 1)) * (real_z - z) gradient_f = tf.gradients(self.discriminator(z_interp), [z_interp])[0] gp_loss = tf.reduce_mean(tf.maximum( tf.norm(gradient_f, axis=-1) - 1, 0)**2, name='gp_loss') code = tf.concat([ tf.reduce_max(tf.nn.relu(output_x) * z[..., None], 1), tf.reduce_max(tf.nn.relu(-output_x) * z[..., None], 1) ], axis=-1, name='code') code = FullyConnected('fc_global', code, self.cfg.topnet.code_nfts, activation=None) # topnet decoder tarch = get_arch(self.cfg.topnet.nlevels, self.cfg.num_points) def create_level(level, input_channels, output_channels, inputs, bn): with tf.variable_scope('level_%d' % level, reuse=tf.AUTO_REUSE): features = mlp_conv(inputs, [ input_channels, int(input_channels / 2), int(input_channels / 4), int(input_channels / 8), output_channels * int(tarch[level]) ], get_current_tower_context().is_training, bn) features = tf.reshape( features, [tf.shape(features)[0], -1, output_channels]) return features Nin = self.cfg.topnet.nfeat + self.cfg.topnet.code_nfts Nout = self.cfg.topnet.nfeat bn = True N0 = int(tarch[0]) nlevels = len(tarch) with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE): level0 = mlp(code, [256, 64, self.cfg.topnet.nfeat * N0], get_current_tower_context().is_training, bn=True) level0 = tf.tanh(level0, name='tanh_0') level0 = tf.reshape(level0, [-1, N0, self.cfg.topnet.nfeat]) outs = [ level0, ] for i in range(1, nlevels): if i == nlevels - 1: Nout = 3 bn = False inp = outs[-1] y = tf.expand_dims(code, 1) y = tf.tile(y, [1, tf.shape(inp)[1], 1]) y = tf.concat([inp, y], 2) outs.append( tf.tanh(create_level(i, Nin, Nout, y, bn), name='tanh_%d' % (i))) reconstruction = tf.reshape(outs[-1], [-1, self.cfg.num_points, 3], name='recon_pc') loss_recon = chamfer(reconstruction, pc) loss_recon = tf.identity(self.cfg.recon_factor * tf.reduce_mean(loss_recon), name='recon_loss') batch_size = tf.shape(output_x)[0] batch_idx = tf.tile( tf.range(batch_size)[:, None], [1, tf.shape(nearest_idx)[1]]) feature_sym = tf.gather_nd(embedding, tf.stack([batch_idx, nearest_idx], -1)) loss_sym = tf.identity( self.cfg.symmetry_factor * tf.reduce_mean(tf.reduce_sum(tf.abs(feature_sym - embedding), -1)), 'symmetry_loss') wd_cost = tf.multiply(1e-4, regularize_cost('.*(_W|kernel)', tf.nn.l2_loss), name='regularize_loss') loss_gan = loss_d + loss_g + gp_loss total_cost = tf.add_n([loss_recon, wd_cost, loss_gan, loss_sym], name='total_cost') summary.add_moving_summary(loss_recon, loss_sym) 
summary.add_param_summary(('.*(_W|kernel)', ['histogram', 'rms'])) return total_cost
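# --- Hedged sketch: the WGAN-GP style gradient penalty computed in the GAN block above. ---
# `discriminator` is whatever critic the model uses (here, a callable mapping a latent code to a
# per-sample score). The one-sided max(|grad| - 1, 0)^2 form mirrors gp_loss above; names are
# illustrative.
import tensorflow as tf

def gradient_penalty(discriminator, real_z, fake_z):
    """real_z, fake_z: [B, N] latent codes; discriminator: callable [B, N] -> [B] (or [B, 1])."""
    alpha = tf.random_uniform([tf.shape(real_z)[0], 1])   # per-sample interpolation factor
    z_interp = fake_z + alpha * (real_z - fake_z)
    grad = tf.gradients(discriminator(z_interp), [z_interp])[0]
    grad_norm = tf.norm(grad, axis=-1)
    return tf.reduce_mean(tf.maximum(grad_norm - 1.0, 0.0) ** 2)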
def _build_graph(self, inputs): #### def down_conv_block(name, l, channel, nr_blks, stride=1): with tf.variable_scope(name): if stride != 1: assert stride == 2, 'U-Net supports stride 2 down-sample only' l = MaxPooling('max_pool', l, 2, strides=2) for idx in range(0, nr_blks): l = Conv2D('conv_%d' % idx, l, channel, 3, padding='valid', strides=1, activation=BNReLU) return l #### def up_conv_block(name, l, shorcut, channel, nr_blks, stride=2): with tf.variable_scope(name): if stride != 1: up_channel = l.get_shape().as_list()[1] # NCHW assert stride == 2, 'U-Net supports stride 2 up-sample only' l = Conv2DTranspose('deconv', l, up_channel, 2, strides=2) l = tf.concat([l, shorcut], axis=1) for idx in range(0, nr_blks): l = Conv2D('conv_%d' % idx, l, channel, 3, padding='valid', strides=1, activation=BNReLU) return l #### is_training = get_current_tower_context().is_training images, truemap_coded = inputs orig_imgs = images if self.type_classification: true_type = truemap_coded[..., 1] true_type = tf.cast(true_type, tf.int32) true_type = tf.identity(true_type, name='truemap-type') one_type = tf.one_hot(true_type, 5, axis=-1) true_type = tf.expand_dims(true_type, axis=-1) true_dst = truemap_coded[..., -1] true_dst = tf.expand_dims(true_dst, axis=-1) true_dst = tf.identity(true_dst, name='truemap-dst') #### Xavier initializer with argscope(Conv2D, activation=tf.identity, use_bias=True, kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(), bias_initializer=tf.constant_initializer(0.1)), \ argscope([Conv2D, Conv2DTranspose, MaxPooling, BatchNorm], data_format=self.data_format): i = tf.transpose(images / 255.0, [0, 3, 1, 2]) #### with tf.variable_scope('encoder'): e0 = down_conv_block('e0', i, 32, nr_blks=2, stride=1) e1 = down_conv_block('e1', e0, 64, nr_blks=2, stride=2) e2 = down_conv_block('e2', e1, 128, nr_blks=2, stride=2) e3 = down_conv_block('e3', e2, 256, nr_blks=2, stride=2) e4 = down_conv_block('e4', e3, 512, nr_blks=2, stride=2) c0 = crop_op(e0, (176, 176)) c1 = crop_op(e1, (80, 80)) c2 = crop_op(e2, (32, 32)) c3 = crop_op(e3, (8, 8)) with tf.variable_scope('decoder'): d3 = up_conv_block('d3', e4, c3, 256, nr_blks=2, stride=2) d2 = up_conv_block('d2', d3, c2, 128, nr_blks=2, stride=2) d1 = up_conv_block('d1', d2, c1, 64, nr_blks=2, stride=2) d0 = up_conv_block('d0', d1, c0, 32, nr_blks=2, stride=2) #### logi_dst = Conv2D('conv_out_dst', d0, 1, 1, activation=tf.identity) logi_dst = tf.transpose(logi_dst, [0, 2, 3, 1]) pred_dst = tf.identity(logi_dst, name='predmap-dst') if self.type_classification: logi_type = Conv2D('conv_out_type', d0, 5, 1, activation=tf.identity) logi_type = tf.transpose(logi_type, [0, 2, 3, 1]) soft_type = tf.nn.softmax(logi_type, axis=-1) # encoded so that inference can extract all output at once predmap_coded = tf.concat([soft_type, pred_dst], axis=-1) else: predmap_coded = pred_dst # * channel ordering: type-map, segmentation map # encoded so that inference can extract all output at once predmap_coded = tf.identity(predmap_coded, name='predmap-coded') #### if is_training: ######## LOSS loss = 0 ### regression loss loss_mse = pred_dst - true_dst loss_mse = loss_mse * loss_mse loss_mse = tf.reduce_mean(loss_mse, name='loss_mse') loss += loss_mse if self.type_classification: loss_type = categorical_crossentropy(soft_type, one_type) loss_type = tf.reduce_mean(loss_type, name='loss-xentropy-class') add_moving_summary(loss_type) loss += loss_type wd_loss = regularize_cost('.*/W', l2_regularizer(5.0e-6), name='l2_regularize_loss') loss += wd_loss self.cost = 
tf.identity(loss, name='cost') add_moving_summary(self.cost) #### add_param_summary(('.*/W', ['histogram'])) # monitor W #### logging visual sthg orig_imgs = tf.cast(orig_imgs, tf.uint8) tf.summary.image('input', orig_imgs, max_outputs=1) orig_imgs = crop_op(orig_imgs, (184, 184), "NHWC") pred_dst = colorize(pred_dst[..., 0], cmap='jet') true_dst = colorize(true_dst[..., 0], cmap='jet') viz = tf.concat([ orig_imgs, true_dst, pred_dst, ], 2) tf.summary.image('output', viz, max_outputs=1) return
def build_graph(self, image, label): is_training = get_current_tower_context().is_training fw, fa, fg = get_dorefa(BITW, BITA, BITG) # monkey-patch tf.get_variable to apply fw def binarize_weight(v): name = v.op.name # don't binarize first and last layer if not name.endswith('W') or 'conv0' in name or 'weak' in name or 'fc' in name: return v else: logger.info("Binarizing weight {}".format(v.op.name)) return fw(v) #return ternarize(v) def cabs(x): return tf.minimum(1.0, tf.abs(x), name='cabs') def activate(x): return fa(cabs(x)) def merge(x, y): #return x + y #return x - y return tf.concat([x,y], axis=3) image = image / 256.0; k=3; zp=0.25; zp2=zp / 1 #scale = tf.train.exponential_decay(learning_rate=1.0, global_step=get_global_step_var(), decay_steps=4721*5, decay_rate=0.5, staircase=True, name='scale') #scale = tf.where(scale>0.001, scale, tf.zeros_like(scale)) scale = tf.train.cosine_decay(learning_rate=1.0, global_step=get_global_step_var(), decay_steps=4721*50, alpha=0.0) tf.summary.scalar('scale', scale); endconv=[]; endweak=[] #scale2 = tf.train.cosine_decay(learning_rate=1.0, global_step=get_global_step_var(), decay_steps=4721*50, alpha=0.0) #scale3 = tf.train.cosine_decay(learning_rate=1.0, global_step=get_global_step_var(), decay_steps=4721*80, alpha=0.0) with remap_variables(binarize_weight), \ argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \ argscope(Conv2D, use_bias=False): net=Conv2D('conv0', image, np.round(48*zp), 5, padding='VALID', use_bias=True) net=MaxPooling('pool0', net, 2, padding='SAME'); net=activate(net) net1=Conv2D('conv1', net, np.round(64*zp), 3, padding='SAME'); net1=BatchNorm('bn1', net1); endconv.append(net1) net2=Conv2D('weak1', net, np.round(64*zp2), k, padding='SAME'); net2=BatchNorm('bn12', net2); endweak.append(net2); # net2=tf.nn.relu(net2) net=merge(activate(net1), scale*net2) #net=activate(net1) net1=Conv2D('conv2', net, np.round(64*zp), 3, padding='SAME'); net1=BatchNorm('bn2', net1); endconv.append(net1) net2=Conv2D('weak2', net, np.round(64*zp2), k, padding='SAME'); net2=BatchNorm('bn22', net2); endweak.append(net2); # net2=tf.nn.relu(net2) net1=MaxPooling('pool1', net1, 2, padding='SAME'); net2=MaxPooling('pool12', net2, 2, padding='SAME'); net=merge(activate(net1), scale*net2) net=activate(net1) net1=Conv2D('conv3', net, np.round(128*zp), 3, padding='VALID'); net1=BatchNorm('bn3', net1); endconv.append(net1) net2=Conv2D('weak3', net, np.round(128*zp2), k, padding='VALID'); net2=BatchNorm('bn32', net2); endweak.append(net2); # net2=tf.nn.relu(net2) net=merge(activate(net1), scale*net2) #net=activate(net1) net1=Conv2D('conv4', net, np.round(128*zp), 3, padding='SAME'); net1=BatchNorm('bn4', net1); endconv.append(net1) net2=Conv2D('weak4', net, np.round(128*zp2), k, padding='SAME'); net2=BatchNorm('bn42', net2); endweak.append(net2); # net2=tf.nn.relu(net2) net=merge(activate(net1), scale*net2) # net=activate(net1) net1=Conv2D('conv5', net, np.round(128*zp), 3, padding='VALID'); net1=BatchNorm('bn5', net1); endconv.append(net1) net2=Conv2D('weak5', net, np.round(128*zp2), k, padding='VALID'); net2=BatchNorm('bn52', net2); endweak.append(net2); # net2=tf.nn.relu(net2) net=merge(activate(net1), scale*net2) #net=activate(net1) net=tf.nn.dropout(net, 0.5 if is_training else 1.0) net1=Conv2D('conv6', net, np.round(512*zp), 5, padding='VALID'); net1=BatchNorm('bn6', net1); endconv.append(net1) net2=Conv2D('weak6', net, np.round(512*zp2), 5, padding='VALID'); net2=BatchNorm('bn62', net2); endweak.append(net2); # net2=tf.nn.relu(net2) 
net=merge(cabs(net1), scale*net2) # net=cabs(net1) logits=FullyConnected('fc1', net, 10) tf.nn.softmax(logits, name='output') # compute the number of failed samples wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), tf.float32, name='wrong_tensor') # monitor training error add_moving_summary(tf.reduce_mean(wrong, name='train_error')) cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean(cost, name='cross_entropy_loss') # weight decay on all W of fc layers wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7)) add_param_summary(('.*/W', ['histogram', 'rms'])) total_cost = tf.add_n([cost, wd_cost], name='cost') add_moving_summary(cost, wd_cost, total_cost) for i in range(len(endweak)): add_moving_summary(tf.reduce_mean(tf.abs(endconv[i]), name='mean_conv_'+str(i+1) ) ) add_moving_summary(tf.reduce_mean(tf.abs(endweak[i]), name='mean_weak_'+str(i+1) ) ) return total_cost
def _build_graph(self, inputs): #### is_training = get_current_tower_context().is_training images, truemap_coded = inputs orig_imgs = images pen_map = truemap_coded[..., -1] if self.type_classification: true = truemap_coded[..., 1] else: true = truemap_coded[..., 0] true = tf.cast(true, tf.int32) true = tf.identity(true, name='truemap') one = tf.one_hot( true, self.nr_types if self.type_classification else self.nr_classes, axis=-1) true = tf.expand_dims(true, axis=-1) def down_branch(name, main_in, aux_in, ch): with tf.variable_scope(name): a = Conv2D('conv1', main_in, ch, 3, padding='valid', use_bias=False, activation=BNReLU) a = Conv2D('conv2', a, ch, 3, padding='valid', use_bias=True, activation=tf.nn.relu) a = MaxPooling('pool', a, 2, strides=2, padding='same') b = Conv2D('conv3', aux_in, ch, 3, padding='valid', use_bias=False, activation=BNReLU) b = Conv2D('conv4', b, ch, 3, padding='valid', use_bias=True, activation=tf.nn.relu) c = tf.concat([a, b], axis=1) return c def up_branch(name, main_in, aux_in, ch): with tf.variable_scope(name): a = Conv2DTranspose('up1', main_in, ch, 2, strides=(2, 2), padding='same', use_bias=True, activation=tf.identity) a = Conv2D('conv1', a, ch, 3, padding='valid', use_bias=True, activation=tf.nn.relu) a = Conv2D('conv2', a, ch, 3, padding='valid', use_bias=True, activation=tf.nn.relu) # stride 1 is no different from normal 5x5 conv, 'valid' to gain extrapolated border pixels b1 = Conv2DTranspose('up2', a, ch, 5, strides=(1, 1), padding='valid', use_bias=True, activation=tf.identity) b2 = Conv2DTranspose('up3', aux_in, ch, 5, strides=(1, 1), padding='valid', use_bias=True, activation=tf.identity) b = tf.concat([b1, b2], axis=1) b = Conv2D('conv3', b, ch, 1, padding='same', use_bias=True, activation=tf.nn.relu) return b def aux_branch(name, main_in, up_kernel, up_strides): ch = main_in.get_shape().as_list()[1] # NCHW with tf.variable_scope(name): # preserve the depth a = Conv2DTranspose('up', main_in, ch, up_kernel, strides=up_strides, padding='same', use_bias=True, activation=tf.identity) a = Conv2D('conv', a, self.nr_types if self.type_classification else self.nr_classes, 3, padding='valid', activation=tf.nn.relu) a = tf.layers.dropout(a, rate=0.5, seed=5, training=is_training) return a #### Xavier initializer with argscope(Conv2D, activation=tf.identity, kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=True), bias_initializer=tf.constant_initializer(0.1)), \ argscope(Conv2DTranspose, activation=tf.identity, kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=True), bias_initializer=tf.constant_initializer(0.1)), \ argscope([Conv2D, Conv2DTranspose, MaxPooling, BatchNorm], data_format=self.data_format): i = tf.transpose(images / 255.0, [0, 3, 1, 2]) # our way resize_func = lambda x, y: resize_op( x, size=y, interp='bicubic', data_format='channels_first') #### b1 = down_branch('b1', i, resize_func(i, (128, 128)), 64) b2 = down_branch('b2', b1, resize_func(i, (64, 64)), 128) b3 = down_branch('b3', b2, resize_func(i, (32, 32)), 256) b4 = down_branch('b4', b3, resize_func(i, (16, 16)), 512) with tf.variable_scope('b5'): b5 = Conv2D('conv1', b4, 2048, 3, padding='valid', use_bias=True, activation=tf.nn.relu) b5 = Conv2D('conv2', b5, 2048, 3, padding='valid', use_bias=True, activation=tf.nn.relu) b6 = up_branch('b6', b5, b4, 1024) b7 = up_branch('b7', b6, b3, 512) b8 = up_branch('b8', b7, b2, 256) b9 = up_branch('b9', b8, b1, 128) aux_out1 = aux_branch('aux_out1', b9, 2, (2, 2)) aux_out2 = aux_branch('aux_out2', b8, 4, 
(4, 4)) aux_out3 = aux_branch('aux_out3', b7, 8, (8, 8)) out = tf.concat([aux_out1, aux_out2, aux_out3], axis=1) out_list = [out, aux_out1, aux_out2, aux_out3] soft_list = [] prob_list = [] for idx, sub_out in enumerate(out_list): logi = Conv2D('conv_out%d' % idx, sub_out, self.nr_types if self.type_classification else self.nr_classes, 3, padding='valid', use_bias=True, activation=tf.identity) logi = tf.transpose(logi, [0, 2, 3, 1]) soft = tf.nn.softmax(logi, axis=-1) if self.type_classification: prob_np = tf.reduce_sum(soft[..., 1:], axis=-1, keepdims=True) prob_np = tf.identity(prob_np, name='predmap-prob-np') else: prob_np = tf.identity(soft[..., 1], name='predmap-prob') prob_np = tf.expand_dims(prob_np, axis=-1) soft_list.append(soft) prob_list.append(prob_np) # return the aggregated output # encoded so that inference can extract all output at once if self.type_classification: predmap_coded = tf.concat([soft_list[0], prob_list[0]], axis=-1, name='predmap-coded') else: predmap_coded = tf.identity(prob_list[0], name='predmap-coded') #### if is_training: ######## LOSS # get the variable to received fed weight from external scheduler with tf.variable_scope("", reuse=True): aux_loss_dw = tf.get_variable('aux_loss_dw') loss_list = [] # index 0 is main output global_step = tf.train.get_or_create_global_step() global_step = tf.cast(global_step, tf.float32) for idx, sub_soft in enumerate(soft_list): loss_bce = categorical_crossentropy(sub_soft, one) loss_bce = tf.reduce_mean(loss_bce * pen_map) loss_bce = loss_bce if idx == 0 else loss_bce * aux_loss_dw loss_bce = tf.identity(loss_bce, name='loss-bce-%d' % idx) loss_list.append(loss_bce) add_moving_summary(loss_bce) wd_loss = regularize_cost('.*/W', l2_regularizer(1.0e-5), name='l2_wd_loss') add_moving_summary(wd_loss) cost = tf.add_n(loss_list) + wd_loss self.cost = tf.identity(cost, name='overall_cost') add_moving_summary(self.cost) #### add_param_summary(('.*/W', ['histogram'])) # monitor W #### logging visual sthg orig_imgs = tf.cast(orig_imgs, tf.uint8) tf.summary.image('input', orig_imgs, max_outputs=1) colored_list = [true ] + prob_list + [tf.expand_dims(pen_map, axis=-1)] colored_list = [ colorize(feat[..., 0], cmap='jet') for feat in colored_list ] viz = tf.concat([orig_imgs] + colored_list, 2) tf.summary.image('output', viz, max_outputs=1) return
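# --- Hedged sketch: one plausible way the externally-fed 'aux_loss_dw' weight could be driven. ---
# The graph above only *reads* aux_loss_dw (variable_scope with reuse=True); creating the variable
# and decaying it over time is left to the training setup. Assuming tensorpack is used, a
# ScheduledHyperParamSetter targeting the variable name is a common choice; the schedule values
# below are purely illustrative.
from tensorpack.callbacks import ScheduledHyperParamSetter

aux_dw_schedule = ScheduledHyperParamSetter(
    'aux_loss_dw', [(0, 1.0), (20, 0.5), (40, 0.25), (60, 0.0)])  # (epoch, value) pairs
# ... then pass aux_dw_schedule in the callbacks list of the training config.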
def _build_graph(self, inputs): """This function should build the model which takes the input variables and define self.cost at the end""" # inputs contains a list of input variables defined above image, label = inputs # In tensorflow, inputs to convolution function are assumed to be # NHWC. Add a single channel here. image = tf.expand_dims(image, 3) image = image * 2 - 1 # center the pixels values at zero # The context manager `argscope` sets the default option for all the layers under # this context. Here we use 32 channel convolution with shape 3x3 with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32): l = tf.layers.conv2d(image, 32, 3, padding='same', activation=tf.nn.relu, name='conv0') l = tf.layers.max_pooling2d(l, 2, 2, padding='valid') l = tf.layers.conv2d(l, 32, 3, padding='same', activation=tf.nn.relu, name='conv1') l = tf.layers.conv2d(l, 32, 3, padding='same', activation=tf.nn.relu, name='conv2') l = tf.layers.max_pooling2d(l, 2, 2, padding='valid') l = tf.layers.conv2d(l, 32, 3, padding='same', activation=tf.nn.relu, name='conv3') l = tf.layers.flatten(l) l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc0') l = tf.layers.dropout( l, rate=0.5, training=get_current_tower_context().is_training) logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1') tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities # a vector of length B with loss of each sample cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean( cost, name='cross_entropy_loss') # the average cross-entropy loss correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct') accuracy = tf.reduce_mean(correct, name='accuracy') # This will monitor training error (in a moving_average fashion): # 1. write the value to tensosrboard # 2. write the value to stat.json # 3. print the value after each epoch train_error = tf.reduce_mean(1 - correct, name='train_error') summary.add_moving_summary(train_error, accuracy) # Use a regex to find parameters to apply weight decay. # Here we apply a weight decay on all W (weight matrix) of all fc layers wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/kernel', tf.nn.l2_loss), name='regularize_loss') self.cost = tf.add_n([wd_cost, cost], name='total_cost') summary.add_moving_summary(cost, wd_cost, self.cost) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
def build_graph(self, image, edgemap): image = image - tf.constant([104, 116, 122], dtype='float32') edgemap = tf.expand_dims(edgemap, 3, name='edgemap4d') def branch(name, l, up): with tf.variable_scope(name): l = Conv2D('convfc', l, 1, kernel_size=1, activation=tf.identity, use_bias=True, kernel_initializer=tf.constant_initializer()) while up != 1: l = BilinearUpSample('upsample{}'.format(up), l, 2) up = up / 2 return l with argscope(Conv2D, kernel_size=3, activation=tf.nn.relu): l = Conv2D('conv1_1', image, 64) l = Conv2D('conv1_2', l, 64) b1 = branch('branch1', l, 1) l = MaxPooling('pool1', l, 2) l = Conv2D('conv2_1', l, 128) l = Conv2D('conv2_2', l, 128) b2 = branch('branch2', l, 2) l = MaxPooling('pool2', l, 2) l = Conv2D('conv3_1', l, 256) l = Conv2D('conv3_2', l, 256) l = Conv2D('conv3_3', l, 256) b3 = branch('branch3', l, 4) l = MaxPooling('pool3', l, 2) l = Conv2D('conv4_1', l, 512) l = Conv2D('conv4_2', l, 512) l = Conv2D('conv4_3', l, 512) b4 = branch('branch4', l, 8) l = MaxPooling('pool4', l, 2) l = Conv2D('conv5_1', l, 512) l = Conv2D('conv5_2', l, 512) l = Conv2D('conv5_3', l, 512) b5 = branch('branch5', l, 16) final_map = Conv2D('convfcweight', tf.concat([b1, b2, b3, b4, b5], 3), 1, kernel_size=1, kernel_initializer=tf.constant_initializer(0.2), use_bias=False, activation=tf.identity) costs = [] for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]): output = tf.nn.sigmoid(b, name='output{}'.format(idx + 1)) xentropy = class_balanced_sigmoid_cross_entropy( b, edgemap, name='xentropy{}'.format(idx + 1)) costs.append(xentropy) # some magic threshold pred = tf.cast(tf.greater(output, 0.5), tf.int32, name='prediction') wrong = tf.cast(tf.not_equal(pred, edgemap), tf.float32) wrong = tf.reduce_mean(wrong, name='train_error') if get_current_tower_context().is_training: wd_w = tf.train.exponential_decay(2e-4, get_global_step_var(), 80000, 0.7, True) wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost') costs.append(wd_cost) add_param_summary(('.*/W', ['histogram'])) # monitor W total_cost = tf.add_n(costs, name='cost') add_moving_summary(costs + [wrong, total_cost]) return total_cost
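# --- Hedged sketch: a class-balanced sigmoid cross-entropy of the kind used per side output above. ---
# class_balanced_sigmoid_cross_entropy is a tensorpack helper; this sketch only shows the usual
# HED-style weighting, where positive and negative pixels are re-weighted by the fraction of
# negative / positive pixels in the label map. It may differ from the helper in small details.
import tensorflow as tf

def class_balanced_xent_sketch(logits, label):
    """logits: [B, H, W, 1] raw side-output; label: [B, H, W, 1] binary edge map."""
    y = tf.cast(label, tf.float32)
    count_pos = tf.reduce_sum(y)
    count_neg = tf.reduce_sum(1.0 - y)
    beta = count_neg / (count_neg + count_pos + 1e-8)   # weight given to positive (edge) pixels
    per_pixel = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
    weights = beta * y + (1.0 - beta) * (1.0 - y)
    return tf.reduce_mean(weights * per_pixel)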
def _build_graph(self, inputs): image, label = inputs is_training = get_current_tower_context().is_training fw, fa, fg = get_dorefa(BITW, BITA, BITG) # monkey-patch tf.get_variable to apply fw def binarize_weight(v): name = v.op.name # don't binarize first and last layer if not name.endswith('W') or 'conv0' in name or 'fc' in name: return v else: logger.info("Binarizing weight {}".format(v.op.name)) return fw(v) def cabs(x): return tf.minimum(1.0, tf.abs(x), name='cabs') def activate(x): return fa(cabs(x)) image = image / 256.0 with remap_variables(binarize_weight), \ argscope(BatchNorm, decay=0.9, epsilon=1e-4), \ argscope(Conv2D, use_bias=False, nl=tf.identity): logits = ( LinearWrap(image).Conv2D('conv0', 48, 5, padding='VALID', use_bias=True).MaxPooling( 'pool0', 2, padding='SAME').apply(activate) # 18 .Conv2D('conv1', 64, 3, padding='SAME').apply(fg).BatchNorm( 'bn1').apply(activate).Conv2D( 'conv2', 64, 3, padding='SAME').apply(fg).BatchNorm('bn2').MaxPooling( 'pool1', 2, padding='SAME').apply(activate) # 9 .Conv2D( 'conv3', 128, 3, padding='VALID').apply(fg).BatchNorm('bn3').apply(activate) # 7 .Conv2D('conv4', 128, 3, padding='SAME').apply(fg). BatchNorm('bn4').apply(activate).Conv2D( 'conv5', 128, 3, padding='VALID').apply(fg).BatchNorm('bn5').apply(activate) # 5 .tf.nn.dropout(0.5 if is_training else 1.0).Conv2D( 'conv6', 512, 5, padding='VALID').apply(fg).BatchNorm( 'bn6').apply(cabs).FullyConnected('fc1', 10, nl=tf.identity)()) tf.nn.softmax(logits, name='output') # compute the number of failed samples wrong = prediction_incorrect(logits, label) # monitor training error add_moving_summary(tf.reduce_mean(wrong, name='train_error')) cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean(cost, name='cross_entropy_loss') # weight decay on all W of fc layers wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7)) add_param_summary(('.*/W', ['histogram', 'rms'])) self.cost = tf.add_n([cost, wd_cost], name='cost') add_moving_summary(cost, wd_cost, self.cost)
def _build_graph(self, inputs): def resnet101(image): mode = 'resnet' depth = 101 basicblock = preresnet_basicblock if mode == 'preact' else resnet_basicblock bottleneck = { 'resnet': resnet_bottleneck_deeplab, 'preact': preresnet_bottleneck, 'se': se_resnet_bottleneck }[mode] num_blocks, block_func = { 18: ([2, 2, 2, 2], basicblock), 34: ([3, 4, 6, 3], basicblock), 50: ([3, 4, 6, 3], bottleneck), 101: ([3, 4, 23, 3], bottleneck), 152: ([3, 8, 36, 3], bottleneck) }[depth] def get_logits(image): with argscope( [Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format="NHWC"): return resnet_backbone( image, num_blocks, preresnet_group if mode == 'preact' else resnet_group, block_func, CLASS_NUM, ASPP=False) return get_logits(image) image, label = inputs image = image - tf.constant([104, 116, 122], dtype='float32') label = tf.identity(label, name="label") predict = resnet101(image) costs = [] prob = tf.nn.softmax(predict, name='prob') label4d = tf.expand_dims(label, 3, name='label4d') new_size = prob.get_shape()[1:3] cost = softmax_cross_entropy_with_ignore_label(logits=predict, label=label4d, class_num=CLASS_NUM) prediction = tf.argmax(prob, axis=-1, name="prediction") cost = tf.reduce_mean( cost, name='cross_entropy_loss') # the average cross-entropy loss costs.append(cost) if get_current_tower_context().is_training: wd_w = tf.train.exponential_decay(2e-4, get_global_step_var(), 80000, 0.7, True) wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost') costs.append(wd_cost) add_param_summary(('.*/W', ['histogram'])) # monitor W self.cost = tf.add_n(costs, name='cost')
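# --- Hedged sketch: cross-entropy with an ignore label, as used for the segmentation loss above. ---
# softmax_cross_entropy_with_ignore_label is a project helper; this sketch assumes the usual
# recipe (drop pixels whose label equals ignore_label, then average over the rest) and uses a
# [B, H, W] label map rather than the label4d tensor above.
import tensorflow as tf

def xent_with_ignore_label(logits, label, class_num, ignore_label=255):
    """logits: [B, H, W, C]; label: [B, H, W] int labels with ignore_label marking void pixels."""
    label_flat = tf.reshape(label, [-1])
    logits_flat = tf.reshape(logits, [-1, class_num])
    valid = tf.where(tf.not_equal(label_flat, ignore_label))[:, 0]
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.gather(label_flat, valid), logits=tf.gather(logits_flat, valid))
    return tf.reduce_mean(xent)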
def build_graph(self, x, center_label, heading_class_label, heading_residual_label, size_class_label, size_residual_label,sem_cls_label, box_label_mask, vote_label, vote_label_mask, scan_idx, max_gt_bboxes): l0_xyz = x[:,:,:3] l0_points = None if x.shape[-1] <=3 else x[:,:,3:] end_points = {} # Set Abstraction layers l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=2048, radius=0.2, nsample=64, mlp=[64, 64, 128], mlp2=None, group_all=False, scope='sa1', use_xyz=True, normalize_xyz=True) l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=1024, radius=0.4, nsample=32, mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa2', use_xyz=True, normalize_xyz=True) l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=512, radius=0.8, nsample=16, mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa3', use_xyz=True, normalize_xyz=True) l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz, l3_points, npoint=256, radius=1.2, nsample=16, mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa4', use_xyz=True, normalize_xyz=True) # Feature Propagation layers l3_points = pointnet_fp_module(l3_xyz, l4_xyz, l3_points, l4_points, [256, 256], scope='fp1') seed_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256, 256], scope='fp2') seed_xyz = l2_xyz # fp2_inds fp2_inds = l1_indices[:, 0:tf.shape(seed_xyz)[1]] # Voting Module layers # seed_xyz seed_points (B, 512, 3/C) vote_xyz, vote_features = self.hough_voting_mlp(seed_xyz, seed_points) # Proposal Module layers # Farthest point sampling on seeds proposals_xyz, proposals_output, _ = pointnet_sa_module(vote_xyz, vote_features, npoint=config.PROPOSAL_NUM, radius=0.3, nsample=64, mlp=[128, 128, 128], mlp2=[128, 128,5+2 * config.NH+4 * config.NS+config.NC], group_all=False, scope='proposal', use_xyz=True, normalize_xyz=True) end_points['proposals_xyz'] = proposals_xyz end_points = self.parse_outputs_to_tensor(proposals_output, end_points) self.calc_inference_v1(end_points) vote_loss = self.vote_reg_loss(seed_xyz, vote_xyz, fp2_inds, vote_label, vote_label_mask) objectness_loss, objectness_label, objectness_mask, object_assignment = self.compute_objectness_loss( proposals_xyz, center_label, end_points) loss_points = self.compute_box_loss_and_sem_loss(end_points, center_label, heading_class_label, heading_residual_label, size_class_label, size_residual_label, sem_cls_label, object_assignment, box_label_mask, objectness_label) # box loss box_loss = tf.identity(loss_points['center_loss'] + 0.1 * loss_points['heading_cls_loss'] + loss_points['heading_residual_loss']+ 0.1 * loss_points['size_cls_loss'] + loss_points['size_residual_loss'], name='box_loss') # wd_cost = tf.multiply(1e-5, # regularize_cost('.*/W', tf.nn.l2_loss), # name='regularize_loss') total_cost = vote_loss + 0.5 * objectness_loss + 1. 
* box_loss + 0.1 * loss_points['sem_cls_loss'] # if not get_current_tower_context().is_training: # self.calc_inference() total_cost = tf.identity(total_cost, 'total_loss') # total_cost = tf.add_n([total_cost, wd_cost], name='total_loss') summary.add_moving_summary(total_cost, vote_loss, objectness_loss, box_loss, loss_points['center_loss'], loss_points['center_loss_left'], loss_points['center_loss_right'], loss_points['heading_cls_loss'], loss_points['heading_residual_loss'], loss_points['size_cls_loss'], loss_points['size_residual_loss'], loss_points['sem_cls_loss'], # wd_cost, decay=0) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('.*/W', ['histogram', 'rms'])) # the function should return the total cost to be optimized return total_cost
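# --- Hedged sketch: a VoteNet-style vote regression loss in the spirit of self.vote_reg_loss above. ---
# The real vote_reg_loss consumes vote_label / vote_label_mask and the fp2_inds seed indices from
# the data pipeline; this simplified version assumes a single ground-truth vote target per seed and
# a {0, 1} mask of seeds that actually lie on an object.
import tensorflow as tf

def vote_reg_loss_sketch(vote_xyz, vote_gt, vote_mask):
    """vote_xyz, vote_gt: [B, N, 3]; vote_mask: [B, N], 1 for seeds on annotated objects."""
    dist = tf.reduce_sum(tf.abs(vote_xyz - vote_gt), axis=-1)   # per-seed L1 distance to its target
    mask = tf.cast(vote_mask, tf.float32)
    return tf.reduce_sum(dist * mask) / (tf.reduce_sum(mask) + 1e-6)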
def _build_graph(self, inputs): is_training = get_current_tower_context().is_training images, truemap_coded = inputs orig_imgs = images pen_map = truemap_coded[..., -1] true_np = truemap_coded[..., 0] true_np = tf.cast(true_np, tf.int32) true_np = tf.identity(true_np, name="truemap-np") one_np = tf.one_hot(true_np, 2, axis=-1) true_np = tf.expand_dims(true_np, axis=-1) true_mk = truemap_coded[..., 1:4] true_mk = tf.cast(true_mk, tf.int32) true_mk = tf.identity(true_mk, name="truemap-mk") one_mk = tf.cast(true_mk, tf.float32) #### with argscope( Conv2D, activation=tf.identity, use_bias=False, # K.he initializer W_init=tf.variance_scaling_initializer(scale=2.0, mode="fan_out"), ), argscope([Conv2D], data_format=self.data_format): i = images if not self.input_norm else images / 255.0 #### d = encoder( "encoder", i, self.basis_filter_list, self.rot_matrix_list, self.nr_orients, self.filter_type, is_training, ) #### feat = decoder( "decoder", d, self.basis_filter_list, self.rot_matrix_list, self.nr_orients, self.filter_type, is_training, ) feat_np = Conv2D("feat_np", feat, 96, 1, use_bias=True, nl=BNReLU) o_logi_np = Conv2D("output_np", feat_np, 2, 1, use_bias=True, nl=tf.identity) soft_np = tf.nn.softmax(o_logi_np, axis=-1) prob_np = tf.identity(soft_np[..., 1], name="predmap-prob") prob_np = tf.expand_dims(prob_np, -1) feat_mk = Conv2D("feat_mk", feat, 96, 1, use_bias=True, nl=BNReLU) o_logi_mk = Conv2D("output_mk", feat_mk, 3, 1, use_bias=True, nl=tf.identity) soft_mk = tf.nn.softmax(o_logi_mk, axis=-1) prob_mk = tf.identity(soft_mk[..., :2], name="predmap-prob") # encoded so that inference can extract all output at once predmap_coded = tf.concat([prob_np, prob_mk], axis=-1, name="predmap-coded") #### if get_current_tower_context().is_training: # ---- LOSS ----# loss = 0 for term, weight in self.loss_term.items(): if term == "bce": term_loss_np = categorical_crossentropy(soft_np, one_np) term_loss_np = tf.reduce_mean(term_loss_np, name="loss-bce-np") term_loss_mk = categorical_crossentropy(soft_mk, one_mk) term_loss_mk = tf.reduce_mean(term_loss_mk * pen_map, name="loss-bce-mk") elif "dice" in self.loss_term: # branch 1 term_loss_np = dice_loss( soft_np[..., 0], one_np[..., 0]) + dice_loss( soft_np[..., 1], one_np[..., 1]) term_loss_np = tf.identity(term_loss_np, name="loss-dice-np") term_loss_mk = dice_loss( soft_mk[..., 0], one_mk[..., 0]) + dice_loss( soft_mk[..., 1], one_mk[..., 1]) term_loss_mk = tf.identity(term_loss_mk, name="loss-dice-mk") else: assert False, "Not support loss term: %s" % term add_moving_summary(term_loss_np) add_moving_summary(term_loss_mk) loss += term_loss_np + term_loss_mk ### combine the loss into single cost function wd_loss = regularize_cost(".*/W", l2_regularizer(1.0e-7), name="l2_wd_loss") add_moving_summary(wd_loss) self.cost = tf.identity(loss + wd_loss, name="overall-loss") add_moving_summary(self.cost) #### add_param_summary((".*/W", ["histogram"])) # monitor W ### logging visual sthg orig_imgs = tf.cast(orig_imgs, tf.uint8) tf.summary.image("input", orig_imgs, max_outputs=1) pred_np = colorize(prob_np[..., 0], cmap="jet") true_np = colorize(true_np[..., 0], cmap="jet") pred_mk_blb = colorize(prob_mk[..., 0], cmap="jet") true_mk_blb = colorize(true_mk[..., 0], cmap="jet") pred_mk_cnt = colorize(prob_mk[..., 1], cmap="jet") true_mk_cnt = colorize(true_mk[..., 1], cmap="jet") viz = tf.concat( [ orig_imgs, pred_np, pred_mk_blb, pred_mk_cnt, true_np, true_mk_blb, true_mk_cnt, ], 2, ) viz = tf.concat([viz[0], viz[-1]], axis=0) viz = tf.expand_dims(viz, 
axis=0) tf.summary.image("output", viz, max_outputs=1) return
def get_logits(self, image): if BITW == 't': fw, fa, fg = get_dorefa(32, 32, 32) fw = ternarize else: fw, fa, fg = get_dorefa(BITW, BITA, BITG) # monkey-patch tf.get_variable to apply fw def new_get_variable(v): name = v.op.name # don't binarize first and last layer if not name.endswith('W') or 'conv0' in name or 'fct' in name: return v else: logger.info("Quantizing weight {}".format(v.op.name)) return fw(v) def nonlin(x): if BITA == 32: return tf.nn.relu(x) # still use relu for 32bit cases return tf.clip_by_value(x, 0.0, 1.0) def activate(x): return fa(nonlin(x)) with remap_variables(new_get_variable), \ argscope([Conv2D, BatchNorm, MaxPooling], data_format='channels_first'), \ argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \ argscope(Conv2D, use_bias=False): logits = (LinearWrap(image) .Conv2D('conv0', 96, 12, strides=4, padding='VALID', use_bias=True) .apply(activate) .Conv2D('conv1', 256, 5, padding='SAME', split=2) .apply(fg) .BatchNorm('bn1') .MaxPooling('pool1', 3, 2, padding='SAME') .apply(activate) .Conv2D('conv2', 384, 3) .apply(fg) .BatchNorm('bn2') .MaxPooling('pool2', 3, 2, padding='SAME') .apply(activate) .Conv2D('conv3', 384, 3, split=2) .apply(fg) .BatchNorm('bn3') .apply(activate) .Conv2D('conv4', 256, 3, split=2) .apply(fg) .BatchNorm('bn4') .MaxPooling('pool4', 3, 2, padding='VALID') .apply(activate) .FullyConnected('fc0', 4096) .apply(fg) .BatchNorm('bnfc0') .apply(activate) .FullyConnected('fc1', 4096, use_bias=False) .apply(fg) .BatchNorm('bnfc1') .apply(nonlin) .FullyConnected('fct', 1000, use_bias=True)()) add_param_summary(('.*/W', ['histogram', 'rms'])) tf.nn.softmax(logits, name='output') # for prediction return logits
def build_graph(self, _, x): l0_xyz = x l0_points = x # Set Abstraction layers l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=2048, radius=0.2, nsample=64, mlp=[64, 64, 128], mlp2=None, group_all=False, scope='sa1') l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=1024, radius=0.4, nsample=64, mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa2') l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=512, radius=0.8, nsample=64, mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa3') l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz, l3_points, npoint=256, radius=1.2, nsample=64, mlp=[128, 128, 256], mlp2=None, group_all=False, scope='sa4') # Feature Propagation layers l3_points = pointnet_fp_module(l3_xyz, l4_xyz, l3_points, l4_points, [256, 256], scope='fp1') seeds_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256, 256], scope='fp2') seeds_xyz = l2_xyz # Voting Module layers offset = tf.reshape(tf.concat([seeds_xyz, seeds_points], 2), [-1, 256 + 3]) units = [256, 256, 256 + 3] for i in range(len(units)): offset = FullyConnected( 'voting%d' % i, offset, units[i], activation=BNReLU if i < len(units) - 1 else None) offset = tf.reshape(offset, [-1, 1024, 256 + 3]) # B * N * 3 votes = tf.concat([seeds_xyz, seeds_points], 2) + offset votes_xyz = votes[:, :, :3] ''' dist2center = tf.abs(tf.expand_dims(seeds_xyz, 2) - tf.expand_dims(bboxes_xyz, 1)) surface_ind = tf.less(dist2center, tf.expand_dims(bboxes_lwh, 1) / 2.) # B * N * BB * 3, bool surface_ind = tf.equal(tf.count_nonzero(surface_ind, -1), 3) # B * N * BB surface_ind = tf.greater_equal(tf.count_nonzero(surface_ind, -1), 1) # B * N, should be in at least one bbox ''' ''' dist2center_norm = tf.norm(dist2center, axis=-1) # B * N * BB votes_assignment = tf.argmin(dist2center_norm, -1, output_type=tf.int32) # B * N, int bboxes_xyz_votes_gt = tf.gather_nd(bboxes_xyz, tf.stack([ tf.tile(tf.expand_dims(tf.range(tf.shape(votes_assignment)[0]), -1), [1, tf.shape(votes_assignment)[1]]), votes_assignment], 2)) # B * N * 3 vote_reg_loss = tf.reduce_mean(tf.norm(votes_xyz - bboxes_xyz_votes_gt, ord=1, axis=-1) * tf.cast(surface_ind, tf.float32), name='vote_reg_loss') ''' votes_points = votes[:, :, 3:] # Proposal Module layers # Farthest point sampling on seeds proposals_xyz, proposals_output, _ = pointnet_sa_module( votes_xyz, votes_points, npoint=config.PROPOSAL_NUM, radius=0.3, nsample=64, mlp=[128, 128, 128], # mlp2=[128, 128, 5+2 * config.NH+4 * config.NS+config.NC], mlp2=[128, 128, config.PARA_MUN], group_all=False, scope='proposal', sample_xyz=seeds_xyz) ''' nms_iou = tf.get_variable('nms_iou', shape=[], initializer=tf.constant_initializer(0.25), trainable=False) ''' if not get_current_tower_context().is_training: def get_3d_bbox(box_size, heading_angle, center): batch_size = tf.shape(heading_angle)[0] c = tf.cos(heading_angle) s = tf.sin(heading_angle) zeros = tf.zeros_like(c) ones = tf.ones_like(c) rotation = tf.reshape( tf.stack([c, zeros, s, zeros, ones, zeros, -s, zeros, c], -1), tf.stack([batch_size, -1, 3, 3])) l, w, h = box_size[..., 0], box_size[..., 1], box_size[ ..., 2] # lwh(xzy) order!!! 
corners = tf.reshape( tf.stack([ l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2, -h / 2, -h / 2, w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2 ], -1), tf.stack([batch_size, -1, 3, 8])) return tf.einsum('ijkl,ijlm->ijmk', rotation, corners) + tf.expand_dims( center, 2) # B * N * 8 * 3 class_mean_size_tf = tf.constant(class_mean_size) size_cls_pred = tf.argmax( proposals_output[..., 5 + 2 * config.NH:5 + 2 * config.NH + config.NS], axis=-1) size_cls_pred_onehot = tf.one_hot(size_cls_pred, depth=config.NS, axis=-1) # B * N * NS size_residual_pred = tf.reduce_sum( tf.expand_dims(size_cls_pred_onehot, -1) * tf.reshape( proposals_output[..., 5 + 2 * config.NH + config.NS:5 + 2 * config.NH + 4 * config.NS], (-1, config.PROPOSAL_NUM, config.NS, 3)), axis=2) size_pred = tf.gather_nd( class_mean_size_tf, tf.expand_dims(size_cls_pred, -1)) * tf.maximum( 1 + size_residual_pred, 1e-6) # B * N * 3: size # with tf.control_dependencies([tf.print(size_pred[0, 0, 2])]): center_pred = proposals_xyz + proposals_output[..., 2:5] # B * N * 3 heading_cls_pred = tf.argmax(proposals_output[..., 5:5 + config.NH], axis=-1) heading_cls_pred_onehot = tf.one_hot(heading_cls_pred, depth=config.NH, axis=-1) heading_residual_pred = tf.reduce_sum( heading_cls_pred_onehot * proposals_output[..., 5 + config.NH:5 + 2 * config.NH], axis=2) heading_pred = tf.floormod( (tf.cast(heading_cls_pred, tf.float32) * 2 + heading_residual_pred) * np.pi / config.NH, 2 * np.pi) # with tf.control_dependencies([tf.print(size_residual_pred[0, :10, :]), tf.print(size_pred[0, :10, :])]): bboxes = get_3d_bbox( size_pred, heading_pred, center_pred) # B * N * 8 * 3, lhw(xyz) order!!! # bbox_corners = tf.concat([bboxes[:, :, 6, :], bboxes[:, :, 0, :]], axis=-1) # B * N * 6, lhw(xyz) order!!! 
# with tf.control_dependencies([tf.print(bboxes[0, 0])]): nms_idx = NMS3D(bboxes, tf.reduce_max(proposals_output[..., -config.NC:], axis=-1), proposals_output[..., :2], nms_iou) # Nnms * 2 bboxes_pred = tf.gather_nd(bboxes, nms_idx, name='bboxes_pred') # Nnms * 8 * 3 class_scores_pred = tf.gather_nd( proposals_output[..., -config.NC:], nms_idx, name='class_scores_pred') # Nnms * C batch_idx = tf.identity( nms_idx[:, 0], name='batch_idx' ) # Nnms, this is used to identify between batches return # calculate positive and negative proposal idxes bboxes_xyz_gt = bboxes_xyz # B * BB * 3 ''' bboxes_labels_gt = semantic_labels # B * BB bboxes_heading_labels_gt = heading_labels bboxes_heading_residuals_gt = heading_residuals bboxes_size_labels_gt = size_labels bboxes_size_residuals_gt = size_residuals dist_mat = tf.norm(tf.expand_dims(proposals_xyz, 2) - tf.expand_dims(bboxes_xyz_gt, 1), axis=-1) # B * PR * BB bboxes_assignment = tf.argmin(dist_mat, axis=-1) # B * PR min_dist = tf.reduce_min(dist_mat, axis=-1) ''' ''' positive_idxes = tf.where(min_dist < config.POSITIVE_THRES) # Np * 2 # with tf.control_dependencies([tf.print(tf.shape(positive_idxes))]): negative_idxes = tf.where(min_dist > config.NEGATIVE_THRES) # Nn * 2 positive_gt_idxes = tf.stack([positive_idxes[:, 0], tf.gather_nd(bboxes_assignment, positive_idxes)], axis=1) # objectiveness loss pos_obj_cls_score = tf.gather_nd(obj_cls_score, positive_idxes) pos_obj_cls_gt = tf.ones([tf.shape(positive_idxes)[0]], dtype=tf.int32) neg_obj_cls_score = tf.gather_nd(obj_cls_score, negative_idxes) neg_obj_cls_gt = tf.zeros([tf.shape(negative_idxes)[0]], dtype=tf.int32) obj_cls_loss = tf.identity(tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pos_obj_cls_score, labels=pos_obj_cls_gt)) + tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=neg_obj_cls_score, labels=neg_obj_cls_gt)), name='obj_cls_loss') obj_correct = tf.concat([tf.cast(tf.nn.in_top_k(pos_obj_cls_score, pos_obj_cls_gt, 1), tf.float32), tf.cast(tf.nn.in_top_k(neg_obj_cls_score, neg_obj_cls_gt, 1), tf.float32)], axis=0, name='obj_correct') obj_accuracy = tf.reduce_mean(obj_correct, name='obj_accuracy') ''' ''' # center regression losses center_gt = tf.gather_nd(bboxes_xyz_gt, positive_gt_idxes) delta_predicted = tf.gather_nd(proposals_output[..., 2:5], positive_idxes) delta_gt = center_gt - tf.gather_nd(proposals_xyz, positive_idxes) center_loss = tf.reduce_mean(tf.reduce_sum(tf.losses.huber_loss(labels=delta_gt, predictions=delta_predicted, reduction=tf.losses.Reduction.NONE), axis=-1)) ''' ''' # Appendix A1: chamfer loss, assignment at least one bbox to each gt bbox bboxes_assignment_dual = tf.argmin(dist_mat, axis=1) # B * BB batch_idx = tf.tile(tf.expand_dims(tf.range(tf.shape(bboxes_assignment_dual, out_type=tf.int64)[0]), axis=-1), [1, tf.shape(bboxes_assignment_dual)[1]]) # B * BB delta_gt_dual = bboxes_xyz_gt - tf.gather_nd(proposals_xyz, tf.stack([batch_idx, bboxes_assignment_dual], axis=-1)) # B * BB * 3 delta_predicted_dual = tf.gather_nd(proposals_output[..., 2:5], tf.stack([batch_idx, bboxes_assignment_dual], axis=-1)) # B * BB * 3 center_loss_dual = tf.reduce_mean(tf.reduce_sum(tf.losses.huber_loss(labels=delta_gt_dual, predictions=delta_predicted_dual, reduction=tf.losses.Reduction.NONE), axis=-1)) # add up center_loss += center_loss_dual ''' ''' # Heading loss heading_cls_gt = tf.gather_nd(bboxes_heading_labels_gt, positive_gt_idxes) heading_cls_score = tf.gather_nd(proposals_output[..., 5:5+config.NH], positive_idxes) 
heading_cls_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=heading_cls_score, labels=heading_cls_gt)) heading_cls_gt_onehot = tf.one_hot(heading_cls_gt, depth=config.NH, on_value=1, off_value=0, axis=-1) # Np * NH heading_residual_gt = tf.gather_nd(bboxes_heading_residuals_gt, positive_gt_idxes) # Np heading_residual_predicted = tf.gather_nd(proposals_output[..., 5 + config.NH:5+2 * config.NH], positive_idxes) # Np * NH heading_residual_loss = tf.losses.huber_loss(labels=heading_residual_gt, predictions=tf.reduce_sum(heading_residual_predicted * tf.to_float(heading_cls_gt_onehot), axis=1), reduction=tf.losses.Reduction.MEAN) # Size loss size_cls_gt = tf.gather_nd(bboxes_size_labels_gt, positive_gt_idxes) size_cls_score = tf.gather_nd(proposals_output[..., 5+2 * config.NH:5+2 * config.NH + config.NS], positive_idxes) size_cls_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=size_cls_score, labels=size_cls_gt)) size_cls_gt_onehot = tf.one_hot(size_cls_gt, depth=config.NS, on_value=1, off_value=0, axis=-1) # Np * NS size_cls_gt_onehot = tf.tile(tf.expand_dims(tf.to_float(size_cls_gt_onehot), -1), [1, 1, 3]) # Np * NS * 3 size_residual_gt = tf.gather_nd(bboxes_size_residuals_gt, positive_gt_idxes) # Np * 3 size_residual_predicted = tf.reshape(tf.gather_nd(proposals_output[..., 5+2 * config.NH + config.NS:5+2 * config.NH + 4 * config.NS], positive_idxes), (-1, config.NS, 3)) # Np * NS * 3 size_residual_loss = tf.reduce_mean(tf.reduce_sum(tf.losses.huber_loss(labels=size_residual_gt, predictions=tf.reduce_sum(size_residual_predicted * tf.to_float(size_cls_gt_onehot), axis=1), reduction=tf.losses.Reduction.NONE), axis=-1)) box_loss = center_loss + 0.1 * heading_cls_loss + heading_residual_loss + 0.1 * size_cls_loss + size_residual_loss # semantic loss sem_cls_score = tf.gather_nd(proposals_output[..., -config.NC:], positive_idxes) sem_cls_gt = tf.gather_nd(bboxes_labels_gt, positive_gt_idxes) # Np sem_cls_loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits(logits=sem_cls_score, labels=sem_cls_gt), name='sem_cls_loss') sem_correct = tf.cast(tf.nn.in_top_k(sem_cls_score, sem_cls_gt, 1), tf.float32, name='sem_correct') sem_accuracy = tf.reduce_mean(sem_correct, name='sem_accuracy') ''' ''' # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically # 1. written to tensosrboard # 2. written to stat.json # 3. printed after each epoch summary.add_moving_summary(obj_accuracy, sem_accuracy) ''' # Use a regex to find parameters to apply weight decay. # Here we apply a weight decay on all W (weight matrix) of all fc layers # If you don't like regex, you can certainly define the cost in any other methods. 
# no weight decay # wd_cost = tf.multiply(1e-5, # regularize_cost('.*/W', tf.nn.l2_loss), # name='regularize_loss') '''' # bboxes_xyz(the gt of bounding box center): B * BB * 3 (BB is the num of bounding box) # votes_xys: B * N * 3 (N is the number of votes) # when compare bboxes_xyz and votes_xyz, expand dims to B * N * BB * 3 # after expand_dims, become B * 1 * BB * 3, B * N * 1 * 3, Tensorflow will use broadcast # proposals_xyz: B * PR * 3 (PR is the num of proposal) ''' # vote_reg_loss # refer to line 61 in model.py when writing these codes # TODO: Here, we use the nearest center as the GT, need to implement the version that using the closest box's # center as GT vote2proposal_center = tf.abs( tf.expand_dims(votes_xyz, 2) - tf.expand_dims(proposals_xyz, 1)) # B * N * PR * 3 vote2proposal_center_norm = tf.norm(vote2proposal_center, axis=-1) # B * N * PR votes_assignment = tf.argmin(vote2proposal_center_norm, -1, output_type=tf.int32) # B * N, int votes_gt = tf.gather_nd( proposals_xyz, tf.stack([ tf.tile(input=tf.expand_dims( tf.range(tf.shape(votes_assignment)[0]), -1), multiples=[1, tf.shape(votes_assignment)[1]]), votes_assignment ], 2) ) # gather a B * N * 3 tensor from B * PR * 3 according to a B * N(votes_assignment) # the indices will be B * N * 2, indices[b, n] = [b, votes_assignment[b, n]] votes_gt_no_gradient = tf.stop_gradient(votes_gt) vote_reg_loss = tf.reduce_mean(tf.norm(votes_xyz - votes_gt_no_gradient, ord=1, axis=-1), name='vote_reg_loss') # obj_cls_loss & box_loss # First decide which box it is fit with for every point ''' we assume that the proposals_output is B * PR * 11(2 objectness, 3 xyz, 3 lwh, 3 angles) data_idx is B * P * 3 (P is the number of total points) we want to get pts_assignment of B * P, pts_fit_loss of B * P ''' # the rotation angle of each points relative to the proposal boxes alphas_star = -proposals_output[:, :, 8] # B * PR betas_star = -proposals_output[:, :, 9] # B * PR gammas_star = -proposals_output[:, :, 10] # B * PR # referring to https://en.wikipedia.org/wiki/Rotation_matrix#In_three_dimensions # rotation matrix # TODO: When do visualization, the meaning of the angles should be consistent b_pr = alphas_star.shape pr = alphas_star.shape[1] p = x.shape[1] r_alphas = tf.stack([ tf.ones(b_pr), tf.zeros(b_pr), tf.zeros(b_pr), tf.zeros(b_pr), tf.cos(alphas_star), -tf.sin(alphas_star), tf.zeros(b_pr), tf.sin(alphas_star), tf.cos(alphas_star) ], axis=2) r_betas = tf.stack([ tf.cos(betas_star), tf.zeros(b_pr), tf.sin(betas_star), tf.zeros(b_pr), tf.ones(b_pr), tf.zeros(b_pr), -tf.sin(betas_star), tf.zeros(b_pr), tf.cos(betas_star) ], axis=2) r_gammas = tf.stack([ tf.cos(gammas_star), -tf.sin(gammas_star), tf.zeros(b_pr), tf.sin(gammas_star), tf.cos(gammas_star), tf.zeros(b_pr), tf.zeros(b_pr), tf.zeros(b_pr), tf.ones(b_pr) ], axis=2) r_alphas = tf.reshape(r_alphas, shape=[b_pr[0], b_pr[1], 3, 3]) r_betas = tf.reshape(r_betas, shape=[b_pr[0], b_pr[1], 3, 3]) r_gammas = tf.reshape(r_gammas, shape=[b_pr[0], b_pr[1], 3, 3]) r_matrix = tf.linalg.matmul(r_alphas, tf.linalg.matmul( r_betas, r_gammas)) # B * PR * 3 * 3 r_matrix_expand = tf.expand_dims(r_matrix, axis=1) # B * 1 * PR * 3 * 3 r_matrix_tile = tf.tile(r_matrix_expand, multiples=[1, p, 1, 1, 1]) # B * P * PR * 3 * 3 x_expand = tf.expand_dims(tf.expand_dims(x, axis=2), axis=-1) # B * P * 1 * 3 * 1 from B * P * 3 # here, we need column vector to do the multiplication, x_tile = tf.tile(x_expand, multiples=[1, 1, pr, 1, 1]) # B * P * PR * 3 * 1 rotated_data_idx = tf.squeeze(tf.linalg.matmul( 
r_matrix_tile, x_tile), axis=-1) # B * P * PR * 3 # squeeze the trailing length-1 axis to get the position tensor pts_to_box_assignment, pts_to_box_distance = pts2box( rotated_data_idx, proposals_output[:, :, 2:8]) # both are B * P & B * P # obj_cls_loss # ignore padded points at the origin origin_index = tf.equal(tf.count_nonzero(x, axis=-1), 0) # B * P, True at the (padded) origin points is_not_origin = tf.tile(tf.expand_dims( 1 - tf.cast(origin_index, dtype=tf.float32), axis=-1), multiples=[1, 1, pr]) # B * P * PR proposal_fit_count = tf.count_nonzero(tf.math.multiply( tf.one_hot(pts_to_box_assignment, depth=pr), is_not_origin), axis=1) # B * PR obj_gt = tf.math.greater( proposal_fit_count, config.POSITIVE_THRES_NUM) # B * PR, True for positive proposals obj_cls_gt = tf.cast(obj_gt, tf.int32) # B * PR, integer labels for the sparse cross-entropy below obj_cls_score = tf.identity(proposals_output[..., :2], 'obj_scores') # B * PR * 2 obj_cls_loss = tf.identity(tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=obj_cls_score, labels=obj_cls_gt)), name='obj_cls_loss') # box_loss pos_pts_to_box_distance = pos_pts2box(rotated_data_idx, proposals_output[:, :, 2:8], obj_gt) box_loss = tf.math.reduce_sum( tf.math.multiply(pos_pts_to_box_distance, 1 - tf.cast(origin_index, tf.float32))) # total_cost = vote_reg_loss + 0.5 * obj_cls_loss + 1. * box_loss + 0.1 * sem_cls_loss total_cost = vote_reg_loss + 0.5 * obj_cls_loss + 1. * box_loss total_cost = tf.identity(total_cost, name='total_cost') summary.add_moving_summary(total_cost) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('.*/W', ['histogram', 'rms'])) # the function should return the total cost to be optimized return total_cost
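# --- Illustration (not part of the model above) ---------------------------------
# A minimal NumPy sketch of the vote-to-proposal assignment used in the vote_reg_loss
# above: for every vote, take the nearest proposal centre as its regression target.
# This mirrors the tf.argmin / tf.stack([batch_idx, assignment]) / tf.gather_nd chain;
# B, N and PR are made-up sizes.
import numpy as np

B, N, PR = 2, 5, 3
votes_xyz = np.random.rand(B, N, 3).astype(np.float32)       # B * N * 3
proposals_xyz = np.random.rand(B, PR, 3).astype(np.float32)  # B * PR * 3

# pairwise vote-to-proposal distances, B * N * PR
dist = np.linalg.norm(votes_xyz[:, :, None, :] - proposals_xyz[:, None, :, :], axis=-1)

votes_assignment = dist.argmin(axis=-1)              # B * N, index of the nearest proposal
batch_idx = np.tile(np.arange(B)[:, None], [1, N])   # B * N, row index of each vote

# NumPy equivalent of tf.gather_nd(proposals_xyz, tf.stack([batch_idx, assignment], 2))
votes_gt = proposals_xyz[batch_idx, votes_assignment]        # B * N * 3

# L1 regression loss against the (gradient-stopped) targets, as in vote_reg_loss above
vote_reg_loss = np.abs(votes_xyz - votes_gt).sum(-1).mean()
print(votes_gt.shape, vote_reg_loss)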
def _build_graph(self, inputs): image, label = inputs is_training = get_current_tower_context().is_training fw, fa, fg = get_dorefa(BITW, BITA, BITG) # monkey-patch tf.get_variable to apply fw def binarize_weight(v): name = v.op.name # don't binarize first and last layer if not name.endswith('W') or 'conv0' in name or 'fc' in name: return v else: logger.info("Binarizing weight {}".format(v.op.name)) return fw(v) def cabs(x): return tf.minimum(1.0, tf.abs(x), name='cabs') def activate(x): return fa(cabs(x)) image = image / 256.0 with remap_variables(binarize_weight), \ argscope(BatchNorm, decay=0.9, epsilon=1e-4), \ argscope(Conv2D, use_bias=False, nl=tf.identity): logits = (LinearWrap(image) .Conv2D('conv0', 48, 5, padding='VALID', use_bias=True) .MaxPooling('pool0', 2, padding='SAME') .apply(activate) # 18 .Conv2D('conv1', 64, 3, padding='SAME') .apply(fg) .BatchNorm('bn1').apply(activate) .Conv2D('conv2', 64, 3, padding='SAME') .apply(fg) .BatchNorm('bn2') .MaxPooling('pool1', 2, padding='SAME') .apply(activate) # 9 .Conv2D('conv3', 128, 3, padding='VALID') .apply(fg) .BatchNorm('bn3').apply(activate) # 7 .Conv2D('conv4', 128, 3, padding='SAME') .apply(fg) .BatchNorm('bn4').apply(activate) .Conv2D('conv5', 128, 3, padding='VALID') .apply(fg) .BatchNorm('bn5').apply(activate) # 5 .tf.nn.dropout(0.5 if is_training else 1.0) .Conv2D('conv6', 512, 5, padding='VALID') .apply(fg).BatchNorm('bn6') .apply(cabs) .FullyConnected('fc1', 10, nl=tf.identity)()) tf.nn.softmax(logits, name='output') # compute the number of failed samples wrong = prediction_incorrect(logits, label) # monitor training error add_moving_summary(tf.reduce_mean(wrong, name='train_error')) cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean(cost, name='cross_entropy_loss') # weight decay on all W of fc layers wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7)) add_param_summary(('.*/W', ['histogram', 'rms'])) self.cost = tf.add_n([cost, wd_cost], name='cost') add_moving_summary(cost, wd_cost, self.cost)
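# --- Illustration (not part of the model above) ---------------------------------
# A rough NumPy sketch of what a DoReFa-style 1-bit weight quantizer such as fw above
# could do (assuming BITW == 1): replace every weight by its sign, scaled by the mean
# absolute value, so each binarized layer keeps roughly the original magnitude.
# get_dorefa/remap_variables are library calls; this only shows the quantization idea.
import numpy as np

def binarize_weight_sketch(w):
    scale = np.abs(w).mean()                      # E(|W|), one scale per tensor
    return np.where(w >= 0, scale, -scale).astype(w.dtype)

w = np.random.randn(3, 3, 16, 64).astype(np.float32)
wb = binarize_weight_sketch(w)
print(np.unique(wb))                              # only +scale and -scale remain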
def _build_graph(self, inputs): images, truemap_coded = inputs orig_imgs = images true_np = truemap_coded[...,0] true_np = tf.cast(true_np, tf.int32) true_np = tf.identity(true_np, name='truemap-np') one_np = tf.one_hot(true_np, 2, axis=-1) true_np = tf.expand_dims(true_np, axis=-1) true_dist = truemap_coded[...,1:] true_dist = tf.identity(true_dist, name='truemap-dist') #### with argscope(Conv2D, activation=tf.identity, use_bias=False, # K.he initializer W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \ argscope([Conv2D, BatchNorm], data_format=self.data_format): i = tf.transpose(images, [0, 3, 1, 2]) i = i if not self.input_norm else i / 255.0 #### d = encoder(i, self.freeze) d[0] = crop_op(d[0], (184, 184)) d[1] = crop_op(d[1], (72, 72)) #### np_feat = decoder('np', d) np = BNReLU('preact_out_np', np_feat[-1]) dist_feat = decoder('dst', d) dist = BNReLU('preact_out_dist', dist_feat[-1]) #### logi_np = Conv2D('conv_out_np', np, 2, 1, use_bias=True, activation=tf.identity) logi_np = tf.transpose(logi_np, [0, 2, 3, 1]) soft_np = tf.nn.softmax(logi_np, axis=-1) prob_np = tf.identity(soft_np[...,1], name='predmap-prob-np') prob_np = tf.expand_dims(prob_np, axis=-1) pred_np = tf.argmax(soft_np, axis=-1, name='predmap-np') pred_np = tf.expand_dims(tf.cast(pred_np, tf.float32), axis=-1) #### logi_dist = Conv2D('conv_out_dist', dist, 1, 1, use_bias=True, activation=tf.identity) logi_dist = tf.transpose(logi_dist, [0, 2, 3, 1]) prob_dist = tf.identity(logi_dist, name='predmap-prob-dist') pred_dist = tf.identity(logi_dist, name='predmap-dist') # encoded so that inference can extract all output at once predmap_coded = tf.concat([prob_np, pred_dist], axis=-1, name='predmap-coded') #### #### if get_current_tower_context().is_training: ######## LOSS ### Distance regression loss loss_mse = pred_dist - true_dist loss_mse = loss_mse * loss_mse loss_mse = tf.reduce_mean(loss_mse, name='loss-mse') add_moving_summary(loss_mse) ### Nuclei Blob classification loss loss_bce = categorical_crossentropy(soft_np, one_np) loss_bce = tf.reduce_mean(loss_bce, name='loss-bce') add_moving_summary(loss_bce) ### combine the loss into single cost function self.cost = tf.identity(loss_mse + loss_bce, name='overall-loss') add_moving_summary(self.cost) #### add_param_summary(('.*/W', ['histogram'])) # monitor W #### logging visual sthg orig_imgs = tf.cast(orig_imgs , tf.uint8) tf.summary.image('input', orig_imgs, max_outputs=1) orig_imgs = crop_op(orig_imgs, (190, 190), "NHWC") pred_np = colorize(prob_np[...,0], cmap='jet') true_np = colorize(true_np[...,0], cmap='jet') pred_dist = colorize(prob_dist[...,0], cmap='jet') true_dist = colorize(true_dist[...,0], cmap='jet') viz = tf.concat([orig_imgs, true_np, pred_np, true_dist, pred_dist,], 2) tf.summary.image('output', viz, max_outputs=1) return
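# --- Illustration (not part of the model above) ---------------------------------
# A small NumPy sketch of the two per-pixel losses combined in self.cost above: a mean
# squared error on the regressed distance map and a categorical cross-entropy on the
# softmaxed nucleus/background map. Array sizes and values are made up.
import numpy as np

H, W = 4, 4
true_dist = np.random.rand(1, H, W, 1).astype(np.float32)
pred_dist = np.random.rand(1, H, W, 1).astype(np.float32)
true_np = np.random.randint(0, 2, size=(1, H, W))             # 0 = background, 1 = nucleus

logits = np.random.randn(1, H, W, 2).astype(np.float32)
soft_np = np.exp(logits) / np.exp(logits).sum(-1, keepdims=True)   # per-pixel softmax

one_np = np.eye(2)[true_np]                                   # one-hot, 1 * H * W * 2
loss_mse = ((pred_dist - true_dist) ** 2).mean()
loss_bce = -(one_np * np.log(soft_np + 1e-7)).sum(-1).mean()
overall_loss = loss_mse + loss_bce
print(overall_loss)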
def _build_graph(self, inputs): """This function should build the model which takes the input variables and define self.cost at the end""" # inputs contains a list of input variables defined above input1, input2, input3, input4, label = inputs n_hidden = 64 n_classes = 5 n_steps = 25 output_num = 4 with tf.name_scope('sensor1'): output1 = LSTM_Network('sensor1', input1, n_steps, n_hidden, output_num, 1) with tf.name_scope('sensor2'): output2 = LSTM_Network('sensor2', input2, n_steps, n_hidden, output_num, 17) with tf.name_scope('sensor3'): output3 = LSTM_Network('sensor3', input3, n_steps, n_hidden, output_num, 17) with tf.name_scope('sensor4'): output4 = LSTM_Network('sensor4', input4, n_steps, n_hidden, output_num, 17) logits = Connect('cloud', [output1, output2, output3, output4]) \ .FullyConnected('fc1', 256, activation=tf.nn.relu) \ .FullyConnected('fc2', n_classes, activation=tf.identity)() tf.nn.softmax(logits, name='prob') # a Bx5 tensor with class probabilities # a vector of length B with loss of each sample cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean( cost, name='cross_entropy_loss') # the average cross-entropy loss correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct') accuracy = tf.reduce_mean(correct, name='accuracy') # This will monitor training error (in a moving_average fashion): # 1. write the value to tensorboard # 2. write the value to stat.json # 3. print the value after each epoch train_error = tf.reduce_mean(1 - correct, name='train_error') summary.add_moving_summary(train_error, accuracy) # Use a regex to find parameters to apply weight decay. # Here we apply a weight decay on all W (weight matrix) of all fc layers wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss), name='regularize_loss') self.cost = tf.add_n([wd_cost, cost], name='total_cost') summary.add_moving_summary(cost, wd_cost, self.cost) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('.*/W', ['histogram', 'rms']))
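# --- Illustration (not part of the model above) ---------------------------------
# A NumPy sketch of the late-fusion pattern above: each sensor branch yields a small
# feature vector, the vectors are concatenated and fed to a shared classification head.
# It assumes `Connect` behaves like a concat along the feature axis; shapes are made up.
import numpy as np

batch, output_num, n_classes = 8, 4, 5
branch_outputs = [np.random.randn(batch, output_num).astype(np.float32) for _ in range(4)]

fused = np.concatenate(branch_outputs, axis=1)      # B x (4 * output_num)
w = 0.01 * np.random.randn(fused.shape[1], n_classes).astype(np.float32)
logits = fused @ w                                   # stand-in for the fc1/fc2 head
probs = np.exp(logits) / np.exp(logits).sum(-1, keepdims=True)
print(probs.shape)                                   # (8, 5)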
def build_graph( self, cam1: tf.Tensor, cam2: tf.Tensor, mask: tf.Tensor, normal_gt: tf.Tensor, depth_gt: tf.Tensor, ): with tf.variable_scope("prepare"): repeat = [1 for _ in range(len(mask.shape))] repeat[-1] = 3 tbutil.two_side_by_side(cam1, cam2, "input", 5) mask3 = tf.tile(mask, repeat) normal, depth = self.network_architecture(cam1, cam2, mask) with tf.variable_scope("loss"): with tf.variable_scope("normal"): normal_loss = tf.reduce_mean( masked_loss(l1_loss(normal_gt * 2 - 1, normal * 2 - 1), mask3), name="normal_loss", ) add_moving_summary(normal_loss) tf.losses.add_loss(normal_loss, tf.GraphKeys.LOSSES) tbutil.two_side_by_side(normal_gt, normal, "normal", 5) with tf.variable_scope("depth"): depth_loss = tf.reduce_mean(masked_loss( l1_loss(depth_gt, depth), mask), name="depth_loss") add_moving_summary(depth_loss) tf.losses.add_loss(depth_loss, tf.GraphKeys.LOSSES) tbutil.two_side_by_side(depth_gt, depth, "depth", 5) if self.enable_consistency: with tf.variable_scope("consistency"): near = uncompressDepth(1) far = uncompressDepth(0) d = uncompressDepth(depth) h = tf.div_no_nan(d - near, far - near) sobel = tf.image.sobel_edges( h) # b,h,w,1,[dy,dx] - 1 because height has 1 channel dx = sobel[:, :, :, :, 1] # b,h,w,1 dy = -sobel[:, :, :, :, 0] # We're using a depth map instead of a height. Which means bright # values are at a greater depth. Thus, we need to invert the gradient texelSize = 1 / self.imgSize dz = tf.ones_like(dx) * texelSize * 2 n = normalize(tf.concat([dx, dy, dz], -1)) n = n * 0.5 + 0.5 consistency = masked_loss(l2_loss(n, normal), mask3) consistency_loss = ( tf.reduce_mean(consistency, name="consistency_loss") * self.consistency_loss) add_moving_summary(consistency_loss) tf.losses.add_loss(consistency_loss, tf.GraphKeys.LOSSES) tbutil.four_side_by_side(tf.tile(depth, repeat), n, normal, consistency, "consistency", 5) self.cost = tf.losses.get_total_loss(name="total_costs") add_moving_summary(self.cost) add_param_summary((".*/W", ["histogram"])) # monitor W return self.cost
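# --- Illustration (not part of the model above) ---------------------------------
# A NumPy sketch of the depth/normal consistency idea above: differentiate the height
# map, stack the gradients with a constant z component scaled by the texel size, and
# normalize to get a pseudo normal map in [0, 1] that can be compared to the predicted
# normals. The sign convention and texel size here are assumptions, not the exact code.
import numpy as np

h = np.random.rand(8, 8).astype(np.float32)           # height map in [0, 1]
texel = 1.0 / h.shape[0]

dy, dx = np.gradient(h)                               # finite differences (rows, cols)
dz = np.full_like(h, 2.0 * texel)

n = np.stack([dx, -dy, dz], axis=-1)
n /= np.linalg.norm(n, axis=-1, keepdims=True)        # unit normals
n_vis = n * 0.5 + 0.5                                  # remap to [0, 1] as in the code above
print(n_vis.shape)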
def build_graph(self, image, label): """This function should build the model which takes the input variables and return cost at the end""" # Flatten the image into a vector of IMAGE_SIZE * IMAGE_SIZE pixels for the # fully-connected encoder. image = tf.layers.flatten(image) # image = image * 2 - 1 # center the pixel values at zero # The encoder maps the flattened image to the mean and log-variance of a 2-D latent Gaussian. with tf.variable_scope('encoder'): x = FullyConnected('fc1', image, 1000, activation=tf.nn.relu) x = FullyConnected('fc2', x, 1000, activation=tf.nn.relu) mu = tf.identity(FullyConnected('fc_mu', x, 2, activation=None), 'mu') logvar = FullyConnected('fc_var', x, 2, activation=None) eps = tf.random_normal((tf.shape(x)[0], 2)) z = tf.identity(eps * tf.exp(0.5 * logvar) + mu, name='z') z_real = tf.random_normal((tf.shape(x)[0], 2)) self.f_loss = tf.reduce_mean( -tf.reduce_sum(self.f(z_real) - self.f(z), -1), name='f_loss') self.g_loss = tf.reduce_mean(tf.reduce_sum(-self.f(z), -1), name='g_loss') z_interp = tf.stop_gradient(z) + tf.random_uniform( (tf.shape(x)[0], 1)) * (z_real - tf.stop_gradient(z)) gradient_f = tf.gradients(self.f(z_interp), [z_interp])[0] gp_loss = tf.reduce_mean(tf.maximum( tf.norm(gradient_f, axis=-1) - 1, 0)**2, name='gp_loss') self.f_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'f') self.g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'encoder') with tf.variable_scope('decoder'): x = FullyConnected('fc1', z, 1000, activation=tf.nn.relu) x = FullyConnected('fc2', x, 1000, activation=tf.nn.relu) rec = tf.identity( FullyConnected('fc_rec', x, IMAGE_SIZE * IMAGE_SIZE, activation=tf.nn.sigmoid), 'rec') kl_loss = -tf.reduce_sum(1 + logvar - mu * mu - tf.exp(logvar), -1) kl_loss = tf.reduce_mean(kl_loss, name='kl_loss') rec_loss = tf.reduce_mean(tf.reduce_sum(tf.square(rec - image), -1), name='rec_loss') # total_cost = rec_loss + kl_loss total_cost = rec_loss + gp_loss summary.add_moving_summary(rec_loss, kl_loss, self.f_loss, self.g_loss, gp_loss) # monitor histogram of all weight (of conv and fc layers) in tensorboard summary.add_param_summary(('.*/W', ['histogram', 'rms'])) # the function should return the total cost to be optimized return total_cost
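# --- Illustration (not part of the model above) ---------------------------------
# A NumPy sketch of the reparameterization step in the encoder above: sample
# z = mu + exp(0.5 * logvar) * eps with eps ~ N(0, I), so gradients can flow through
# mu and logvar. The 2-D latent matches the code; the batch size is made up. The KL
# term below uses the conventional 0.5 factor of the closed-form Gaussian KL.
import numpy as np

batch, latent = 16, 2
mu = np.random.randn(batch, latent).astype(np.float32)
logvar = np.random.randn(batch, latent).astype(np.float32)

eps = np.random.randn(batch, latent).astype(np.float32)
z = mu + np.exp(0.5 * logvar) * eps                    # same formula as `z` in the graph

kl = -0.5 * (1 + logvar - mu ** 2 - np.exp(logvar)).sum(-1)   # KL(q(z|x) || N(0, I))
print(z.shape, kl.mean())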
def _build_graph(self, inputs): image, edgemap = inputs image = image - tf.constant([104, 116, 122], dtype='float32') edgemap = tf.expand_dims(edgemap, 3, name='edgemap4d') def branch(name, l, up): with tf.variable_scope(name): l = Conv2D('convfc', l, 1, kernel_shape=1, nl=tf.identity, use_bias=True, W_init=tf.constant_initializer(), b_init=tf.constant_initializer()) while up != 1: l = BilinearUpSample('upsample{}'.format(up), l, 2) up = up / 2 return l with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu): l = Conv2D('conv1_1', image, 64) l = Conv2D('conv1_2', l, 64) b1 = branch('branch1', l, 1) l = MaxPooling('pool1', l, 2) l = Conv2D('conv2_1', l, 128) l = Conv2D('conv2_2', l, 128) b2 = branch('branch2', l, 2) l = MaxPooling('pool2', l, 2) l = Conv2D('conv3_1', l, 256) l = Conv2D('conv3_2', l, 256) l = Conv2D('conv3_3', l, 256) b3 = branch('branch3', l, 4) l = MaxPooling('pool3', l, 2) l = Conv2D('conv4_1', l, 512) l = Conv2D('conv4_2', l, 512) l = Conv2D('conv4_3', l, 512) b4 = branch('branch4', l, 8) l = MaxPooling('pool4', l, 2) l = Conv2D('conv5_1', l, 512) l = Conv2D('conv5_2', l, 512) l = Conv2D('conv5_3', l, 512) b5 = branch('branch5', l, 16) final_map = Conv2D('convfcweight', tf.concat([b1, b2, b3, b4, b5], 3), 1, 1, W_init=tf.constant_initializer(0.2), use_bias=False, nl=tf.identity) costs = [] for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]): output = tf.nn.sigmoid(b, name='output{}'.format(idx + 1)) xentropy = class_balanced_sigmoid_cross_entropy( b, edgemap, name='xentropy{}'.format(idx + 1)) costs.append(xentropy) # some magic threshold pred = tf.cast(tf.greater(output, 0.5), tf.int32, name='prediction') wrong = tf.cast(tf.not_equal(pred, edgemap), tf.float32) wrong = tf.reduce_mean(wrong, name='train_error') if get_current_tower_context().is_training: wd_w = tf.train.exponential_decay(2e-4, get_global_step_var(), 80000, 0.7, True) wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost') costs.append(wd_cost) add_param_summary(('.*/W', ['histogram'])) # monitor W self.cost = tf.add_n(costs, name='cost') add_moving_summary(costs + [wrong, self.cost])
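# --- Illustration (not part of the model above) ---------------------------------
# A NumPy sketch of the class-balanced sigmoid cross-entropy used for each side output
# above: edge pixels are rare, so positives and negatives are re-weighted by the
# opposite class frequency (the HED-style weighting). This shows only the idea, not the
# tensorpack class_balanced_sigmoid_cross_entropy implementation.
import numpy as np

logits = np.random.randn(1, 32, 32, 1).astype(np.float32)
label = (np.random.rand(1, 32, 32, 1) < 0.1).astype(np.float32)   # ~10% edge pixels

count_pos = label.sum()
count_neg = label.size - count_pos
beta = count_neg / (count_neg + count_pos)            # close to 1 when edges are rare

p = 1.0 / (1.0 + np.exp(-logits))                     # sigmoid
xentropy = -(beta * label * np.log(p + 1e-7)
             + (1 - beta) * (1 - label) * np.log(1 - p + 1e-7))
print(xentropy.mean())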
def build_graph(self, image, label): image = image / 128.0 assert tf.test.is_gpu_available() image = tf.transpose(image, [0, 3, 1, 2]) def residual(name, l, increase_dim=False, first=False): shape = l.get_shape().as_list() in_channel = shape[1] if increase_dim: out_channel = in_channel * 2 stride1 = 2 else: out_channel = in_channel stride1 = 1 with tf.variable_scope(name): b1 = l if first else BNReLU(l) c1 = Conv2D('conv1', b1, out_channel, strides=stride1, activation=BNReLU) c2 = Conv2D('conv2', c1, out_channel) if increase_dim: l = AvgPooling('pool', l, 2) l = tf.pad(l, [[0, 0], [in_channel // 2, in_channel // 2], [0, 0], [0, 0]]) l = c2 + l return l with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='channels_first'), \ argscope(Conv2D, use_bias=False, kernel_size=3, kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')): l = Conv2D('conv0', image, 16, activation=BNReLU) l = residual('res1.0', l, first=True) for k in range(1, self.n): l = residual('res1.{}'.format(k), l) # 32,c=16 l = residual('res2.0', l, increase_dim=True) for k in range(1, self.n): l = residual('res2.{}'.format(k), l) # 16,c=32 l = residual('res3.0', l, increase_dim=True) for k in range(1, self.n): l = residual('res3.' + str(k), l) l = BNReLU('bnlast', l) # 8,c=64 l = GlobalAvgPooling('gap', l) logits = FullyConnected('linear', l, 10) tf.nn.softmax(logits, name='output') cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label) cost = tf.reduce_mean(cost, name='cross_entropy_loss') wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), name='wrong_vector') # monitor training error add_moving_summary(tf.reduce_mean(wrong, name='train_error')) # weight decay on all W of fc layers wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(), 480000, 0.2, True) wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost') add_moving_summary(cost, wd_cost) add_param_summary(('.*/W', ['histogram'])) # monitor W return tf.add_n([cost, wd_cost], name='cost')
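# --- Illustration (not part of the model above) ---------------------------------
# A NumPy sketch of the "option A" shortcut taken in residual() above when the channel
# count doubles: downsample the input spatially by 2 (average pooling) and zero-pad the
# channel axis so the shortcut can be added to the conv branch. Layout is channels_first
# (N, C, H, W), matching the model; the sizes are made up.
import numpy as np

x = np.random.randn(4, 16, 32, 32).astype(np.float32)       # N, C, H, W
pooled = x.reshape(4, 16, 16, 2, 16, 2).mean(axis=(3, 5))   # 2x2 average pooling
in_channel = pooled.shape[1]
shortcut = np.pad(pooled, [(0, 0), (in_channel // 2, in_channel // 2), (0, 0), (0, 0)])
print(shortcut.shape)                                        # (4, 32, 16, 16)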