def build_graph(self, x, y):
    """Build the perturbation graph and return the objective to minimize.

    The gradient of the classification loss w.r.t. ``x`` is passed through
    ``conv_with_rn`` (under the ``'RHP'`` variable scope), a single
    sign-step of that result is added to ``x``, and the classification loss
    is evaluated again on the perturbed input.

    ``FLAGS.step_size`` and ``FLAGS.max_epsilon`` are divided by 255 before
    use, and the perturbed input is clipped to ``x +/- max_epsilon`` within
    [0, 1].

    Args:
        x: input batch fed to ``network.model``.
        y: sparse (integer) labels for ``x``.

    Returns:
        The negated mean cross-entropy (named ``'train_loss'`` before
        negation) of the perturbed input.
    """
    # NOTE(review): scope boundaries were reconstructed from a flattened
    # source; confirm which statements sit inside each `with`.
    frozen = dict(stop_gradient=False, skip_collection=True)

    with freeze_variables(**frozen):
        alpha = FLAGS.step_size / 255.0
        eps = FLAGS.max_epsilon / 255.0
        upper = tf.clip_by_value(x + eps, 0., 1.0)
        lower = tf.clip_by_value(x - eps, 0., 1.0)
        clean_logits, _, _ = network.model(x, FLAGS.attack_networks[0])
        clean_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=clean_logits, labels=y)
        grad = tf.gradients(clean_loss, x)[0]

    with tf.variable_scope('RHP'):
        grad = conv_with_rn(grad)

    with freeze_variables(**frozen):
        graph = tf.get_default_graph()
        # Inside this scope, backprop through tf.sign uses Identity's gradient.
        with graph.gradient_override_map({"Sign": "Identity"}):
            adv_x = x + alpha * tf.sign(grad)
            adv_x = tf.clip_by_value(adv_x, lower, upper)
        # evaluate after adding the perturbation
        adv_logits, _, _ = network.model(adv_x, FLAGS.attack_networks[0])
        adv_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=adv_logits, labels=y)
        loss_to_optimize = tf.reduce_mean(adv_xent, name='train_loss')
    return -loss_to_optimize
def build_graph(self, *args):
    """Build one DQN sub-graph per agent and return the weighted total cost.

    ``args`` is a flat sequence holding six consecutive inputs for every
    name in ``self.agent_names``, in this order: ``joint_state``,
    ``action``, ``reward``, ``isOver``, ``comb_mask``, ``joint_fine_mask``.
    Index 0 / 1 along axis 1 of ``joint_state`` and ``joint_fine_mask`` are
    used as the current / next step respectively.

    ``self.predict_value`` (and ``self.greedy_choice`` for Double-DQN) are
    overwritten on every iteration, so they refer to the last agent after
    the loop.

    Returns:
        ``None`` when the current tower is not in training mode; otherwise
        the sum over agents of each Huber cost multiplied by the matching
        entry of ``self.cost_weights``.
    """
    # NOTE(review): the scope nesting below was reconstructed from a
    # flattened source; confirm against the original layout.
    costs = []
    for i, name in enumerate(self.agent_names):
        (joint_state, action, reward, isOver,
         comb_mask, joint_fine_mask) = args[i * 6:(i + 1) * 6]
        with tf.variable_scope(name):
            with conditional(name is None, varreplace.freeze_variables()):
                state = tf.identity(joint_state[:, 0, :, :, :], name='state')
                fine_mask = tf.identity(joint_fine_mask[:, 0, :], name='fine_mask')
                self.predict_value = self.get_DQN_prediction(state, comb_mask, fine_mask)
                if not get_current_tower_context().is_training:
                    continue

                # reward = tf.clip_by_value(reward, -1, 1)
                next_state = tf.identity(joint_state[:, 1, :, :, :], name='next_state')
                next_fine_mask = tf.identity(joint_fine_mask[:, 1, :], name='next_fine_mask')
                action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

                pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
                max_pred_reward = tf.reduce_mean(tf.reduce_max(
                    self.predict_value, 1), name='predict_reward')
                summary.add_moving_summary(max_pred_reward)

                with tf.variable_scope('target'), varreplace.freeze_variables(skip_collection=True):
                    # we are alternating between comb and fine states
                    targetQ_predict_value = self.get_DQN_prediction(
                        next_state, tf.logical_not(comb_mask), next_fine_mask)  # NxA

                if self.method != 'Double':
                    # DQN
                    best_v = tf.reduce_max(targetQ_predict_value, 1)  # N,
                else:
                    # Double-DQN
                    next_predict_value = self.get_DQN_prediction(
                        next_state, tf.logical_not(comb_mask), next_fine_mask)
                    self.greedy_choice = tf.argmax(next_predict_value, 1)  # N,
                    predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
                    best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

                target = reward + (1.0 - tf.cast(isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)

                # Kept from the original: the result is not added to `cost`.
                l2_loss = tensorpack.regularize_cost(name + '/dqn.*W{1}', l2_regularizer(1e-3))
                # cost = tf.losses.mean_squared_error(target, pred_action_value)
                # Both assertions must pass before the loss is evaluated.
                with tf.control_dependencies([
                        tf.assert_greater(target, -100., message='target error'),
                        tf.assert_greater(pred_action_value, -100., message='pred value error')]):
                    cost = tf.losses.huber_loss(
                        target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
                summary.add_param_summary((name + '.*/W', ['histogram', 'rms']))  # monitor all W
                summary.add_moving_summary(cost)
                costs.append(cost)

    if not get_current_tower_context().is_training:
        return
    # Weight every cost that was actually built instead of assuming exactly
    # three agents; a too-short `cost_weights` still raises IndexError.
    return tf.add_n([cost * self.cost_weights[idx] for idx, cost in enumerate(costs)])
def resnet_backbone(image, num_blocks, group_func, group_func_dilation,
                    block_func, block_func_dilation):
    """Stack conv0/pool0, four residual groups and an ``aspp`` head.

    Args:
        image: input tensor wrapped in ``LinearWrap``.
        num_blocks: sequence with at least four entries; entry ``k`` is
            passed to the group named ``'group<k>'``.
        group_func, block_func: builders applied for ``group0`` and
            ``group1``.
        group_func_dilation, block_func_dilation: builders applied for
            ``group2`` and ``group3``, which receive an extra trailing
            argument (2 and 4 respectively).

    Returns:
        The tensor produced by ``aspp`` applied with arguments
        ``('aspp', 21)``.
    """
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    with argscope(Conv2D, use_bias=False, kernel_initializer=conv_init):
        with freeze_variables(stop_gradient=True, skip_collection=True):
            net = LinearWrap(image)
            net = net.Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
            net = net.MaxPooling('pool0', shape=3, stride=2, padding='SAME')
            net = net.apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
            net = net.apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
            net = net.apply(group_func_dilation, 'group2', block_func_dilation,
                            256, num_blocks[2], 1, 2)
            net = net.apply(group_func_dilation, 'group3', block_func_dilation,
                            512, num_blocks[3], 1, 4)
        # NOTE(review): the flattened source does not show whether the head
        # below sits inside the freeze scope; it is placed outside here so
        # that its variables stay trainable -- confirm.
        logits = net.apply(aspp, 'aspp', 21)()
    return logits
def fastrcnn_outputs(feature, num_classes, class_agnostic_regression=False):
    """
    Two fully-connected output layers ('class' and 'box') on top of `feature`.

    Args:
        feature (any shape):
        num_classes(int): num_category + 1
        class_agnostic_regression (bool): if True, regression to N x 1 x 4

    Returns:
        cls_logits: N x num_class classification logits (2-D)
        reg_logits: N x num_class x 4, or N x 1 x 4 if class agnostic (3-D),
            named 'output_box'
    """
    box_groups = 1 if class_agnostic_regression else num_classes

    # NOTE(review): both layers are assumed to live inside the freeze scope;
    # the flattened source does not show where the scope ends.
    with varreplace.freeze_variables(stop_gradient=False, skip_collection=True):
        # cls
        cls_init = tf.random_normal_initializer(stddev=0.01)
        classification = FullyConnected(
            'class', feature, num_classes, kernel_initializer=cls_init)

        # reg
        box_init = tf.random_normal_initializer(stddev=0.001)
        flat_boxes = FullyConnected(
            'box', feature, box_groups * 4, kernel_initializer=box_init)
        box_regression = tf.reshape(
            flat_boxes, (-1, box_groups, 4), name='output_box')
    return classification, box_regression
def resnet_conv5(image, num_block):
    """Apply the 'group3' bottleneck group to `image`.

    The group is built with 512 base channels, `num_block` blocks and
    stride 2, under `freeze_variables(stop_gradient=False,
    skip_collection=True)` and `backbone_scope(freeze=False)`.

    Returns:
        The output of `resnet_group`.
    """
    frozen_vars = varreplace.freeze_variables(stop_gradient=False, skip_collection=True)
    with frozen_vars, backbone_scope(freeze=False):
        return resnet_group('group3', image, resnet_bottleneck, 512, num_block, 2)
def resnet_c4_backbone(image, num_blocks):
    """Build conv0/pool0 and residual groups 0-2, returning the last group.

    Args:
        image: input tensor; the two trailing axes are padded before the
            'VALID' convolution and pooling.
        num_blocks: exactly three block counts, one per residual group.

    Returns:
        The output of 'group2' (16x downsampling up to that point).
    """
    with varreplace.freeze_variables(stop_gradient=False, skip_collection=True):
        assert len(num_blocks) == 3
        freeze_at = cfg.BACKBONE.FREEZE_AT
        no_pad = [0, 0]

        with backbone_scope(freeze=freeze_at > 0):
            stem_pad = maybe_reverse_pad(2, 3)
            stem = tf.pad(image, [no_pad, no_pad, stem_pad, stem_pad])
            stem = Conv2D('conv0', stem, 64, 7, strides=2, padding='VALID')
            pool_pad = maybe_reverse_pad(0, 1)
            stem = tf.pad(stem, [no_pad, no_pad, pool_pad, pool_pad])
            stem = MaxPooling('pool0', stem, 3, strides=2, padding='VALID')

        with backbone_scope(freeze=freeze_at > 1):
            c2 = resnet_group('group0', stem, resnet_bottleneck, 64, num_blocks[0], 1)

        with backbone_scope(freeze=False):
            c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2)
            c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
    # 16x downsampling up to now
    return c4
def rounded(label, factor=MAX_LABEL, name='quantized'):
    """Quantize `label` to multiples of ``1 / factor``.

    Entries equal to -1 are first replaced by 0; the result is scaled by
    `factor`, rounded, and scaled back; entries that end up equal to 0 are
    then set to -1. Inside this function the gradient of ``Round`` is
    overridden with that of ``Identity``.

    Returns:
        The quantized tensor, passed through ``tf.identity`` with `name`.
    """
    # NOTE(review): the final identity op is assumed to sit outside all
    # scopes; the flattened source does not show its nesting.
    with G.gradient_override_map({"Round": "Identity"}), freeze_variables():
        with tf.name_scope(name=name):
            # -1 -> 0
            minus_one = -1.0 * tf.ones_like(label)
            was_background = tf.equal(label, minus_one)
            label = tf.where(was_background, tf.zeros_like(label), label,
                             name='removedBackground')

            scaled = label * factor       # 0~1 -> 0~factor
            snapped = tf.round(scaled)
            label = snapped / factor      # 0~factor -> 0~1

            # 0 -> -1
            zero = 0.0 * tf.zeros_like(label)
            is_zero = tf.equal(label, zero)
            label = tf.where(is_zero, -1.0 * tf.ones_like(label), label,
                             name='addedBackground')
    return tf.identity(label, name=name)
def backbone_scope(freeze):
    """
    Generator that installs the backbone's layer defaults around its `yield`.

    While suspended at the `yield`, Conv2D, MaxPooling and BatchNorm default
    to 'channels_first'; Conv2D additionally has no bias, a fan-out
    variance-scaling initializer, and `get_norm()` followed by ReLU as its
    activation. Presumably wrapped by `contextlib.contextmanager` outside
    this view -- confirm.

    Args:
        freeze (bool): whether to freeze all the variables under the scope
    """
    def nonlin(x):
        return tf.nn.relu(get_norm()(x))

    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'):
        with argscope(Conv2D, use_bias=False, activation=nonlin,
                      kernel_initializer=conv_init):
            with ExitStack() as stack:
                norm = cfg.BACKBONE.NORM
                if norm in ('FreezeBN', 'SyncBN'):
                    if freeze or norm == 'FreezeBN':
                        bn_scope = argscope(BatchNorm, training=False)
                    else:
                        backend = 'nccl' if cfg.TRAINER == 'replicated' else 'horovod'
                        bn_scope = argscope(BatchNorm, sync_statistics=backend)
                    stack.enter_context(bn_scope)

                if freeze:
                    stack.enter_context(
                        freeze_variables(stop_gradient=False, skip_collection=True))
                elif cfg.BACKBONE.FREEZE_AFFINE:
                    # the layers are not completely frozen, but we may want
                    # to freeze only the affine parameters
                    stack.enter_context(custom_getter_scope(freeze_affine_getter))
                yield
def rpn_head(featuremap, channel, num_anchors):
    """
    A 3x3 'conv0' layer followed by two 1x1 layers, 'class' and 'box'.

    The batch axis of the class output is squeezed away, so the input is
    expected to hold a single image.

    Returns:
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4
    """
    head_init = tf.random_normal_initializer(stddev=0.01)
    with varreplace.freeze_variables(stop_gradient=False, skip_collection=True), \
            argscope(Conv2D, data_format='channels_first',
                     kernel_initializer=head_init):
        hidden = Conv2D('conv0', featuremap, channel, 3, activation=tf.nn.relu)

        raw_labels = Conv2D('class', hidden, num_anchors, 1)
        raw_boxes = Conv2D('box', hidden, 4 * num_anchors, 1)
        # both outputs: 1, NA(*4), fH, fW (NCHW)

        label_logits = tf.transpose(raw_labels, [0, 2, 3, 1])  # 1xfHxfWxNA
        label_logits = tf.squeeze(label_logits, 0)  # fHxfWxNA

        nchw_shape = tf.shape(raw_boxes)  # 1x(NAx4)xfHxfW
        box_logits = tf.transpose(raw_boxes, [0, 2, 3, 1])  # 1xfHxfWx(NAx4)
        out_shape = tf.stack([nchw_shape[2], nchw_shape[3], num_anchors, 4])
        box_logits = tf.reshape(box_logits, out_shape)  # fHxfWxNAx4
    return label_logits, box_logits
def backbone_scope(freeze):
    """
    Generator that yields once with the backbone's layer defaults active.

    Defaults in effect at the `yield`: 'channels_first' for Conv2D,
    MaxPooling and BatchNorm; bias-free Conv2D with a fan-out
    variance-scaling initializer and a norm-then-ReLU activation.
    Presumably used through `contextlib.contextmanager` -- confirm.

    Args:
        freeze (bool): whether to freeze all the variables under the scope
    """
    def nonlin(x):
        normalized = get_norm()(x)
        return tf.nn.relu(normalized)

    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False, activation=nonlin,
                     kernel_initializer=tf.variance_scaling_initializer(
                         scale=2.0, mode='fan_out')), \
            ExitStack() as stack:
        if cfg.BACKBONE.NORM in ['FreezeBN', 'SyncBN']:
            bn_is_frozen = freeze or cfg.BACKBONE.NORM == 'FreezeBN'
            if bn_is_frozen:
                bn_defaults = dict(training=False)
            else:
                bn_defaults = dict(
                    sync_statistics='nccl' if cfg.TRAINER == 'replicated' else 'horovod')
            stack.enter_context(argscope(BatchNorm, **bn_defaults))

        if not freeze:
            # the layers are not completely frozen, but we may want to
            # freeze only the affine parameters
            if cfg.BACKBONE.FREEZE_AFFINE:
                stack.enter_context(custom_getter_scope(freeze_affine_getter))
        else:
            stack.enter_context(
                freeze_variables(stop_gradient=False, skip_collection=True))
        yield
def rounded(label, factor=MAX_LABEL, name='quantized'):
    """Round `label` between a `cvt2imag` / `cvt2tanh` round trip.

    Both conversions receive ``maxVal=factor``. Inside this function the
    gradient of ``Round`` is overridden with that of ``Identity``.

    Returns:
        The converted-back tensor, passed through ``tf.identity`` with `name`.
    """
    # NOTE(review): the final identity op is assumed to sit outside all
    # scopes; the flattened source does not show its nesting.
    round_as_identity = G.gradient_override_map({"Round": "Identity"})
    with round_as_identity, freeze_variables(), tf.name_scope(name=name):
        as_image = cvt2imag(label, maxVal=factor)
        snapped = tf.round(as_image)
        label = cvt2tanh(snapped, maxVal=factor)
    return tf.identity(label, name=name)
def backbone_scope(freeze, freeze_bn=False):
    """Generator that yields once with the requested scopes entered.

    Args:
        freeze (bool): enter `freeze_variables(stop_gradient=False,
            skip_collection=True)` before yielding.
        freeze_bn (bool): enter `argscope(BatchNorm, training=False)` before
            yielding.

    Every scope that was entered is exited when the generator finishes.
    """
    # Scopes are created lazily, in the same order as they are entered.
    scope_factories = []
    if freeze_bn:
        scope_factories.append(lambda: argscope(BatchNorm, training=False))
    if freeze:
        scope_factories.append(
            lambda: freeze_variables(stop_gradient=False, skip_collection=True))

    with ExitStack() as stack:
        for make_scope in scope_factories:
            stack.enter_context(make_scope())
        yield
def build_graph(self, comb_state, action, reward, isOver):
    """Build the Q-network graph and, in training, the Huber TD cost.

    `comb_state` is cast to float32. The first `self.history` entries along
    its last axis form the current state; the window shifted by one forms
    the next state. `reward` is clipped to [-1, 1].

    Sets `self.predict_value` (and `self.greedy_choice` for Double-DQN).

    Returns:
        None when `self.training` is false, otherwise the cost tensor.
    """
    comb_state = tf.cast(comb_state, tf.float32)
    rank = comb_state.shape.rank
    window = [-1] * (rank - 1) + [self.history]

    state = tf.slice(comb_state, [0] * rank, window, name='state')
    self.predict_value = self.get_DQN_prediction(state)
    if not self.training:
        return

    reward = tf.clip_by_value(reward, -1, 1)
    shifted_begin = [0] * (rank - 1) + [1]
    next_state = tf.slice(comb_state, shifted_begin, window, name='next_state')
    next_state = tf.reshape(next_state, self._stacked_state_shape)

    action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)
    pred_action_value = tf.reduce_sum(
        input_tensor=self.predict_value * action_onehot, axis=1)  # N,
    best_q = tf.reduce_max(input_tensor=self.predict_value, axis=1)
    max_pred_reward = tf.reduce_mean(input_tensor=best_q, name='predict_reward')
    summary.add_moving_summary(max_pred_reward)

    with tf.compat.v1.variable_scope('target'), \
            varreplace.freeze_variables(skip_collection=True):
        target_q = self.get_DQN_prediction(next_state)  # NxA

    if self.method == 'Double':
        # Double-DQN: the online network picks the action, the target
        # network scores it.
        online_q = self.get_DQN_prediction(next_state)
        self.greedy_choice = tf.argmax(input=online_q, axis=1)  # N,
        choice_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
        best_v = tf.reduce_sum(input_tensor=target_q * choice_onehot, axis=1)
    else:
        # DQN
        best_v = tf.reduce_max(input_tensor=target_q, axis=1)  # N,

    not_over = 1.0 - tf.cast(isOver, tf.float32)
    bootstrap = not_over * self.gamma * tf.stop_gradient(best_v)
    target = reward + bootstrap

    cost = tf.compat.v1.losses.huber_loss(
        target, pred_action_value,
        reduction=tf.compat.v1.losses.Reduction.MEAN)
    # monitor all W
    summary.add_param_summary(
        ('conv.*/W', ['histogram', 'rms']),
        ('fc.*/W', ['histogram', 'rms']))
    summary.add_moving_summary(cost)
    return cost
def build_graph(self, query, key):
    """Build the contrastive training graph and return the total cost.

    `query` goes through `self.net.forward`; `key` goes through the same
    network under the "momentum_encoder" scope with frozen variables, after
    a batch shuffle that is undone afterwards. The positive logit is the
    query/key dot product and the negative logits are dot products between
    the query and the rows of the non-trainable `queue` variable.

    Returns:
        The tensor named 'cost': cross-entropy loss plus L2 regularization.
    """
    # setup queue
    random_rows = tf.random.normal([self.queue_size, self.feature_dim])
    queue_init = tf.math.l2_normalize(random_rows, axis=1)
    queue = tf.get_variable('queue', initializer=queue_init, trainable=False)
    queue_ptr = tf.get_variable(
        'queue_ptr', [],
        initializer=tf.zeros_initializer(),
        dtype=tf.int64, trainable=False)
    for state_var in (queue, queue_ptr):
        tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, state_var)

    # query encoder
    q_feat = tf.math.l2_normalize(self.net.forward(query), axis=1)  # NxC

    # key encoder
    shuffled_key, shuffle_idxs = batch_shuffle(key)
    shuffled_key.set_shape([self.batch_size, None, None, None])
    # ema_update='skip': don't maintain EMA (will not be used at all)
    with tf.variable_scope("momentum_encoder"), \
            varreplace.freeze_variables(skip_collection=True), \
            argscope(BatchNorm, ema_update='skip'):
        key_feat = xla.compile(lambda: self.net.forward(shuffled_key))[0]
    key_feat = tf.math.l2_normalize(key_feat, axis=1)  # NxC
    key_feat = batch_unshuffle(key_feat, shuffle_idxs)
    key_feat = tf.stop_gradient(key_feat)

    # loss
    pos_dot = tf.einsum('nc,nc->n', q_feat, key_feat)
    l_pos = tf.reshape(pos_dot, (-1, 1))  # nx1
    l_neg = tf.einsum('nc,kc->nk', q_feat, queue)  # nxK
    logits = tf.concat([l_pos, l_neg], axis=1)  # nx(1+k)
    logits = logits * (1 / self.temp)
    # the positive logit sits in column 0, so every label is 0
    labels = tf.zeros(self.batch_size, dtype=tf.int64)  # n
    per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)
    loss = tf.reduce_mean(per_sample, name='xentropy-loss')

    predicted = tf.math.argmax(logits, axis=1)
    hits = tf.cast(tf.equal(predicted, labels), tf.float32)
    acc = tf.reduce_mean(hits, name='train-acc')

    # update queue (depend on l_neg)
    with tf.control_dependencies([l_neg]):
        queue_push_op = self.push_queue(queue, queue_ptr, key_feat)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, queue_push_op)

    wd_loss = regularize_cost(".*", l2_regularizer(1e-4), name='l2_regularize_loss')
    add_moving_summary(acc, loss, wd_loss)
    return tf.add_n([loss, wd_loss], name='cost')
def backbone_scope(freeze):
    """
    Generator that yields once, optionally inside a variable-freezing scope.

    Args:
        freeze (bool): whether to freeze all the variables under the scope
    """
    with ExitStack() as scopes:
        if not freeze:
            yield
            return
        scopes.enter_context(
            freeze_variables(stop_gradient=False, skip_collection=True))
        yield
def rpn(self, image, features, inputs):
    """Run the parent class's RPN, optionally with frozen variables.

    When `cfg.EXTRACT_GT_FEATURES` is set, the parent is skipped and the
    boxes in `inputs['roi_boxes']` are returned as proposals together with
    a constant 0 as the second element. Otherwise the parent's `rpn` is
    called, inside `freeze_variables` when `cfg.BACKBONE.FREEZE_AT > 3`.
    """
    if cfg.EXTRACT_GT_FEATURES:
        return BoxProposals(inputs['roi_boxes']), tf.constant(0, dtype=tf.float32)

    rpn_is_frozen = cfg.BACKBONE.FREEZE_AT > 3
    if not rpn_is_frozen:
        return super().rpn(image, features, inputs)

    with freeze_variables(stop_gradient=False, skip_collection=True):
        return super().rpn(image, features, inputs)
def build_graph(self, comb_state, action, reward, isOver):
    """Build the Q-network graph and, in training, the Huber TD cost.

    `comb_state` is cast to float32 and reshaped to
    ``[-1] + self._shape2d + [self.history + 1, self.channel]``. Frames
    ``0 .. history-1`` along the fourth axis form the current state and
    frames ``1 .. history`` the next state. `reward` is clipped to [-1, 1].

    Sets `self.predict_value` (and `self.greedy_choice` for Double-DQN).

    Returns:
        None when the tower is not training, otherwise the cost tensor.
    """
    full_shape = [-1] + list(self._shape2d) + [self.history + 1, self.channel]
    comb_state = tf.reshape(tf.cast(comb_state, tf.float32), full_shape)
    frame_window = [-1, -1, -1, self.history, -1]

    state = tf.slice(comb_state, [0, 0, 0, 0, 0], frame_window)
    state = tf.reshape(state, self._shape4d_for_prediction, name='state')
    self.predict_value = self.get_DQN_prediction(state)
    if not get_current_tower_context().is_training:
        return

    reward = tf.clip_by_value(reward, -1, 1)
    next_state = tf.slice(comb_state, [0, 0, 0, 1, 0], frame_window,
                          name='next_state')
    next_state = tf.reshape(next_state, self._shape4d_for_prediction)
    action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

    pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
    max_pred_reward = tf.reduce_mean(
        tf.reduce_max(self.predict_value, 1), name='predict_reward')
    summary.add_moving_summary(max_pred_reward)

    with tf.variable_scope('target'), \
            varreplace.freeze_variables(skip_collection=True):
        target_q = self.get_DQN_prediction(next_state)  # NxA

    if self.method == 'Double':
        # Double-DQN: action chosen by the online net, valued by the target net
        online_q = self.get_DQN_prediction(next_state)
        self.greedy_choice = tf.argmax(online_q, 1)  # N,
        choice_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
        best_v = tf.reduce_sum(target_q * choice_onehot, 1)
    else:
        # DQN
        best_v = tf.reduce_max(target_q, 1)  # N,

    not_over = 1.0 - tf.cast(isOver, tf.float32)
    target = reward + not_over * self.gamma * tf.stop_gradient(best_v)

    cost = tf.losses.huber_loss(target, pred_action_value,
                                reduction=tf.losses.Reduction.MEAN)
    # monitor all W
    summary.add_param_summary(
        ('conv.*/W', ['histogram', 'rms']),
        ('fc.*/W', ['histogram', 'rms']))
    summary.add_moving_summary(cost)
    return cost
def build_graph(self, joint_state, next_mask, action, reward, isOver):
    """Build a DQN graph whose greedy next action is biased by `next_mask`.

    Index 0 / 1 along axis 1 of `joint_state` are the current / next state.
    `next_mask` is converted to float, multiplied by 1e4 and added to the
    next-state Q-values before the argmax that selects the greedy action.

    Sets `self.predict_value` and `self.greedy_choice`.

    Returns:
        None when the tower is not training, otherwise the Huber cost.
    """
    state = tf.identity(joint_state[:, 0, ...], name='state')
    self.predict_value = self.get_DQN_prediction(state)
    if not get_current_tower_context().is_training:
        return

    next_state = tf.identity(joint_state[:, 1, ...], name='next_state')
    action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

    pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
    max_pred_reward = tf.reduce_mean(
        tf.reduce_max(self.predict_value, 1), name='predict_reward')
    summary.add_moving_summary(max_pred_reward)

    with tf.variable_scope('target'), \
            varreplace.freeze_variables(skip_collection=True):
        # we are alternating between comb and fine states
        target_q = self.get_DQN_prediction(next_state)  # NxA

    # DQN selects with the target network's values; Double-DQN selects with
    # a fresh prediction from the online network. Both score with target_q.
    if self.method != 'Double':
        selection_q = target_q
    else:
        selection_q = self.get_DQN_prediction(next_state)
    mask_bonus = tf.to_float(next_mask) * 1e4
    self.greedy_choice = tf.argmax(selection_q + mask_bonus, 1)  # N,
    choice_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
    best_v = tf.reduce_sum(target_q * choice_onehot, 1)

    not_over = 1.0 - tf.cast(isOver, tf.float32)
    target = reward + not_over * self.gamma * tf.stop_gradient(best_v)

    # Kept from the original: the result is not added to `cost`.
    l2_loss = tensorpack.regularize_cost('.*W{1}', l2_regularizer(1e-3))
    cost = tf.losses.huber_loss(target, pred_action_value,
                                reduction=tf.losses.Reduction.MEAN)
    summary.add_param_summary(('.*/W', ['histogram', 'rms']))  # monitor all W
    summary.add_moving_summary(cost)
    return cost
def build_graph(self, input, label):
    """Train a 1000-way linear layer on top of frozen network features.

    `self.net.forward` is compiled with XLA and run with BatchNorm in
    inference mode and frozen variables; its output additionally goes
    through `tf.stop_gradient`. A softmax op named 'prob' is created as a
    side effect.

    Returns:
        The loss from `self.compute_loss_and_error(logits, label)`.
    """
    from tensorflow.python.compiler.xla import xla

    def frozen_forward():
        return self.net.forward(input)

    with argscope(BatchNorm, training=False), \
            varreplace.freeze_variables(skip_collection=True):
        feature = xla.compile(frozen_forward)[0]
    feature = tf.stop_gradient(feature)  # double safe

    logits = FullyConnected(
        'linear_cls', feature, 1000,
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        bias_initializer=tf.constant_initializer())
    tf.nn.softmax(logits, name='prob')

    # weight decay is 0
    loss = self.compute_loss_and_error(logits, label)
    add_moving_summary(loss)
    return loss
def build_graph(self, comb_state, action, reward, isOver):
    """Construct the DQN prediction graph and its training cost.

    After a float32 cast, `comb_state` is reshaped so that its fourth axis
    holds ``self.history + 1`` frames; the first `self.history` frames are
    the current state and the last `self.history` frames the next state.
    `reward` is clipped to [-1, 1].

    Sets `self.predict_value` (and `self.greedy_choice` for Double-DQN).

    Returns:
        None when the tower is not training, otherwise the Huber cost.
    """
    stacked_shape = [-1] + list(self._shape2d) + [self.history + 1, self.channel]
    comb_state = tf.cast(comb_state, tf.float32)
    comb_state = tf.reshape(comb_state, stacked_shape)

    current = tf.slice(comb_state, [0, 0, 0, 0, 0], [-1, -1, -1, self.history, -1])
    state = tf.reshape(current, self._shape4d_for_prediction, name='state')
    self.predict_value = self.get_DQN_prediction(state)

    training = get_current_tower_context().is_training
    if not training:
        return

    reward = tf.clip_by_value(reward, -1, 1)
    following = tf.slice(comb_state, [0, 0, 0, 1, 0],
                         [-1, -1, -1, self.history, -1], name='next_state')
    next_state = tf.reshape(following, self._shape4d_for_prediction)
    action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

    q_of_taken_action = self.predict_value * action_onehot
    pred_action_value = tf.reduce_sum(q_of_taken_action, 1)  # N,
    per_sample_max = tf.reduce_max(self.predict_value, 1)
    max_pred_reward = tf.reduce_mean(per_sample_max, name='predict_reward')
    summary.add_moving_summary(max_pred_reward)

    target_scope = tf.variable_scope('target')
    with target_scope, varreplace.freeze_variables(skip_collection=True):
        targetQ_predict_value = self.get_DQN_prediction(next_state)  # NxA

    if self.method != 'Double':
        # DQN
        best_v = tf.reduce_max(targetQ_predict_value, 1)  # N,
    else:
        # Double-DQN
        next_predict_value = self.get_DQN_prediction(next_state)
        self.greedy_choice = tf.argmax(next_predict_value, 1)  # N,
        greedy_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
        best_v = tf.reduce_sum(targetQ_predict_value * greedy_onehot, 1)

    continuing = 1.0 - tf.cast(isOver, tf.float32)
    discounted = continuing * self.gamma * tf.stop_gradient(best_v)
    target = reward + discounted

    cost = tf.losses.huber_loss(
        target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
    weight_patterns = (
        ('conv.*/W', ['histogram', 'rms']),
        ('fc.*/W', ['histogram', 'rms']),
    )
    summary.add_param_summary(*weight_patterns)  # monitor all W
    summary.add_moving_summary(cost)
    return cost
def dense_net(name):
    """Build a four-block densely connected network and return its logits.

    Reads `image`, `self` and `LAYER_NUM` from the enclosing scope, which is
    not visible here. `name` is not used in the body.

    The first bottleneck of 'block1' is built with `add_layer` calls
    (``reuse=False``) that fill `store_con` / `store_ker`; every later
    bottleneck is built with `add_bottleneck(..., reuse=True)` and receives
    those two stores plus the 'MASK' scope object.

    NOTE(review): the source was flattened onto one line, so the nesting of
    the `with` blocks and loops below is a reconstruction -- confirm against
    the original layout before relying on variable-scope names.

    Returns:
        Output of the final 10-way fully-connected layer.
    """
    with tf.variable_scope('MASK') as MASK_scope:
        # The scope must have been opened at the top level.
        assert MASK_scope.name == 'MASK'
    # With both flags False the scope requests neither stop_gradient nor
    # collection skipping.
    with freeze_variables(stop_gradient=False, skip_collection=False):
        l = conv('conv0', image, 16, 1)
        store_con = []
        store_ker = []
        with tf.variable_scope('block1') as scope:
            with tf.variable_scope('dense_bottleneck.0'):
                for i in range(LAYER_NUM):
                    l, store_con, store_ker = add_layer('dense_layer.{}'.format(i), l, store_con, store_ker, 16, MASK_scope, reuse=False, i=i)
                l = add_transition('transition', l, 16)
            # Remaining bottlenecks of block1; the last one increases the dimension.
            for i in range(1, self.N):
                l = add_bottleneck('dense_bottleneck.{}'.format(i), l, store_con, store_ker, 16, MASK_scope, reuse=True, increase_dim=True if i == (self.N-1) else False, pool=True)
        with tf.variable_scope('block2') as scope:
            for i in range(self.N):
                l = add_bottleneck('dense_bottleneck.{}'.format(i), l, store_con, store_ker, 32, MASK_scope, reuse=True, increase_dim=True if i == (self.N-1) else False, pool=False)
        with tf.variable_scope('block3') as scope:
            for i in range(self.N):
                l = add_bottleneck('dense_bottleneck.{}'.format(i), l, store_con, store_ker, 64, MASK_scope, reuse=True, increase_dim=True if i == (self.N-1) else False, pool=True)
        with tf.variable_scope('block4') as scope:
            for i in range(self.N):
                l = add_bottleneck('dense_bottleneck.{}'.format(i), l, store_con, store_ker, 128, MASK_scope, reuse=True,increase_dim=True if i == (self.N-1) else False, pool=False)
        # Classification head: BN -> ReLU -> global average pooling -> FC.
        l = BatchNorm('bnlast', l)
        l = tf.nn.relu(l)
        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected('fc', l, out_dim=10, nl=tf.identity)
    return logits
def backbone_scope(freeze):
    """
    Args:
        freeze (bool): whether to freeze all the variables under the scope

    Creates the following context around the `yield`:
        Conv, MaxPool and BatchNorm all use the channels-first (NCHW) layout.
        Conv has no bias and uses Norm -> ReLU as its activation.
        1) If BACKBONE.NORM is 'FreezeBN', or it is 'SyncBN' and `freeze` is
           True, BatchNorm runs with training=False.
        2) If BACKBONE.NORM is 'SyncBN' and `freeze` is False,
           BatchNorm.sync_statistics is 'nccl' or 'horovod' (per the
           original author: batch statistics are gathered from all GPUs to
           compute mean and variance).
        a) If `freeze` is True, no variable is trained; per the original
           author, variables are added to MODEL_VARIABLES instead of
           TRAINABLE_VARIABLES.
    """
    def nonlin(x):
        x = get_norm()(x)
        return tf.nn.relu(x)

    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False, activation=nonlin,
                     kernel_initializer=tf.variance_scaling_initializer(
                         scale=2.0, mode='fan_out')), \
            ExitStack() as stack:
        norm = cfg.BACKBONE.NORM
        inference_bn = norm == 'FreezeBN' or (norm == 'SyncBN' and freeze)
        synced_bn = norm == 'SyncBN' and not freeze

        if inference_bn:
            stack.enter_context(argscope(BatchNorm, training=False))
        elif synced_bn:
            sync_backend = 'nccl' if cfg.TRAINER == 'replicated' else 'horovod'
            stack.enter_context(argscope(BatchNorm, sync_statistics=sync_backend))

        if freeze:
            stack.enter_context(
                freeze_variables(stop_gradient=False, skip_collection=True))
        elif cfg.BACKBONE.FREEZE_AFFINE:
            # the layers are not completely frozen, but we may want to
            # freeze only the affine parameters
            stack.enter_context(custom_getter_scope(freeze_affine_getter))
        yield
def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None):
    """
    `num_convs` 3x3 convolutions, a stride-2 transposed convolution and a
    final 1x1 convolution, all built with frozen variables.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    assert norm in [None, 'GN'], norm
    # c2's MSRAFill is fan_out
    head_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')
    use_group_norm = norm is not None

    out = feature
    with varreplace.freeze_variables(stop_gradient=False, skip_collection=True), \
            argscope([Conv2D, Conv2DTranspose], data_format='channels_first',
                     kernel_initializer=head_init):
        for idx in range(num_convs):
            out = Conv2D('fcn{}'.format(idx), out, cfg.MRCNN.HEAD_DIM, 3,
                         activation=tf.nn.relu)
            if use_group_norm:
                out = GroupNorm('gn{}'.format(idx), out)
        out = Conv2DTranspose('deconv', out, cfg.MRCNN.HEAD_DIM, 2, strides=2,
                              activation=tf.nn.relu)
        out = Conv2D('conv', out, num_category, 1)
    return out
def build_graph(self, joint_state, action, reward, isOver, joint_history,
                joint_state_refine, action_refine, reward_refine,
                isOver_refine, joint_history_refine):
    """Build two DQN stages ('stage1', 'stage2') and return their summed cost.

    Index 0 / 1 along axis 1 of each ``joint_*`` input are the current /
    next step. States go through `self.get_features` before use; histories
    are used as they are. Stage ``i`` predicts ``self.num_actions[i]``
    action values and is trained with a mean-squared TD error.

    Returns:
        None when the tower is not training (after both stages' prediction
        graphs have been built); otherwise the sum of the two stage costs.
    """
    # vgg are shared among stage-1 and stage-2
    def step(tensor, index, name):
        return tf.identity(tensor[:, index, ...], name=name)

    state = self.get_features(step(joint_state, 0, 'state'))
    history = step(joint_history, 0, 'history')
    next_state = self.get_features(step(joint_state, 1, 'next_state'))
    next_history = step(joint_history, 1, 'next_history')

    state_refine = self.get_features(step(joint_state_refine, 0, 'state_refine'))
    history_refine = step(joint_history_refine, 0, 'history_refine')
    next_state_refine = self.get_features(
        step(joint_state_refine, 1, 'next_state_refine'))
    next_history_refine = step(joint_history_refine, 1, 'next_history_refine')

    stages = [
        [state, history, next_state, next_history, action, reward, isOver],
        [state_refine, history_refine, next_state_refine, next_history_refine,
         action_refine, reward_refine, isOver_refine],
    ]

    total_cost = []
    for i, data in enumerate(stages):
        n_actions = self.num_actions[i]
        with tf.variable_scope('stage%d' % (i + 1)):
            st, hist, next_st, next_hist, act, rw, over = data
            predict_value = self.get_DQN_prediction(st, hist, n_actions)
            if not get_current_tower_context().is_training:
                # Inference only needs the prediction graphs of both stages.
                if i == 0:
                    continue
                elif i == 1:
                    return

            action_onehot = tf.one_hot(act, n_actions, 1.0, 0.0)
            pred_action_value = tf.reduce_sum(predict_value * action_onehot, 1)  # N,
            max_pred_reward = tf.reduce_mean(
                tf.reduce_max(predict_value, 1), name='predict_reward')
            summary.add_moving_summary(max_pred_reward)

            with tf.variable_scope('target'), \
                    varreplace.freeze_variables(skip_collection=True):
                target_q = self.get_DQN_prediction(next_st, next_hist, n_actions)  # NxA

            if self.method == 'Double':
                # Double-DQN
                online_q = self.get_DQN_prediction(next_st, next_hist, n_actions)
                greedy_choice = tf.argmax(online_q, 1)  # N,
                choice_onehot = tf.one_hot(greedy_choice, n_actions, 1.0, 0.0)
                best_v = tf.reduce_sum(target_q * choice_onehot, 1)
            else:
                # DQN
                best_v = tf.reduce_max(target_q, 1)  # N,

            not_over = 1.0 - tf.cast(over, tf.float32)
            target = rw + not_over * self.gamma * tf.stop_gradient(best_v)
            average_target = tf.reduce_mean(target, name='average_target')

            cost = tf.losses.mean_squared_error(
                target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
            total_cost.append(cost)
            summary.add_moving_summary(cost)
            summary.add_moving_summary(average_target)
    return tf.add_n(total_cost)
def build_graph(self, pc, pc_feature):
    """Build the encoder / GAN / decoder graph and return the total cost.

    Steps, in order:
      * mirror `pc` by negating its first coordinate and find, for every
        point, the index of the closest mirrored point;
      * encode `pc_feature` with `SmoothNet`; the last channel becomes the
        sigmoid gate `z`, the remaining channels the L2-normalized feature;
      * in training only, build discriminator, generator and
        gradient-penalty losses on `z` against samples from a Beta
        distribution;
      * pool the gated features into a global code, decode it level by
        level, and compare the result with `pc` through `chamfer`;
      * add a symmetry loss between every point's embedding and the
        embedding at its `nearest_idx`, plus weight decay.

    Args:
        pc: presumably a (batch, points, 3) tensor -- TODO confirm.
        pc_feature: input passed straight to `SmoothNet`.

    Returns:
        The tensor named 'total_cost'.

    NOTE(review): the source was flattened, so the extent of each
    `variable_scope` below is a reconstruction -- confirm before relying on
    variable names.
    """
    # Mirror every point across the plane where the first coordinate is 0.
    pc_symmetry = tf.stack([-pc[..., 0], pc[..., 1], pc[..., 2]], -1)  # -x
    # Pairwise squared distance between original and mirrored points.
    dist2sym = tf.reduce_sum((pc[:, :, None] - pc_symmetry[:, None])**2, -1)
    nearest_idx = tf.argmin(dist2sym, -1, output_type=tf.int32)
    # smoothnet encoder, only local features are used
    embedding = SmoothNet(pc_feature, self.cfg)
    with tf.variable_scope('encoder'):
        z = tf.sigmoid(embedding[:, :, -1], name='z')
        output_x = tf.nn.l2_normalize(embedding[:, :, :-1], axis=-1, name='feature')
    # GAN terms stay at 0 outside training.
    gp_loss = 0.
    loss_d = 0.
    loss_g = 0.
    if get_current_tower_context().is_training:
        beta_dist = tf.distributions.Beta(
            concentration1=self.cfg.beta.concentration1,
            concentration0=self.cfg.beta.concentration0)
        with tf.variable_scope('GAN'):
            real_z = beta_dist.sample(tf.shape(z))
            # The discriminator loss must not back-propagate into z.
            fake_val = self.discriminator(tf.stop_gradient(z))
            real_val = self.discriminator(real_z)
            loss_d = tf.reduce_mean(fake_val - real_val, name='loss_d')
            # Variables fetched inside this scope are wrapped in stop_gradient.
            with varreplace.freeze_variables(stop_gradient=True):
                loss_g = tf.reduce_mean(-self.discriminator(z), name='loss_g')
            # Random point between z and real_z, one coefficient per sample.
            z_interp = z + tf.random_uniform(
                (tf.shape(fake_val)[0], 1)) * (real_z - z)
            gradient_f = tf.gradients(self.discriminator(z_interp), [z_interp])[0]
            # Penalize only gradient norms above 1.
            gp_loss = tf.reduce_mean(tf.maximum(
                tf.norm(gradient_f, axis=-1) - 1, 0)**2, name='gp_loss')
    # Global code: gated max-pool over points of the positive and negative
    # parts of the feature, concatenated.
    code = tf.concat([
        tf.reduce_max(tf.nn.relu(output_x) * z[..., None], 1),
        tf.reduce_max(tf.nn.relu(-output_x) * z[..., None], 1)
    ], axis=-1, name='code')
    code = FullyConnected('fc_global', code, self.cfg.topnet.code_nfts, activation=None)
    # topnet decoder
    tarch = get_arch(self.cfg.topnet.nlevels, self.cfg.num_points)

    def create_level(level, input_channels, output_channels, inputs, bn):
        # One decoder level: an MLP whose output is split into tarch[level]
        # children of `output_channels` features each.
        with tf.variable_scope('level_%d' % level, reuse=tf.AUTO_REUSE):
            features = mlp_conv(inputs, [
                input_channels,
                int(input_channels / 2),
                int(input_channels / 4),
                int(input_channels / 8), output_channels * int(tarch[level])
            ], get_current_tower_context().is_training, bn)
            features = tf.reshape(
                features, [tf.shape(features)[0], -1,
                           output_channels])
        return features

    Nin = self.cfg.topnet.nfeat + self.cfg.topnet.code_nfts
    Nout = self.cfg.topnet.nfeat
    bn = True
    N0 = int(tarch[0])
    nlevels = len(tarch)
    with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
        level0 = mlp(code, [256, 64, self.cfg.topnet.nfeat * N0],
                     get_current_tower_context().is_training, bn=True)
        level0 = tf.tanh(level0, name='tanh_0')
        level0 = tf.reshape(level0, [-1, N0, self.cfg.topnet.nfeat])
        outs = [
            level0,
        ]
        for i in range(1, nlevels):
            # The last level emits 3 channels and passes bn=False.
            if i == nlevels - 1:
                Nout = 3
                bn = False
            inp = outs[-1]
            # Append the global code to every node of the previous level.
            y = tf.expand_dims(code, 1)
            y = tf.tile(y, [1, tf.shape(inp)[1], 1])
            y = tf.concat([inp, y], 2)
            outs.append(
                tf.tanh(create_level(i, Nin, Nout, y, bn), name='tanh_%d' % (i)))
    reconstruction = tf.reshape(outs[-1], [-1, self.cfg.num_points, 3], name='recon_pc')
    loss_recon = chamfer(reconstruction, pc)
    loss_recon = tf.identity(self.cfg.recon_factor * tf.reduce_mean(loss_recon), name='recon_loss')
    # Gather, for every point, the embedding at its nearest_idx.
    batch_size = tf.shape(output_x)[0]
    batch_idx = tf.tile(
        tf.range(batch_size)[:, None], [1, tf.shape(nearest_idx)[1]])
    feature_sym = tf.gather_nd(embedding, tf.stack([batch_idx, nearest_idx], -1))
    loss_sym = tf.identity(
        self.cfg.symmetry_factor * tf.reduce_mean(tf.reduce_sum(tf.abs(feature_sym - embedding), -1)),
        'symmetry_loss')
    wd_cost = tf.multiply(1e-4, regularize_cost('.*(_W|kernel)', tf.nn.l2_loss), name='regularize_loss')
    loss_gan = loss_d + loss_g + gp_loss
    total_cost = tf.add_n([loss_recon, wd_cost, loss_gan, loss_sym], name='total_cost')
    summary.add_moving_summary(loss_recon, loss_sym)
    summary.add_param_summary(['.*(_W|kernel)', ['histogram', 'rms']])
    return total_cost
def build_graph(self, *inputs):
    """Build the detection graph for training or inference.

    `inputs` unpacks to image, anchor_labels, anchor_boxes, gt_boxes and
    gt_labels, plus gt_masks when `cfg.MODE_MASK` is set.

    The backbone and the RPN head are built inside
    `freeze_variables(stop_gradient=True, skip_collection=True)`; the conv5
    group and the output heads are built outside that scope.

    Returns:
        In training, the tensor named 'total_cost'. In inference nothing is
        returned explicitly; results come from `self.fastrcnn_inference`
        and, with masks enabled, the tensor named 'final_masks'.

    NOTE(review): the source was flattened, so the extent of the freeze
    scope below is a reconstruction -- confirm. The helper signatures used
    here (`rpn_head`, `resnet_conv5`, `fastrcnn_outputs`,
    `maskrcnn_upXconv_head`) are defined outside this block.
    """
    is_training = get_current_tower_context().is_training
    if cfg.MODE_MASK:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
    else:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    image = self.preprocess(image)  # 1CHW

    with varreplace.freeze_variables(stop_gradient=True, skip_collection=True):
        featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
        # freeze
        # featuremap = tf.stop_gradient(featuremap)
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)
        # freeze
        # rpn_label_logits = tf.stop_gradient(rpn_label_logits)
        # rpn_box_logits = tf.stop_gradient(rpn_box_logits)

    anchors = RPNAnchors(get_all_anchors(), anchor_labels, anchor_boxes)
    anchors = anchors.narrow_to(featuremap)

    image_shape2d = tf.shape(image)[2:]  # h,w
    pred_boxes_decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox
    # Top-k limits before and after NMS differ between training and testing.
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        tf.reshape(pred_boxes_decoded, [-1, 4]),
        tf.reshape(rpn_label_logits, [-1]),
        image_shape2d,
        cfg.RPN.TRAIN_PRE_NMS_TOPK if is_training else cfg.RPN.TEST_PRE_NMS_TOPK,
        cfg.RPN.TRAIN_POST_NMS_TOPK if is_training else cfg.RPN.TEST_POST_NMS_TOPK)

    if is_training:
        # sample proposal boxes in training
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        rcnn_boxes = proposal_boxes

    # Convert image coordinates to feature-map coordinates.
    boxes_on_featuremap = rcnn_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    feature_fastrcnn = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1], useDropout=True)  # nxcx7x7
    # Keep C5 feature to be shared with mask branch
    # feature_fastrcnn = tf.stop_gradient(feature_fastrcnn)
    feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs('fastrcnn', feature_gap, cfg.DATA.NUM_CLASS, useDropout=True)

    if is_training:
        # rpn loss
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(), rpn_label_logits, rpn_box_logits)

        # fastrcnn loss
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])  # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if cfg.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            # In training, mask branch shares the same C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0)  # #fg x #cat x 14x14

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                fg_sampled_boxes,
                fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        # Weight decay only on variables matching this pattern.
        wd_cost = regularize_cost(
            '(fastrcnn|rpn|group3)/.*/W',
            l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')

        total_cost = tf.add_n([
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss,
            wd_cost], 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        final_boxes, final_labels = self.fastrcnn_inference(
            image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

        if cfg.MODE_MASK:
            # In inference the mask branch runs conv5 again, on the final boxes.
            roi_resized = roi_align(featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
            feature_maskrcnn = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0)  # #result x #cat x 14x14
            # Pick the mask channel at index (label - 1) for every result.
            indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)  # #resultx14x14
            tf.sigmoid(final_mask_logits, name='final_masks')