def resnet_conv5(image, num_block):
    """Apply the conv5 stage ('group3') of a ResNet to ``image``.

    Args:
        image: NCHW feature map (typically 14x14 spatially).
        num_block (int): number of bottleneck blocks in the group.
    """
    with argscope([Conv2D, BatchNorm], data_format='NCHW'), \
            argscope(Conv2D, nl=tf.identity, use_bias=False), \
            argscope(BatchNorm, use_local_stat=False):
        # 14x14:
        return resnet_group(image, 'group3', resnet_bottleneck, 512, num_block, stride=2)
def pretrained_resnet_conv4(image, num_blocks):
    """Conv1 through conv4 stages of a pretrained ResNet (16x downsampling).

    Args:
        image: NCHW input tensor.
        num_blocks (list[int]): block counts for group0-group2 (len 3).
    """
    assert len(num_blocks) == 3
    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'), \
            argscope(Conv2D, nl=tf.identity, use_bias=False), \
            argscope(BatchNorm, use_local_stat=False):
        # Explicit asymmetric padding to match the pretrained model's layout.
        x = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
        x = Conv2D('conv0', x, 64, 7, stride=2, nl=BNReLU, padding='VALID')
        x = tf.pad(x, [[0, 0], [0, 0], [0, 1], [0, 1]])
        x = MaxPooling('pool0', x, shape=3, stride=2, padding='VALID')
        x = resnet_group(x, 'group0', resnet_bottleneck, 64, num_blocks[0], 1)
        # TODO replace var by const to enable folding
        x = tf.stop_gradient(x)  # freeze the early features
        x = resnet_group(x, 'group1', resnet_bottleneck, 128, num_blocks[1], 2)
        x = resnet_group(x, 'group2', resnet_bottleneck, 256, num_blocks[2], 2)
        # 16x downsampling up to now
        return x
def vgg_backbone(image, qw=1):
    """Quantized VGG-style backbone producing 1000-way logits.

    Args:
        image: input tensor; data layout is taken from the surrounding
            Conv2D argscope.
        qw (int): weight-quantization bit width for Conv2DQuant.
    """
    with argscope(Conv2DQuant, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_IN'),
                  data_format=get_arg_scope()['Conv2D']['data_format'],
                  nbit=qw):
        logits = (LinearWrap(image)
                  .Conv2DQuant('conv1', 96, 7, stride=2, nl=tf.nn.relu, is_quant=False)
                  .MaxPooling('pool1', shape=2, stride=2, padding='VALID')  # 56
                  .BNReLUQuant('bnquant2_0')
                  .Conv2DQuant('conv2_1', 256, 3, nl=getBNReLUQuant)
                  .Conv2DQuant('conv2_2', 256, 3, nl=getBNReLUQuant)
                  .Conv2DQuant('conv2_3', 256, 3)
                  .MaxPooling('pool2', shape=2, stride=2, padding='VALID')  # 28
                  .BNReLUQuant('bnquant3_0')
                  .Conv2DQuant('conv3_1', 512, 3, nl=getBNReLUQuant)
                  .Conv2DQuant('conv3_2', 512, 3, nl=getBNReLUQuant)
                  .Conv2DQuant('conv3_3', 512, 3)
                  .MaxPooling('pool3', shape=2, stride=2, padding='VALID')  # 14
                  .BNReLUQuant('bnquant4_0')
                  .Conv2DQuant('conv4_1', 512, 3, nl=getBNReLUQuant)
                  .Conv2DQuant('conv4_2', 512, 3, nl=getBNReLUQuant)
                  .Conv2DQuant('conv4_3', 512, 3)
                  .MaxPooling('pool4', shape=2, stride=2, padding='VALID')  # 7
                  .BNReLUQuant('bnquant5')
                  .Conv2DQuant('fc5', 4096, 7, nl=getfcBNReLUQuant, padding='VALID', use_bias=True)
                  .Conv2DQuant('fc6', 4096, 1, nl=getfcBNReLU, padding='VALID', use_bias=True)
                  .FullyConnected('fc7', out_dim=1000, nl=tf.identity,
                                  W_init=variance_scaling_initializer(mode='FAN_IN'))())
    return logits
def googlenet_backbone(image, qw=1):
    """Quantized GoogLeNet-style backbone producing 1000-way logits.

    Args:
        image: input tensor; data layout taken from the Conv2D argscope.
        qw (int): quantization bit width; quantization disabled when qw <= 0.
    """
    # NOTE(review): the 'incpetion_*' scope names are misspelled in the
    # original; kept verbatim because they name checkpoint variables.
    with argscope(Conv2DQuant, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_IN'),
                  data_format=get_arg_scope()['Conv2D']['data_format'],
                  nbit=qw,
                  is_quant=True if qw > 0 else False):
        logits = (LinearWrap(image)
                  .Conv2DQuant('conv1', 64, 7, stride=2, is_quant=False)
                  .MaxPooling('pool1', shape=3, stride=2, padding='SAME')
                  .BNReLUQuant('pool1/out')
                  .Conv2DQuant('conv2/3x3_reduce', 192, 1, nl=getBNReLUQuant)
                  .Conv2DQuant('conv2/3x3', 192, 3)
                  .MaxPooling('pool2', shape=3, stride=2, padding='SAME')
                  .BNReLUQuant('pool2/out')
                  .apply(inception_block, 'incpetion_3a', 96, 128, 32)
                  .apply(inception_block, 'incpetion_3b', 192, 192, 96, is_last_block=True)
                  .apply(inception_block, 'incpetion_4a', 256, 208, 48)
                  .apply(inception_block, 'incpetion_4b', 224, 224, 64)
                  .apply(inception_block, 'incpetion_4c', 192, 256, 64)
                  .apply(inception_block, 'incpetion_4d', 176, 288, 64)
                  .apply(inception_block, 'incpetion_4e', 384, 320, 128, is_last_block=True)
                  .apply(inception_block, 'incpetion_5a', 384, 320, 128)
                  .apply(inception_block, 'incpetion_5b', 512, 384, 128,
                         is_last_block=True, is_last=True)
                  .GlobalAvgPooling('pool5')
                  .FullyConnected('linear', out_dim=1000, nl=tf.identity)())
    return logits
def get_logit_binary(image, num_blocks, block_func, num_class=2):
    """Binary-classification logits from a ResNet backbone with dropout.

    Chooses pre-activation vs. plain residual groups from the global config.
    """
    group = preresnet_group if config.RESNET_MODE == 'preact' else resnet_group
    with argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format="NCHW"):
        return resnet_backbone_dropout(image, num_blocks, group, block_func,
                                       num_class=num_class)
def resnet_backbone(image, num_blocks, group_func, block_func):
    """ResNet backbone producing 1000-way ImageNet logits.

    Args:
        image: NCHW input tensor.
        num_blocks (list[int]): residual-block counts for the four groups.
        group_func: callable building one residual group.
        block_func: callable building one residual block.

    Returns:
        Logits tensor of shape [N, 1000].
    """
    with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm],
                  data_format='NCHW'), \
            argscope(Conv2D, use_bias=False,
                     kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
        l = group_func('group1', l, block_func, 128, num_blocks[1], 2)
        l = group_func('group2', l, block_func, 256, num_blocks[2], 2)
        l = group_func('group3', l, block_func, 512, num_blocks[3], 2)
        l = GlobalAvgPooling('gap', l)
        # ImageNet in 1 Hour, Sec 5.1: the 1000-way fully-connected layer is
        # initialized from a zero-mean Gaussian with standard deviation 0.01.
        # (Previously a stray no-op string expression; now a real comment.)
        logits = FullyConnected('linear', l, 1000,
                                kernel_initializer=tf.random_normal_initializer(stddev=0.01))
        return logits
def maskrcnn_upXconv_head(feature, num_category, seed_gen, num_convs, norm=None, fp16=False):
    """
    Args:
        feature: roi feature maps, Num_boxes x NumChannel x H_roi x W_roi,
        num_category(int): Number of total classes
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits: Num_boxes x num_category x (2 * H_roi) x (2 * W_roi)
    """
    assert norm in [None, 'GN'], norm
    x = feature
    if fp16:
        x = tf.cast(x, tf.float16)
    with mixed_precision_scope(mixed=fp16):
        # c2's MSRAFill is fan_out
        init = tf.variance_scaling_initializer(
            scale=2.0, mode='fan_out', seed=seed_gen.next(),
            distribution='untruncated_normal' if get_tf_version_tuple() >= (1, 12) else 'normal')
        with argscope([Conv2D, Conv2DTranspose],
                      data_format='channels_first' if cfg.TRAIN.MASK_NCHW else 'channels_last',
                      kernel_initializer=init):
            for k in range(num_convs):
                x = Conv2D('fcn{}'.format(k), x, cfg.MRCNN.HEAD_DIM, 3,
                           activation=tf.nn.relu, seed=seed_gen.next())
                if norm is not None:
                    # GroupNorm is applied in fp32 even under mixed precision.
                    if fp16:
                        x = tf.cast(x, tf.float32)
                    x = GroupNorm('gn{}'.format(k), x)
                    if fp16:
                        x = tf.cast(x, tf.float16)
            x = Conv2DTranspose('deconv', x, cfg.MRCNN.HEAD_DIM, 2, strides=2,
                                activation=tf.nn.relu, seed=seed_gen.next())  # 2x upsampling
            x = Conv2D('conv', x, num_category, 1, seed=seed_gen.next())
    if fp16:
        x = tf.cast(x, tf.float32)
    if not cfg.TRAIN.MASK_NCHW:
        x = tf.transpose(x, [0, 3, 1, 2])
    return x
def fpn_model(features):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL

    def upsample2x(name, x):
        # tf.image.resize is not aligned; use FixedUnPooling instead.
        return FixedUnPooling(
            name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
            data_format='channels_first')

    with argscope(Conv2D, data_format='channels_first',
                  activation=tf.identity, use_bias=True,
                  kernel_initializer=tf.variance_scaling_initializer(scale=1.)):
        # 1x1 lateral projections of c2-c5 to a common channel count.
        lat_2345 = [Conv2D('lateral_1x1_c{}'.format(i + 2), c, num_channel, 1)
                    for i, c in enumerate(features)]
        # Top-down pathway: add each upsampled coarser level to the lateral below.
        lat_sum_5432 = []
        for idx, lat in enumerate(lat_2345[::-1]):
            if idx > 0:
                lat = lat + upsample2x('upsample_lat{}'.format(6 - idx), lat_sum_5432[-1])
            lat_sum_5432.append(lat)
        # 3x3 post-hoc smoothing convolutions.
        p2345 = [Conv2D('posthoc_3x3_p{}'.format(i + 2), c, num_channel, 3)
                 for i, c in enumerate(lat_sum_5432[::-1])]
        # p6 is a stride-2 subsampling of p5.
        p6 = MaxPooling('maxpool_p6', p2345[-1], pool_size=1, strides=2,
                        data_format='channels_first', padding='VALID')
        return p2345 + [p6]
def resnet_backbone(image, num_blocks, group_func, block_func):
    """ResNet backbone producing 1000-way logits.

    Args:
        image: input tensor.
        num_blocks (list[int]): residual-block counts for the four groups.
        group_func: callable building one residual group.
        block_func: callable building one residual block.

    Returns:
        Logits tensor of shape [N, 1000].
    """
    with argscope([Conv2D], use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        # Single fluent chain instead of repeatedly unwrapping the tensor and
        # re-wrapping it in a fresh LinearWrap between stages; the layer
        # sequence and arguments are unchanged.
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
                  .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                  .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
                  .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', 1000)())
    return logits
def _get_NN_prediction(self, image):
    """Build per-reward-event policy/value heads mixed by event weights.

    The first image plane encodes reward-event indicator weights; one
    policy/value head is built per event and the outputs are combined as a
    weighted sum.
    """
    self._create_unnary_variables_with_summary(
        image[:, 0, :, 0], (10, 10, 6, 6, 6),
        ("rewards", "levels", "lives0", "lives1", "lives2"))
    NUMBER_OF_REWARD_EVENTS = 10
    # One scalar weight column per reward event, read from the raw input.
    rewards_events = [tf.reshape(image[:, 0, i, 0], (-1, 1))
                      for i in xrange(NUMBER_OF_REWARD_EVENTS)]
    image = image / 255.0
    with argscope(Conv2D, nl=tf.nn.relu):
        net = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
        net = MaxPooling('pool0', net, 2)
        net = Conv2D('conv1', net, out_channel=32, kernel_shape=5)
        net = MaxPooling('pool1', net, 2)
        net = Conv2D('conv2', net, out_channel=64, kernel_shape=4)
        net = MaxPooling('pool2', net, 2)
        net = Conv2D('conv3', net, out_channel=64, kernel_shape=3)
    policies = []
    values = []
    for i in xrange(10):
        head = FullyConnected('fc0{}'.format(i), net, 512, nl=tf.identity)
        head = PReLU('prelu{}'.format(i), head)
        policies.append(FullyConnected('fc-pi{}'.format(i), head,
                                       out_dim=self.number_of_actions, nl=tf.identity))
        values.append(FullyConnected('fc-v{}'.format(i), head, 1, nl=tf.identity))
    # Mix the per-event heads with the event weights.
    weighted_policies = [tf.multiply(w, p) for w, p in zip(rewards_events, policies)]
    weighted_values = [tf.multiply(w, v) for w, v in zip(rewards_events, values)]
    policy = tf.add_n(weighted_policies)
    value = tf.add_n(weighted_values)
    return policy, value
def resnet_backbone(image, num_blocks, group_func, block_func, activation_name='relu'):
    """ResNet backbone with configurable activation, producing 1000-way logits.

    Args:
        image: NCHW input tensor.
        num_blocks (list[int]): residual-block counts for the four groups.
        group_func: callable building one residual group.
        block_func: callable building one residual block.
        activation_name (str): activation passed through to BNActivation.
    """
    with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm],
                  data_format='NCHW'), \
            argscope(Conv2D, use_bias=False,
                     kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        # this padding manner follows
        # https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_model.py#L358
        x = fixed_padding(image, 7)
        x = Conv2D('conv0', x, 64, 7, strides=2,
                   activation=functools.partial(BNActivation, activation_name=activation_name),
                   padding='valid')
        x = MaxPooling('pool0', x, pool_size=3, strides=2, padding='SAME')
        x = group_func('group0', x, block_func, 64, num_blocks[0], 1)
        x = group_func('group1', x, block_func, 128, num_blocks[1], 2)
        x = group_func('group2', x, block_func, 256, num_blocks[2], 2)
        x = group_func('group3', x, block_func, 512, num_blocks[3], 2)
        x = GlobalAvgPooling('gap', x)
        # ImageNet in 1 Hour, Sec 5.1: the final FC layer uses a zero-mean
        # Gaussian initializer with standard deviation 0.01.
        return FullyConnected('linear', x, 1000,
                              kernel_initializer=tf.random_normal_initializer(stddev=0.01))
def rpn_head(featuremap, channel, num_anchors, seed_gen, fp16=False):
    """
    The RPN head that takes the feature map from the FPN and outputs bounding box logits.
    For every pixel on the feature maps, there are a certain number of anchors.
    The output will be:
    label logits: indicate whether there is an object for a certain anchor in one pixel
    box logits: The encoded box logits from fast-rcnn paper https://arxiv.org/abs/1506.01497
                page 5, in order to be consistent with the ground truth encoded boxes

    Args:
        featuremap: feature map for a single FPN layer, i.e. one from P23456,
            BS x NumChannel x H_feature x W_feature
        channel: NumChannel of the feature map, scalar, default 256
        num_anchors(NA): # of anchors for each pixel in the current feature map, scalar, default 3

    Returns:
        label_logits: BS x H_feature x W_feature x NA
        box_logits: BS x (NA * 4) x H_feature x W_feature, encoded
    """
    if fp16:
        featuremap = tf.cast(featuremap, tf.float16)
    with mixed_precision_scope(mixed=fp16):
        with argscope(Conv2D, data_format='channels_first',
                      kernel_initializer=tf.random_normal_initializer(
                          stddev=0.01, seed=seed_gen.next())):
            # Shared 3x3 hidden conv: BS x NumChannel x H_feature x W_feature
            hidden = Conv2D('conv0', featuremap, channel, 3,
                            activation=tf.nn.relu, seed=seed_gen.next())
            # BS x NA x H_feature x W_feature
            label_logits = Conv2D('class', hidden, num_anchors, 1, seed=seed_gen.next())
            # BS x (NA*4) x H_feature x W_feature
            box_logits = Conv2D('box', hidden, 4 * num_anchors, 1, seed=seed_gen.next())
            # BS x H_feature x W_feature x NA
            label_logits = tf.transpose(label_logits, [0, 2, 3, 1])
    if fp16:
        label_logits = tf.cast(label_logits, tf.float32)
        box_logits = tf.cast(box_logits, tf.float32)
    return label_logits, box_logits
def resnet_backbone(image, resnet_depth, num_blocks, group_func, block_func,
                    activations_datatype_size_byte, weights_datatype_size_byte,
                    results_datatype_size_byte, systolic_array_height,
                    systolic_array_width, accumulator_array_height, mpusim_logdir):
    """ResNet backbone built from MPU-simulator layers, producing 1000-way logits.

    The datatype sizes and systolic-array dimensions parameterize the
    simulated matrix-processing unit; simulation logs go to ``mpusim_logdir``.
    """
    constant_init = tf.constant_initializer(1)
    sim_model_name = 'resnet_{}_sys_arr_h_{}_sys_arr_w_{}_acc_arr_h_{}'.format(
        resnet_depth, systolic_array_height, systolic_array_width, accumulator_array_height)
    with argscope(mpusim_conv2d, data_format='NHWC'), \
            argscope([mpusim_conv2d, mpusim_fully_connected],
                     activation=tf.identity,
                     use_bias=False,
                     kernel_initializer=constant_init,
                     activations_datatype_size_byte=activations_datatype_size_byte,
                     weights_datatype_size_byte=weights_datatype_size_byte,
                     results_datatype_size_byte=results_datatype_size_byte,
                     systolic_array_height=systolic_array_height,
                     systolic_array_width=systolic_array_width,
                     activation_fifo_depth=8,
                     accumulator_array_height=accumulator_array_height,
                     log_file_output_dir=mpusim_logdir,
                     model_name=sim_model_name):
        x = mpusim_conv2d('conv0', image, 64, 7, stride=2, activation=BNReLU)
        x = MaxPooling('pool0', x, shape=3, stride=2, padding='SAME')
        x = group_func(x, 'group0', block_func, 64, num_blocks[0], 1)
        x = group_func(x, 'group1', block_func, 128, num_blocks[1], 2)
        x = group_func(x, 'group2', block_func, 256, num_blocks[2], 2)
        x = group_func(x, 'group3', block_func, 512, num_blocks[3], 2, is_last=True)
        x = GlobalAvgPooling('gap', x)
        return mpusim_fully_connected('linear', x, 1000)
def fpn_model(features, seed_gen, fp16=False):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL
    use_gn = cfg.FPN.NORM == 'GN'

    def upsample2x(name, x):
        # tf.image.resize is not aligned; use FixedUnPooling instead.
        dtype_str = 'float16' if fp16 else 'float32'
        return FixedUnPooling(
            name, x, 2, unpool_mat=np.ones((2, 2), dtype=dtype_str),
            data_format='channels_first' if cfg.TRAIN.FPN_NCHW else 'channels_last')

    with mixed_precision_scope(mixed=fp16):
        with argscope(Conv2D,
                      data_format='channels_first' if cfg.TRAIN.FPN_NCHW else 'channels_last',
                      activation=tf.identity, use_bias=True,
                      kernel_initializer=tf.variance_scaling_initializer(
                          scale=1., seed=seed_gen.next())):
            # 1x1 lateral projections of c2-c5.
            lat_2345 = [Conv2D('lateral_1x1_c{}'.format(i + 2), c, num_channel, 1,
                               seed=seed_gen.next())
                        for i, c in enumerate(features)]
            if use_gn:
                lat_2345 = [GroupNorm('gn_c{}'.format(i + 2), c)
                            for i, c in enumerate(lat_2345)]
            # Top-down pathway: add upsampled coarser levels.
            lat_sum_5432 = []
            for idx, lat in enumerate(lat_2345[::-1]):
                if idx > 0:
                    lat = lat + upsample2x('upsample_lat{}'.format(6 - idx), lat_sum_5432[-1])
                lat_sum_5432.append(lat)
            # 3x3 post-hoc smoothing convolutions.
            p2345 = [Conv2D('posthoc_3x3_p{}'.format(i + 2), c, num_channel, 3,
                            seed=seed_gen.next())
                     for i, c in enumerate(lat_sum_5432[::-1])]
            if use_gn:
                p2345 = [GroupNorm('gn_p{}'.format(i + 2), c)
                         for i, c in enumerate(p2345)]
            # p6 is a stride-2 subsampling of p5.
            p6 = MaxPooling('maxpool_p6', p2345[-1], pool_size=1, strides=2,
                            data_format='channels_first' if cfg.TRAIN.FPN_NCHW else 'channels_last',
                            padding='VALID')
            if fp16:
                return [tf.cast(l, tf.float32) for l in p2345] + [tf.cast(p6, tf.float32)]
            return p2345 + [p6]
def resnet_backbone(image, num_blocks, group_func, block_func):
    """ResNet backbone producing 1000-way logits (fan-out MSRA init)."""
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        x = Conv2D('conv0', image, 64, 7, stride=2, nl=BNReLU)
        x = MaxPooling('pool0', x, shape=3, stride=2, padding='SAME')
        x = group_func(x, 'group0', block_func, 64, num_blocks[0], 1)
        x = group_func(x, 'group1', block_func, 128, num_blocks[1], 2)
        x = group_func(x, 'group2', block_func, 256, num_blocks[2], 2)
        x = group_func(x, 'group3', block_func, 512, num_blocks[3], 2)
        x = GlobalAvgPooling('gap', x)
        return FullyConnected('linear', x, 1000, nl=tf.identity)
def resnet_backbone(image, num_blocks, group_func, block_func):
    """ResNet backbone producing 1000-way logits (FAN_OUT variance scaling)."""
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_OUT')):
        x = Conv2D('conv0', image, 64, 7, stride=2, nl=BNReLU)
        x = MaxPooling('pool0', x, shape=3, stride=2, padding='SAME')
        x = group_func(x, 'group0', block_func, 64, num_blocks[0], 1)
        x = group_func(x, 'group1', block_func, 128, num_blocks[1], 2)
        x = group_func(x, 'group2', block_func, 256, num_blocks[2], 2)
        x = group_func(x, 'group3', block_func, 512, num_blocks[3], 2)
        x = GlobalAvgPooling('gap', x)
        return FullyConnected('linear', x, 1000, nl=tf.identity)
def resnet(input_, option):
    """SE-ResNet with optional gating ops inserted at configurable positions.

    Args:
        input_: input tensor.
        option: config object with ``mode``, ``depth``, ``gating_position``
            (bool flags per insertion point) and ``number_of_class``.

    Returns:
        (logits, last feature map before global pooling).
    """
    mode = option.mode
    depth = option.depth
    bottleneck = {'se': se_resnet_bottleneck}[mode]
    # Renamed from 'cfg' to avoid shadowing a module-level config object.
    depth_cfg = {
        50: ([3, 4, 6, 3], bottleneck),
    }
    defs, block_func = depth_cfg[depth]
    group_func = resnet_group
    with argscope(Conv2D, use_bias=False, kernel_initializer= \
            tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
            argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
                     data_format='channels_first'):
        x = Conv2D('conv0', input_, 64, 7, strides=2, activation=BNReLU)
        if option.gating_position[0]:
            x = gating_op(x, option)
        x = MaxPooling('pool0', x, 3, strides=2, padding='SAME')
        if option.gating_position[1]:
            x = gating_op(x, option)
        x = group_func('group0', x, block_func, 64, defs[0], 1, option)
        if option.gating_position[2]:
            x = gating_op(x, option)
        x = group_func('group1', x, block_func, 128, defs[1], 2, option)
        if option.gating_position[3]:
            x = gating_op(x, option)
        x = group_func('group2', x, block_func, 256, defs[2], 2, option)
        if option.gating_position[4]:
            x = gating_op(x, option)
        # Note: group3 uses stride 1 here (keeps spatial resolution).
        x = group_func('group3', x, block_func, 512, defs[3], 1, option)
        if option.gating_position[5]:
            x = gating_op(x, option)
        p_logits = GlobalAvgPooling('gap', x)
        logits = FullyConnected('linearnew', p_logits, option.number_of_class)
    return logits, x
def resnet_backbone(image, num_blocks, group_func, block_func, classes=1000):
    """ResNet backbone with dropout head; returns (logits, latent features).

    Args:
        image: input tensor (channels_first).
        num_blocks (list[int]): residual-block counts for the four groups.
        group_func: callable building one residual group.
        block_func: callable building one residual block.
        classes (int): number of output classes.
    """
    with argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
                  data_format='channels_first'), \
            argscope(Conv2D,
                     kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out'),
                     use_bias=False):
        # Note that TF pads the image by [2, 3] instead of [3, 2].
        # Similar things happen in later stride=2 layers as well.
        x = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        x = MaxPooling('pool0', x, pool_size=3, strides=2, padding='SAME')
        x = group_func('group0', x, block_func, 64, num_blocks[0], 1)
        x = group_func('group1', x, block_func, 128, num_blocks[1], 2)
        x = group_func('group2', x, block_func, 256, num_blocks[2], 2)
        x = group_func('group3', x, block_func, 512, num_blocks[3], 2)
        latent = x  # features before pooling, returned alongside logits
        x = GlobalAvgPooling('gap', x)
        x = Dropout('dropout', x, 0.5)
        logits = FullyConnected(
            'linear', x, classes,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits, latent
def densenet_backbone(image, activations_datatype_size_byte, weights_datatype_size_byte,
                      results_datatype_size_byte, systolic_array_height,
                      systolic_array_width, accumulator_array_height, mpusim_logdir):
    """DenseNet-264 backbone built from MPU-simulator layers (1000-way logits).

    The datatype sizes and systolic-array dimensions parameterize the simulated
    matrix-processing unit; simulation logs go to ``mpusim_logdir``.
    """
    constant_init = tf.constant_initializer(1)
    sim_model_name = 'densenet_264_sys_arr_h_{}_sys_arr_w_{}_acc_arr_h_{}'.format(
        systolic_array_height, systolic_array_width, accumulator_array_height)
    with argscope(mpusim_conv2d, data_format='NHWC', use_bias=False), \
            argscope([mpusim_conv2d, mpusim_fully_connected],
                     nl=tf.identity,
                     kernel_initializer=constant_init,
                     activations_datatype_size_byte=activations_datatype_size_byte,
                     weights_datatype_size_byte=weights_datatype_size_byte,
                     results_datatype_size_byte=results_datatype_size_byte,
                     systolic_array_height=systolic_array_height,
                     systolic_array_width=systolic_array_width,
                     activation_fifo_depth=8,
                     accumulator_array_height=accumulator_array_height,
                     log_file_output_dir=mpusim_logdir,
                     model_name=sim_model_name):
        x = mpusim_conv2d('conv1', image, 2 * GROWTH_RATE, 7, stride=2, activation=BNReLU)
        x = MaxPooling('pool1', x, shape=3, stride=2, padding='SAME')
        # DenseNet-264 block configuration: 6 / 12 / 64 / 48 layers.
        x = add_dense_block(x, 'block0', 6)
        x = add_dense_block(x, 'block1', 12)
        x = add_dense_block(x, 'block2', 64)
        x = add_dense_block(x, 'block3', 48, last=True)
        x = BNReLU('bnrelu_last', x)
        x = GlobalAvgPooling('gap', x)
        return mpusim_fully_connected('linear', x, out_dim=1000)
def googlenet_backbone(image, qw=1):
    """Quantized GoogLeNet-style backbone producing 1000-way logits.

    Args:
        image: input tensor; data layout taken from the Conv2D argscope.
        qw (int): quantization bit width; quantization disabled when qw <= 0.
    """
    # NOTE(review): the 'incpetion_*' scope names are misspelled in the
    # original; kept verbatim because they name checkpoint variables.
    with argscope(Conv2DQuant, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_IN'),
                  data_format=get_arg_scope()['Conv2D']['data_format'],
                  nbit=qw,
                  is_quant=True if qw > 0 else False):
        logits = (LinearWrap(image)
                  .Conv2DQuant('conv1', 64, 7, stride=2, is_quant=False)
                  .MaxPooling('pool1', shape=3, stride=2, padding='SAME')
                  .BNReLUQuant('pool1/out')
                  .Conv2DQuant('conv2/3x3_reduce', 192, 1, nl=getBNReLUQuant)
                  .Conv2DQuant('conv2/3x3', 192, 3)
                  .MaxPooling('pool2', shape=3, stride=2, padding='SAME')
                  .BNReLUQuant('pool2/out')
                  .apply(inception_block, 'incpetion_3a', 96, 128, 32)
                  .apply(inception_block, 'incpetion_3b', 192, 192, 96, is_last_block=True)
                  .apply(inception_block, 'incpetion_4a', 256, 208, 48)
                  .apply(inception_block, 'incpetion_4b', 224, 224, 64)
                  .apply(inception_block, 'incpetion_4c', 192, 256, 64)
                  .apply(inception_block, 'incpetion_4d', 176, 288, 64)
                  .apply(inception_block, 'incpetion_4e', 384, 320, 128, is_last_block=True)
                  .apply(inception_block, 'incpetion_5a', 384, 320, 128)
                  .apply(inception_block, 'incpetion_5b', 512, 384, 128,
                         is_last_block=True, is_last=True)
                  .GlobalAvgPooling('pool5')
                  .FullyConnected('linear', out_dim=1000, nl=tf.identity)())
    return logits
def resnet_backbone(image, num_blocks, group_func, block_func):
    """ResNet backbone producing 1000-way ImageNet logits."""
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        # Note that this pads the image by [2, 3] instead of [3, 2].
        # Similar things happen in later stride=2 layers as well.
        x = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        x = MaxPooling('pool0', x, pool_size=3, strides=2, padding='SAME')
        x = group_func('group0', x, block_func, 64, num_blocks[0], 1)
        x = group_func('group1', x, block_func, 128, num_blocks[1], 2)
        x = group_func('group2', x, block_func, 256, num_blocks[2], 2)
        x = group_func('group3', x, block_func, 512, num_blocks[3], 2)
        x = GlobalAvgPooling('gap', x)
        return FullyConnected('linear', x, 1000,
                              kernel_initializer=tf.random_normal_initializer(stddev=0.01))
def maskrcnn_head(feature, num_class):
    """
    Args:
        feature (NxCx7x7):
        num_classes(int): num_category + 1

    Returns:
        mask_logits (N x num_category x 14 x 14):
    """
    with argscope([Conv2D, Deconv2D], data_format='NCHW',
                  W_init=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_in', distribution='normal')):
        # 2x upsampling followed by a 1x1 conv to per-category mask logits.
        x = Deconv2D('deconv', feature, 256, 2, stride=2, nl=tf.nn.relu)
        return Conv2D('conv', x, num_class - 1, 1)
def resnet_backbone(image, num_blocks, group_func, block_func):
    """ResNet backbone built from MPU-simulator layers (1000-way logits)."""
    with argscope(mpusim_conv2d, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        # Note that TF pads the image by [2, 3] instead of [3, 2].
        # Similar things happen in later stride=2 layers as well.
        x = mpusim_conv2d('conv0', image, 64, 7, strides=2, activation=BNReLU)
        x = MaxPooling('pool0', x, pool_size=3, strides=2, padding='SAME')
        x = group_func('group0', x, block_func, 64, num_blocks[0], 1)
        x = group_func('group1', x, block_func, 128, num_blocks[1], 2)
        x = group_func('group2', x, block_func, 256, num_blocks[2], 2)
        x = group_func('group3', x, block_func, 512, num_blocks[3], 2)
        x = GlobalAvgPooling('gap', x)
        return mpusim_fully_connected('linear', x, 1000,
                                      kernel_initializer=tf.random_normal_initializer(stddev=0.01))
def resnet_backbone(image, num_blocks, group_func, block_func, qw=1):
    """Quantized ResNet backbone producing 1000-way logits.

    Args:
        image: input tensor; layout taken from the Conv2D argscope.
        num_blocks (list[int]): residual-block counts for the four groups.
        group_func: callable building one residual group.
        block_func: callable building one residual block.
        qw (int): weight-quantization bit width for Conv2DQuant.
    """
    with argscope(Conv2DQuant, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_OUT'),
                  data_format=get_arg_scope()['Conv2D']['data_format'],
                  nbit=qw):
        x = Conv2DQuant('conv0', image, 64, 7, stride=2, nl=BNReLU, is_quant=False)
        x = MaxPooling('pool0', x, shape=3, stride=2, padding='SAME')
        x = group_func(x, 'group0', block_func, 64, num_blocks[0], 1)
        x = group_func(x, 'group1', block_func, 128, num_blocks[1], 2)
        x = group_func(x, 'group2', block_func, 256, num_blocks[2], 2)
        x = group_func(x, 'group3', block_func, 512, num_blocks[3], 2, is_last=True)
        x = GlobalAvgPooling('gap', x)
        return FullyConnected('linear', x, 1000, nl=tf.identity)
def maskrcnn_head(feature, num_class):
    """
    Args:
        feature (NxCx7x7):
        num_classes(int): num_category + 1

    Returns:
        mask_logits (N x num_category x 14 x 14):
    """
    # c2's MSRAFill is fan_out
    with argscope([Conv2D, Conv2DTranspose], data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        # 2x upsampling followed by a 1x1 conv to per-category mask logits.
        x = Conv2DTranspose('deconv', feature, 256, 2, strides=2, activation=tf.nn.relu)
        return Conv2D('conv', x, num_class - 1, 1)
def fpn_model(features):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL
    use_gn = cfg.FPN.NORM == 'GN'

    def upsample2x(name, x):
        # Prefer the TF2 resize op when available; fall back to FixedUnPooling.
        try:
            resize = tf.compat.v2.image.resize_images
            with tf.name_scope(name):
                shp2d = tf.shape(x)[2:]
                x = tf.transpose(x, [0, 2, 3, 1])
                x = resize(x, shp2d * 2, 'nearest')
                x = tf.transpose(x, [0, 3, 1, 2])
                return x
        except AttributeError:
            return FixedUnPooling(
                name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
                data_format='channels_first')

    with argscope(Conv2D, data_format='channels_first',
                  activation=tf.identity, use_bias=True,
                  kernel_initializer=tfv1.variance_scaling_initializer(scale=1.)):
        # 1x1 lateral projections of c2-c5.
        lat_2345 = [Conv2D('lateral_1x1_c{}'.format(i + 2), c, num_channel, 1)
                    for i, c in enumerate(features)]
        if use_gn:
            lat_2345 = [GroupNorm('gn_c{}'.format(i + 2), c)
                        for i, c in enumerate(lat_2345)]
        # Top-down pathway: add upsampled coarser levels.
        lat_sum_5432 = []
        for idx, lat in enumerate(lat_2345[::-1]):
            if idx > 0:
                lat = lat + upsample2x('upsample_lat{}'.format(6 - idx), lat_sum_5432[-1])
            lat_sum_5432.append(lat)
        # 3x3 post-hoc smoothing convolutions.
        p2345 = [Conv2D('posthoc_3x3_p{}'.format(i + 2), c, num_channel, 3)
                 for i, c in enumerate(lat_sum_5432[::-1])]
        if use_gn:
            p2345 = [GroupNorm('gn_p{}'.format(i + 2), c)
                     for i, c in enumerate(p2345)]
        # p6 is a stride-2 subsampling of p5.
        p6 = MaxPooling('maxpool_p6', p2345[-1], pool_size=1, strides=2,
                        data_format='channels_first', padding='VALID')
        return p2345 + [p6]
def _get_NN_prediction(self, image):
    """Build the A3C network: returns (unnormalized policy logits, value)."""
    image = tf.cast(image, tf.float32) / 255.0
    with argscope(Conv2D, activation=tf.nn.relu):
        net = Conv2D('conv0', image, 32, 5)
        net = MaxPooling('pool0', net, 2)
        net = Conv2D('conv1', net, 32, 5)
        net = MaxPooling('pool1', net, 2)
        net = Conv2D('conv2', net, 64, 4)
        net = MaxPooling('pool2', net, 2)
        net = Conv2D('conv3', net, 64, 3)
    net = FullyConnected('fc0', net, 512)
    net = PReLU('prelu', net)
    logits = FullyConnected('fc-pi', net, self.num_actions)  # unnormalized policy
    value = FullyConnected('fc-v', net, 1)
    return logits, value
def retinanet_head(featuremap, channel, num_anchors):
    """
    Returns:
        label_logits: fHxfWxNAxclass
        box_logits: fHxfWxNAx4
    """
    conv_num = 4
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.random_normal_initializer(stddev=0.01)):
        # Two parallel conv towers: one for classification, one for box regression.
        cls_hidden = featuremap
        box_hidden = featuremap
        for i in range(conv_num):
            cls_hidden = Conv2D('conv{}_cls_hidden'.format(i), cls_hidden, channel, 3,
                                activation=tf.nn.relu)
            box_hidden = Conv2D('conv{}_box_hidden'.format(i), box_hidden, channel, 3,
                                activation=tf.nn.relu)
        label_logits = Conv2D('class', cls_hidden, num_anchors * cfg.DATA.NUM_CLASS, 1)
        box_logits = Conv2D('box', box_hidden, 4 * num_anchors, 1)  # 1, NA(*4), im/16, im/16 (NCHW)

        label_logits = tf.transpose(label_logits, [0, 2, 3, 1])  # 1xfHxfWx(NA*class)
        label_logits = tf.squeeze(label_logits, 0)  # fHxfWx(NA*class)

        shp = tf.shape(box_logits)  # 1x(NAx4)xfHxfW
        box_logits = tf.transpose(box_logits, [0, 2, 3, 1])  # 1xfHxfWx(NAx4)
        box_logits = tf.reshape(box_logits,
                                tf.stack([shp[2], shp[3], num_anchors, 4]))  # fHxfWxNAx4
        label_logits = tf.reshape(
            label_logits,
            tf.stack([shp[2], shp[3], num_anchors, cfg.DATA.NUM_CLASS]))  # fHxfWxNAxclass
    return label_logits, box_logits
def fpn_model(features):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL
    use_gn = cfg.FPN.NORM == 'GN'

    def upsample2x(name, x):
        # tf.image.resize is not aligned; use FixedUnPooling instead.
        return FixedUnPooling(
            name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
            data_format='channels_first')

    with argscope(Conv2D, data_format='channels_first',
                  activation=tf.identity, use_bias=True,
                  kernel_initializer=tf.variance_scaling_initializer(scale=1.)):
        # 1x1 lateral projections of c2-c5.
        lat_2345 = [Conv2D('lateral_1x1_c{}'.format(i + 2), c, num_channel, 1)
                    for i, c in enumerate(features)]
        if use_gn:
            lat_2345 = [GroupNorm('gn_c{}'.format(i + 2), c)
                        for i, c in enumerate(lat_2345)]
        # Top-down pathway: add upsampled coarser levels.
        lat_sum_5432 = []
        for idx, lat in enumerate(lat_2345[::-1]):
            if idx > 0:
                lat = lat + upsample2x('upsample_lat{}'.format(6 - idx), lat_sum_5432[-1])
            lat_sum_5432.append(lat)
        # 3x3 post-hoc smoothing convolutions.
        p2345 = [Conv2D('posthoc_3x3_p{}'.format(i + 2), c, num_channel, 3)
                 for i, c in enumerate(lat_sum_5432[::-1])]
        if use_gn:
            p2345 = [GroupNorm('gn_p{}'.format(i + 2), c)
                     for i, c in enumerate(p2345)]
        # p6 is a stride-2 subsampling of p5.
        p6 = MaxPooling('maxpool_p6', p2345[-1], pool_size=1, strides=2,
                        data_format='channels_first', padding='VALID')
        return p2345 + [p6]
def resnet_backbone(image, num_blocks, group_func, block_func):
    """ResNet backbone with explicit symmetric padding and GroupNorm-ReLU stem."""
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='untruncated_normal')):
        # Explicit NCHW padding so conv0/pool0 can use VALID padding.
        x = tf.pad(image, [[0, 0], [0, 0], [3, 3], [3, 3]])
        x = Conv2D('conv0', x, 64, 7, strides=2, activation=GNReLU, padding='VALID')
        x = tf.pad(x, [[0, 0], [0, 0], [1, 1], [1, 1]])
        x = MaxPooling('pool0', x, shape=3, stride=2, padding='VALID')
        x = group_func(x, 'group0', block_func, 64, num_blocks[0], 1)
        x = group_func(x, 'group1', block_func, 128, num_blocks[1], 2)
        x = group_func(x, 'group2', block_func, 256, num_blocks[2], 2)
        x = group_func(x, 'group3', block_func, 512, num_blocks[3], 2)
        x = GlobalAvgPooling('gap', x)
        return FullyConnected('linear', x, 1000,
                              kernel_initializer=tf.random_normal_initializer(stddev=0.01))
def alexnet_backbone(image, qw=1):
    """Quantized AlexNet-style backbone producing 1000-way logits.

    Args:
        image: input tensor; layout taken from the Conv2D argscope.
        qw (int): weight-quantization bit width for Conv2DQuant.
    """
    with argscope(Conv2DQuant, nl=tf.identity, use_bias=False,
                  W_init=tf.random_normal_initializer(stddev=0.01),
                  data_format=get_arg_scope()['Conv2D']['data_format'],
                  nbit=qw):
        logits = (LinearWrap(image)
                  .Conv2DQuant('conv1', 96, 11, stride=4, is_quant=False, padding='VALID')
                  .MaxPooling('pool1', shape=3, stride=2, padding='VALID')
                  .BNReLUQuant('bnquant2')
                  .Conv2DQuant('conv2', 256, 5)
                  .MaxPooling('pool2', shape=3, stride=2, padding='VALID')
                  .BNReLUQuant('bnquant3')
                  .Conv2DQuant('conv3', 384, 3, nl=getBNReLUQuant)
                  .Conv2DQuant('conv4', 384, 3, nl=getBNReLUQuant)
                  .Conv2DQuant('conv5', 256, 3)
                  .MaxPooling('pool5', shape=3, stride=2, padding='VALID')
                  .BNReLUQuant('bnquant6')
                  .Conv2DQuant('fc6', 4096, 6, nl=getfcBNReLUQuant, padding='VALID',
                               W_init=tf.random_normal_initializer(stddev=0.005),
                               use_bias=True)
                  .Conv2DQuant('fc7', 4096, 1, nl=getfcBNReLU, padding='VALID',
                               W_init=tf.random_normal_initializer(stddev=0.005),
                               use_bias=True)
                  .FullyConnected('fc8', out_dim=1000, nl=tf.identity,
                                  W_init=tf.random_normal_initializer(stddev=0.01))())
    return logits
def resnet_backbone(image, num_blocks, group_func, block_func):
    """ResNet classifier: BNReLU 7x7 stem, four residual stages, global
    average pooling and a 1000-way linear head.

    Args:
        image: input tensor.
        num_blocks (list[int]): residual blocks per stage.
        group_func, block_func: builders for a residual stage / block.
    Returns:
        1000-dim logits tensor.
    """
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out')):
        x = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        x = MaxPooling('pool0', x, pool_size=3, strides=2, padding='SAME')
        # Stage widths double each time; only the first stage keeps stride 1.
        for i, (width, stride) in enumerate(zip((64, 128, 256, 512), (1, 2, 2, 2))):
            x = group_func('group{}'.format(i), x, block_func, width, num_blocks[i], stride)
        x = GlobalAvgPooling('gap', x)
        logits = FullyConnected(
            'linear', x, 1000,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
def _get_NN_prediction(self, image):
    """Policy network: four conv(+pool) stages, FC(512) with PReLU, then
    per-action logits. `image` is scaled from [0, 255] to [0, 1] first."""
    image = image / 255.0
    with argscope(Conv2D, nl=tf.nn.relu):
        x = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
        x = MaxPooling('pool0', x, 2)
        x = Conv2D('conv1', x, out_channel=32, kernel_shape=5)
        x = MaxPooling('pool1', x, 2)
        x = Conv2D('conv2', x, out_channel=64, kernel_shape=4)
        x = MaxPooling('pool2', x, 2)
        x = Conv2D('conv3', x, out_channel=64, kernel_shape=3)

    # Identity nonlinearity here because PReLU supplies the activation.
    x = FullyConnected('fc0', x, 512, nl=tf.identity)
    x = PReLU('prelu', x)
    policy = FullyConnected('fc-pi', x, out_dim=NUM_ACTIONS, nl=tf.identity)
    return policy
def darknet(image, use_fp16):
    """Darknet-53-style classifier: a stride-1 stem conv, then five stages of
    (stride-2 conv doubling the width, residual group), ending in global
    average pooling and a 1000-way linear head.

    NOTE(review): `use_fp16` is accepted but not referenced in this body —
    confirm whether callers rely on it elsewhere.
    """
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        x = Conv2D('conv0', image, 32, 3, strides=1, activation=BNLeakyReLU)
        # Stage widths and residual-block counts, in order.
        for i, (width, count) in enumerate(zip((64, 128, 256, 512, 1024), (1, 2, 8, 8, 4))):
            x = Conv2D('conv{}'.format(i + 1), x, width, 3, strides=2, activation=BNLeakyReLU)
            x = resnet_group('group{}'.format(i), x, width, count)
        x = GlobalAvgPooling('gap', x)
        logits = FullyConnected('linear', x, 1000,
                                kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs):
    """
    Fast-RCNN head: `num_convs` 3x3 convs, one FC layer, then the
    classification/box output layers.

    Args:
        feature (any shape):
        num_classes(int): num_category + 1
        num_convs (int): number of conv layers

    Returns:
        cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
    """
    head = feature
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        for idx in range(num_convs):
            head = Conv2D('conv{}'.format(idx), head, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3,
                          activation=tf.nn.relu)
        head = FullyConnected('fc', head, cfg.FPN.FRCNN_FC_HEAD_DIM,
                              kernel_initializer=tf.variance_scaling_initializer(),
                              activation=tf.nn.relu)
    return fastrcnn_outputs('outputs', head, num_classes)
def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None):
    """
    Mask-RCNN head: `num_convs` 3x3 convs (optionally GroupNorm'ed), a 2x
    deconv upsampling, then a 1x1 conv producing per-category mask logits.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    assert norm in [None, 'GN'], norm
    l = feature
    with argscope(
            [Conv2D, Conv2DTranspose], data_format='channels_first',
            kernel_initializer=tf.variance_scaling_initializer(
                scale=2.0, mode='fan_out',
                # 'untruncated_normal' only exists in TF >= 1.12; fall back otherwise.
                distribution='untruncated_normal' if get_tf_version_tuple() >= (1, 12) else 'normal')):
        # c2's MSRAFill is fan_out
        for k in range(num_convs):
            l = Conv2D('fcn{}'.format(k), l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
            if norm is not None:
                l = GroupNorm('gn{}'.format(k), l)
        # 2x spatial upsampling: s x s -> 2s x 2s.
        l = Conv2DTranspose('deconv', l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
        # Final 1x1 conv uses a small-stddev normal init instead of the argscope default.
        l = Conv2D('conv', l, num_category, 1,
                   kernel_initializer=tf.random_normal_initializer(stddev=0.001))
    return l
def resnet_backbone(image, num_blocks, grp_fun, blck_fun, nfeatures):
    """ResNet feature extractor ending in an `nfeatures`-dimensional embedding.

    Args:
        image: input tensor.
        num_blocks (list[int]): residual blocks per stage.
        grp_fun, blck_fun: builders for a residual stage / block.
        nfeatures (int): dimensionality of the output embedding.
    Returns:
        `nfeatures`-dim output tensor.
    """
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        x = Conv2D('conv0', image, 64, 7, stride=2, nl=BNReLU)
        x = MaxPooling('pool0', x, shape=3, stride=2, padding='SAME')
        x = grp_fun(x, 'group0', blck_fun, 64, num_blocks[0], 1)
        x = grp_fun(x, 'group1', blck_fun, 128, num_blocks[1], 2)
        x = grp_fun(x, 'group2', blck_fun, 256, num_blocks[2], 2)
        # NOTE: group3 deliberately keeps 256 channels and stride 1 instead of
        # the standard 512 channels / stride 2.
        x = grp_fun(x, 'group3', blck_fun, 256, num_blocks[3], 1)
        x = GlobalAvgPooling('gap', x)
        x = FullyConnected('fc0', x, 1000)
        x = FullyConnected('fc1', x, 500)
        logits = FullyConnected('linear', x, nfeatures, nl=tf.identity)
    return logits
def rpn_head(featuremap):
    """RPN head (NCHW): a shared 3x3 conv followed by 1x1 objectness and
    box-regression branches, reshaped to per-location layouts.

    Returns:
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4
    """
    with tf.variable_scope('rpn'), \
            argscope(Conv2D, data_format='NCHW',
                     W_init=tf.random_normal_initializer(stddev=0.01)):
        shared = Conv2D('conv0', featuremap, 1024, 3, nl=tf.nn.relu)
        label_logits = Conv2D('class', shared, config.NR_ANCHOR, 1)
        box_logits = Conv2D('box', shared, 4 * config.NR_ANCHOR, 1)
        # 1, NA(*4), im/16, im/16 (NCHW)

        label_logits = tf.transpose(label_logits, [0, 2, 3, 1])  # 1xfHxfWxNA
        label_logits = tf.squeeze(label_logits, 0)  # fHxfWxNA

        shp = tf.shape(box_logits)  # 1x(NAx4)xfHxfW
        box_logits = tf.transpose(box_logits, [0, 2, 3, 1])  # 1xfHxfWx(NAx4)
        box_logits = tf.reshape(box_logits,
                                tf.stack([shp[2], shp[3], config.NR_ANCHOR, 4]))  # fHxfWxNAx4
    return label_logits, box_logits
def maskrcnn_upXconv_head(feature, num_category, num_convs):
    """
    Mask-RCNN head: `num_convs` 3x3 convs, a 2x deconv upsampling, then a
    1x1 conv producing per-category mask logits.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    x = feature
    with argscope([Conv2D, Conv2DTranspose], data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        # c2's MSRAFill is fan_out
        for idx in range(num_convs):
            x = Conv2D('fcn{}'.format(idx), x, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
        # 2x spatial upsampling: s x s -> 2s x 2s.
        x = Conv2DTranspose('deconv', x, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
        x = Conv2D('conv', x, num_category, 1)
    return x
def rpn_head(featuremap, channel, num_anchors):
    """RPN head: a shared `channel`-wide 3x3 conv followed by 1x1 objectness
    and box-regression branches, reshaped to per-location layouts.

    Returns:
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4
    """
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.random_normal_initializer(stddev=0.01)):
        shared = Conv2D('conv0', featuremap, channel, 3, activation=tf.nn.relu)
        label_logits = Conv2D('class', shared, num_anchors, 1)
        box_logits = Conv2D('box', shared, 4 * num_anchors, 1)
        # 1, NA(*4), im/16, im/16 (NCHW)

        shp = tf.shape(box_logits)  # 1x(NAx4)xfHxfW

        label_logits = tf.transpose(label_logits, [0, 2, 3, 1])  # 1xfHxfWxNA
        label_logits = tf.squeeze(label_logits, 0)  # fHxfWxNA

        box_logits = tf.transpose(box_logits, [0, 2, 3, 1])  # 1xfHxfWx(NAx4)
        box_logits = tf.reshape(box_logits,
                                tf.stack([shp[2], shp[3], num_anchors, 4]))  # fHxfWxNAx4
    return label_logits, box_logits
def densenet_backbone(image, qw=1):
    """DenseNet classifier built from quantized convolutions.

    Args:
        image: input tensor.
        qw (int): weight bit-width; quantization is disabled when qw <= 0,
            and the stem conv is always unquantized.
    Returns:
        1000-dim logits tensor.
    """
    with argscope(Conv2DQuant, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_IN'),
                  data_format=get_arg_scope()['Conv2D']['data_format'],
                  nbit=qw, is_quant=qw > 0):
        x = Conv2DQuant('conv1', image, 2 * GROWTH_RATE, 7, stride=2, nl=BNReLU,
                        is_quant=False)
        x = MaxPooling('pool1', x, shape=3, stride=2, padding='SAME')
        # Dense blocks with transition layers; spatial size after each: 56/28/14/7.
        x = add_dense_block(x, 'block0', 6)
        x = add_dense_block(x, 'block1', 12)
        x = add_dense_block(x, 'block2', 24)
        x = add_dense_block(x, 'block3', 16, last=True)
        x = BNReLU('bnrelu_last', x)
        x = GlobalAvgPooling('gap', x)
        logits = FullyConnected('linear', x, out_dim=1000, nl=tf.identity,
                                W_init=variance_scaling_initializer(mode='FAN_IN'))
    return logits
def alexnet_backbone(image, qw=1):
    """AlexNet backbone built from quantized convolutions (weight bit-width `qw`).

    The first conv stays unquantized (`is_quant=False`); fc6/fc7 are implemented
    as convolutions and fc8 produces the 1000-way logits.
    """
    with argscope(Conv2DQuant, nl=tf.identity, use_bias=False,
                  W_init=tf.random_normal_initializer(stddev=0.01),
                  data_format=get_arg_scope()['Conv2D']['data_format'],
                  nbit=qw):
        x = Conv2DQuant('conv1', image, 96, 11, stride=4, is_quant=False, padding='VALID')
        x = MaxPooling('pool1', x, shape=3, stride=2, padding='VALID')
        x = BNReLUQuant('bnquant2', x)
        x = Conv2DQuant('conv2', x, 256, 5)
        x = MaxPooling('pool2', x, shape=3, stride=2, padding='VALID')
        x = BNReLUQuant('bnquant3', x)
        x = Conv2DQuant('conv3', x, 384, 3, nl=getBNReLUQuant)
        x = Conv2DQuant('conv4', x, 384, 3, nl=getBNReLUQuant)
        x = Conv2DQuant('conv5', x, 256, 3)
        x = MaxPooling('pool5', x, shape=3, stride=2, padding='VALID')
        x = BNReLUQuant('bnquant6', x)
        # fc6/fc7 as VALID convs with their own small-stddev init and biases.
        x = Conv2DQuant('fc6', x, 4096, 6, nl=getfcBNReLUQuant, padding='VALID',
                        W_init=tf.random_normal_initializer(stddev=0.005), use_bias=True)
        x = Conv2DQuant('fc7', x, 4096, 1, nl=getfcBNReLU, padding='VALID',
                        W_init=tf.random_normal_initializer(stddev=0.005), use_bias=True)
        logits = FullyConnected('fc8', x, out_dim=1000, nl=tf.identity,
                                W_init=tf.random_normal_initializer(stddev=0.01))
    return logits
def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None):
    """
    Fast-RCNN head feature: `num_convs` 3x3 convs (optionally GroupNorm'ed)
    followed by one fully-connected layer.

    Args:
        feature (NCHW):
        num_convs (int): number of conv layers
        norm (str or None): either None or 'GN'

    Returns:
        2D head feature
    """
    assert norm in [None, 'GN'], norm
    head = feature
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        for idx in range(num_convs):
            head = Conv2D('conv{}'.format(idx), head, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3,
                          activation=tf.nn.relu)
            if norm is not None:
                head = GroupNorm('gn{}'.format(idx), head)
        head = FullyConnected('fc', head, cfg.FPN.FRCNN_FC_HEAD_DIM,
                              kernel_initializer=tf.variance_scaling_initializer(),
                              activation=tf.nn.relu)
    return head
def resnet_argscope():
    """Context for building the ResNet backbone: channels-first layout for
    conv/pool/BN, bias-free convs, and BatchNorm in inference mode
    (training=False). Variables are created under `maybe_freeze_affine`
    (NOTE(review): presumably stops gradients on BN affine params — confirm
    against its definition)."""
    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False), \
            argscope(BatchNorm, training=False), \
            custom_getter_scope(maybe_freeze_affine):
        yield
def resnet_argscope():
    """Context for building the ResNet backbone (older tensorpack API): NCHW
    layout for conv/pool/BN, bias-free convs, and BatchNorm using stored
    statistics (use_local_stat=False). Variables are created under
    `maybe_freeze_affine` (NOTE(review): presumably stops gradients on BN
    affine params — confirm against its definition)."""
    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'), \
            argscope(Conv2D, use_bias=False), \
            argscope(BatchNorm, use_local_stat=False), \
            custom_getter_scope(maybe_freeze_affine):
        yield
def resnet_argscope():
    """Context for building the ResNet backbone (older tensorpack API): NCHW
    layout for conv/pool/BN, bias-free convs, and BatchNorm using stored
    statistics (use_local_stat=False). Unlike the variant with
    `custom_getter_scope`, this one applies no custom variable getter."""
    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'), \
            argscope(Conv2D, use_bias=False), \
            argscope(BatchNorm, use_local_stat=False):
        yield