def inception_fc(layer_dict, n_class, keep_prob=1., inputs=None,
                 pretrained_dict=None, is_training=True, bn=False,
                 init_w=None, trainable=True, wd=0):
    """Build the final classification head on top of the current features.

    Pipeline: global average pooling (``keepdims=True``) -> dropout ->
    1x1 convolution named ``loss3_classifier`` with ``n_class`` outputs ->
    squeeze of axes 1 and 2.

    Args:
        layer_dict: dict whose ``'cur_input'`` entry holds the tensor being
            built up; this function overwrites that entry.
        n_class: ``out_dim`` of the classifier convolution.
        keep_prob: forwarded to ``L.drop_out``.
        inputs: optional tensor that replaces ``layer_dict['cur_input']``
            before the head is built.
        pretrained_dict, init_w, trainable, wd, is_training: forwarded to
            ``L.conv``.
        bn: accepted for signature compatibility only; the classifier
            convolution is always created with ``bn=False``.

    Returns:
        The squeezed tensor, which is also stored in
        ``layer_dict['cur_input']``.
    """
    key = 'cur_input'
    if inputs is not None:
        layer_dict[key] = inputs

    layer_dict[key] = L.global_avg_pool(layer_dict[key], keepdims=True)

    # presumably L.drop_out and L.conv read and update layer_dict['cur_input']
    # in place -- their return values are not used here; verify against L.
    L.drop_out(layer_dict, is_training, keep_prob=keep_prob)

    classifier_kwargs = dict(
        filter_size=1,
        out_dim=n_class,
        layer_dict=layer_dict,
        pretrained_dict=pretrained_dict,
        trainable=trainable,
        bn=False,
        init_w=init_w,
        wd=wd,
        is_training=is_training,
        name='loss3_classifier',
    )
    L.conv(**classifier_kwargs)

    logits = tf.squeeze(layer_dict[key], [1, 2])
    layer_dict[key] = logits
    return logits
def _decoder(low_level_features, aspp_out, output_size: (int, int), is_training: bool):
    """Deeplabv3 decoder block.

    Consists of concat(conv(llf), aspp) -> xception block -> conv -> resize
    -> conv.

    Arguments:
        low_level_features: features from a lower layer of the feature
            extractor
        aspp_out: encoder output
        output_size: model output size, should be equal to the input size
        is_training: whether the current mode is training or not

    Returns:
        decoder output (result of the ``logits_conv`` convolution)
    """
    with tf.variable_scope("decoder"):
        # 1x1 projection of the low level features down to 48 channels.
        projected = conv(low_level_features,
                         filters=48,
                         kernel_size=1,
                         name="low_level_features_projection",
                         with_relu=True,
                         with_bn=True,
                         is_training=is_training)

        # Bring the ASPP output to the spatial size of the projected
        # features (dims 1 and 2 of their static shape).
        spatial_size = projected.shape.as_list()[1:3]
        aspp_resized = resize_bilinear(aspp_out,
                                       target_size=spatial_size,
                                       name="aspp_resize")

        # Concatenate along the last axis.
        merged = tf.concat([projected, aspp_resized], axis=-1)

        refined = xception.xception_block(merged,
                                          block_filters=[256, 256],
                                          final_strides=1,
                                          with_depth_relu=True,
                                          is_training=is_training,
                                          name="decoder_block")

        coarse = conv(refined, filters=2, kernel_size=1,
                      is_training=is_training, name="conv")
        upsampled = resize_bilinear(coarse, target_size=output_size,
                                    name="conv_resize")
        # Final logits.
        logits = conv(upsampled, filters=2, kernel_size=1,
                      is_training=is_training, name="logits_conv")
    return logits
def inception_conv_layers(layer_dict, inputs=None, pretrained_dict=None,
                          bn=False, wd=0, init_w=None, is_training=True,
                          trainable=True, conv_stride=2):
    """Build the convolutional stem that precedes the inception modules.

    Sequence: ``conv1_7x7_s2`` -> pad -> ``pool1`` -> LRN ->
    ``conv2_3x3_reduce`` -> ``conv2_3x3`` -> pad -> ``pool2`` -> LRN.

    Args:
        layer_dict: dict carrying the running tensor under ``'cur_input'``.
        inputs: stem input; defaults to ``layer_dict['cur_input']``.
        pretrained_dict, bn, wd, init_w, is_training, trainable: shared
            arguments applied to every ``L.conv`` call through ``arg_scope``.
        conv_stride: stride of the first (7x7) convolution.

    Returns:
        The second LRN output, also stored in ``layer_dict['cur_input']``.
    """
    inputs = layer_dict['cur_input'] if inputs is None else inputs
    layer_dict['cur_input'] = inputs

    def _pad_pool_lrn(tensor, pool_name, lrn_name):
        # Append one extra row and column (dims 1 and 2) before the
        # 3x3 / stride-2 VALID max pool, then apply local response norm.
        paddings = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
        padded = tf.pad(tensor, paddings, 'CONSTANT')
        pooled, _ = L.max_pool(layer_dict=layer_dict, inputs=padded,
                               stride=2, filter_size=3, padding='VALID',
                               name=pool_name)
        return tf.nn.local_response_normalization(
            pooled, depth_radius=2, alpha=2e-05, beta=0.75, name=lrn_name)

    shared_conv_args = dict(
        layer_dict=layer_dict,
        pretrained_dict=pretrained_dict,
        bn=bn,
        nl=tf.nn.relu,
        init_w=init_w,
        trainable=trainable,
        is_training=is_training,
        wd=wd,
        add_summary=False,
    )
    with tf.contrib.framework.arg_scope([L.conv], **shared_conv_args):
        stem = L.conv(7, 64, inputs=inputs, name='conv1_7x7_s2',
                      stride=conv_stride)
        stem = _pad_pool_lrn(stem, 'pool1', 'pool1_lrn')

        stem = L.conv(1, 64, inputs=stem, name='conv2_3x3_reduce')
        stem = L.conv(3, 192, inputs=stem, name='conv2_3x3')
        stem = _pad_pool_lrn(stem, 'pool2', 'pool2_lrn')

    layer_dict['cur_input'] = stem
    return stem
def cell(inputs, tokens, adjmat, downsample=False, name=None):
    """Build one cell from a token list and an adjacency matrix.

    Node ``n`` applies ``ops[tokens[n]]``. A node with no incoming edge
    (column ``n`` of ``adjmat`` is all zero) is fed from ``inputs`` through
    bn_relu -> 1x1 conv to ``filters // FLAGS.ratio`` channels -> bn_relu;
    any other node is fed the sum of its predecessors' outputs. Outputs of
    nodes without outgoing edges (all-zero rows) are concatenated on axis 1
    and projected with a final 1x1 conv.

    Args:
        inputs: input tensor; ``inputs.shape[1]`` is taken as the filter count.
        tokens: per-node indices into ``ops``.
        adjmat: square numpy adjacency matrix; entry ``[i, n]`` non-zero
            means node ``i`` feeds node ``n``.
        downsample: forwarded to the op of every input-fed node; also
            doubles the filter count of the final 1x1 conv.
        name: unused.

    Returns:
        Output tensor of the final 1x1 convolution.
    """
    in_filters = inputs.shape[1]
    reduced = in_filters // FLAGS.ratio

    node_outputs = []
    for node in range(len(adjmat)):
        op = ops[tokens[node]]
        predecessors, = np.nonzero(adjmat[:, node])
        if len(predecessors) != 0:
            # Interior node: combine everything that feeds it.
            h = fluid.layers.sums([node_outputs[p] for p in predecessors])
            h = layers.bn_relu(h)
            h = op(h)
        else:
            # Entry node: read straight from the cell input.
            h = layers.bn_relu(inputs)
            h = layers.conv(h, reduced, (1, 1))
            h = layers.bn_relu(h)
            h = op(h, downsample)
        node_outputs.append(h)

    # Nodes whose row has no non-zero entry feed nothing else.
    leaves, = np.where(~adjmat.any(axis=1))
    leaf_outputs = [node_outputs[k] for k in leaves]

    out_filters = in_filters * 2 if downsample else in_filters
    merged = fluid.layers.concat(leaf_outputs, axis=1)
    return layers.conv(merged, out_filters, (1, 1))
def auxiliary_classifier(layer_dict, n_class, keep_prob=1., inputs=None,
                         pretrained_dict=None, is_training=True, bn=False,
                         init_w=None, trainable=True, wd=0):
    """Build an auxiliary classification branch.

    Pipeline: global average pooling (``keepdims=True``) -> 1x1 conv (128)
    -> fc_1 (512) -> dropout -> fc_2 (512) -> dropout -> classifier
    (``n_class``, no batch norm).

    Args:
        layer_dict: dict carrying the running tensor under ``'cur_input'``.
        n_class: output dimension of the final linear layer.
        keep_prob: forwarded to ``L.drop_out``.
        inputs: optional tensor that replaces ``layer_dict['cur_input']``.
        pretrained_dict: accepted but not forwarded to any layer here.
        is_training, bn, init_w, trainable, wd: shared arguments applied to
            ``L.conv`` and ``L.linear`` through ``arg_scope``.

    Returns:
        ``layer_dict['cur_input']`` after the last layer was added.
    """
    if inputs is not None:
        layer_dict['cur_input'] = inputs

    pooled = L.global_avg_pool(layer_dict['cur_input'], keepdims=True)
    layer_dict['cur_input'] = pooled

    shared_args = dict(
        layer_dict=layer_dict,
        bn=bn,
        init_w=init_w,
        trainable=trainable,
        is_training=is_training,
        wd=wd,
        add_summary=False,
    )
    # presumably every L.* call below reads and updates
    # layer_dict['cur_input'] -- return values are not used; verify in L.
    with tf.contrib.framework.arg_scope([L.conv, L.linear], **shared_args):
        L.conv(1, 128, name='conv', stride=1, nl=tf.nn.relu)
        for fc_name in ('fc_1', 'fc_2'):
            L.linear(out_dim=512, name=fc_name, nl=tf.nn.relu)
            L.drop_out(layer_dict, is_training, keep_prob=keep_prob)
        L.linear(out_dim=n_class, name='classifier', bn=False)

    return layer_dict['cur_input']
def conv_base(inputs, kernel, dilation=None, downsample=False):
    """Apply a single convolution, optionally downsampling.

    Args:
        inputs: input tensor; ``inputs.shape[1]`` is taken as the filter count.
        kernel: kernel size forwarded to ``layers.conv``.
        dilation: dilation for the non-downsampling path; ``None`` means
            ``(1, 1)``. Not used when ``downsample`` is true.
        downsample: when true, the filter count is doubled and ``(2, 2)`` is
            passed as the fourth positional argument of ``layers.conv``
            (presumably the stride -- verify against ``layers.conv``).

    Returns:
        Output of ``layers.conv``.
    """
    channels = inputs.shape[1]
    if downsample:
        return layers.conv(inputs, channels * 2, kernel, (2, 2))

    effective_dilation = (1, 1) if dilation is None else dilation
    return layers.conv(inputs, channels, kernel, dilation=effective_dilation)
def pair_base(inputs, kernel, downsample=False):
    """Apply a ``(1, kernel)`` convolution followed by a ``(kernel, 1)`` one.

    Args:
        inputs: input tensor; ``inputs.shape[1]`` is taken as the filter count.
        kernel: length of the one-dimensional kernels.
        downsample: when true, ``(1, 2)`` and ``(2, 1)`` are passed as the
            fourth positional argument of the two convolutions (presumably
            strides -- verify against ``layers.conv``) and a trailing 1x1
            convolution doubles the filter count.

    Returns:
        Output tensor of the last convolution.
    """
    channels = inputs.shape[1]

    if not downsample:
        row_pass = layers.conv(inputs, channels, (1, kernel))
        return layers.conv(row_pass, channels, (kernel, 1))

    row_pass = layers.conv(inputs, channels, (1, kernel), (1, 2))
    col_pass = layers.conv(row_pass, channels, (kernel, 1), (2, 1))
    return layers.conv(col_pass, channels * 2, (1, 1))
def __init__(self, opts):
    """Assemble the discriminator as one ``nn.Sequential`` in ``self.main``.

    Four ``conv`` stages (kernel 4, stride 2, padding 1, leaky ReLU) widen
    the channels ``nc -> ndf -> 2*ndf -> 4*ndf -> 8*ndf``; a bias-free
    ``nn.Conv2d(8*ndf, 1, 4, 1, 0)`` and a sigmoid follow.

    Args:
        opts: options object; ``opts.model.ndf`` and ``opts.data.channels``
            are read here and ``opts`` is passed to the parent constructor.
    """
    super(Discriminator, self).__init__(opts)
    ndf, nc = opts.model.ndf, opts.data.channels

    widths = [nc, ndf, ndf * 2, ndf * 4, ndf * 8]
    stages = [
        conv(c_in, c_out, 4, 2, 1, activation='leaky_relu')
        for c_in, c_out in zip(widths[:-1], widths[1:])
    ]
    # state size. (ndf*8) x 4 x 4 -- presumably for 64x64 inputs; confirm.
    stages.append(nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False))
    stages.append(nn.Sigmoid())

    self.main = nn.Sequential(*stages)
def _image_pooling(inputs, filters: int, target_size: (int, int), is_training: bool):
    """Image-level pooling block.

    Consists of global_average_pooling -> 1x1 conv (bn + relu) -> resize.

    Arguments:
        inputs: input feature tensor
        filters: number of filters of the 1x1 convolution
        target_size: spatial size the pooled features are resized to
        is_training: whether the current mode is training or not

    Returns:
        pooling output
    """
    with tf.variable_scope("image_pooling"):
        pooled = global_average_pooling(inputs, name="pool")
        projected = conv(pooled,
                         filters=filters,
                         kernel_size=1,
                         with_bn=True,
                         with_relu=True,
                         is_training=is_training,
                         name="conv")
        resized = resize_bilinear(projected, target_size=target_size,
                                  name="resize")
    return resized
def make_guide_params(self):
    """Create the guide sub-network as a bare ``nn.Module`` container.

    Returns:
        ``nn.Module`` with two attributes:
        ``conv1`` -- 1x1 ``conv`` from ``self.n_in`` to ``self.guide_pts``
        channels with ``norm=True``;
        ``conv2`` -- 1x1 ``nn.Conv2d`` from ``self.guide_pts`` to one
        channel followed by a sigmoid.
    """
    guide_params = nn.Module()
    guide_params.conv1 = conv(self.n_in, self.guide_pts, 1, norm=True)
    guide_params.conv2 = nn.Sequential(
        nn.Conv2d(self.guide_pts, 1, 1),
        nn.Sigmoid(),
    )
    return guide_params
def net(data, train=False, data_type=tf.float32):
    """Build a two-conv, two-fc classifier graph.

    Args:
        data: input tensor fed to the first ``layers.conv``.
        train: when true, dropout is applied after the first fc layer.
        data_type: dtype forwarded to every ``layers.conv`` / ``layers.fc``.

    Returns:
        ``[fc2_logits, fc1_weights, fc1_biases, fc2_weights, fc2_biases]``.
    """
    features = data
    for out_channels, scope in ((32, 'conv-1'), (64, 'conv-2')):
        features = layers.conv(features, out_channels, scope, data_type)

    # Flatten everything except dim 0 using the static shape.
    # NOTE(review): dim 0 is taken from the static shape, so this presumably
    # expects a fixed batch size -- confirm with the callers.
    dims = features.get_shape().as_list()
    flat = tf.reshape(features, [dims[0], dims[1] * dims[2] * dims[3]])

    hidden, fc1_weights, fc1_biases = layers.fc(flat, 512, 'fc1', data_type)
    hidden = tf.nn.relu(hidden)
    if train:
        # `keep_prob` is not a parameter; it is resolved from the enclosing
        # scope at call time.
        hidden = layers.dropout(hidden, keep_prob)

    logits, fc2_weights, fc2_biases = layers.fc(hidden, 10, 'fc2', data_type)
    return [logits, fc1_weights, fc1_biases, fc2_weights, fc2_biases]
def avgpool_base(inputs, kernel, downsample=False):
    """Apply average pooling, optionally downsampling.

    Args:
        inputs: input tensor; ``inputs.shape[1]`` is read as the filter count
            on the downsampling path.
        kernel: pooling kernel forwarded to ``layers.avgpool``.
        downsample: when true, ``(2, 2)`` is passed as the third positional
            argument of ``layers.avgpool`` (presumably the stride -- verify)
            and a 1x1 convolution doubles the filter count afterwards.

    Returns:
        Pooled (and, when downsampling, projected) tensor.
    """
    if not downsample:
        return layers.avgpool(inputs, kernel)

    channels = inputs.shape[1]
    pooled = layers.avgpool(inputs, kernel, (2, 2))
    return layers.conv(pooled, channels * 2, (1, 1))
def make_guide_params(self):
    """Create the guide parameters as a bare ``nn.Module`` container.

    Returns:
        ``nn.Module`` with four attributes:
        ``ccm`` -- 1x1 ``conv`` from ``n_in`` to ``n_in`` channels (no norm,
        no relu) whose weights start at identity plus one shared random
        offset scaled by 1e-4, with zero bias;
        ``shifts`` -- parameter of shape ``(n_in, 1, 1, guide_pts)`` holding
        ``linspace(0, 1, guide_pts, endpoint=False)`` for every channel;
        ``slopes`` -- parameter of shape ``(1, n_in, 1, 1, guide_pts)``,
        zero everywhere except index 0 of the last axis, which is 1;
        ``projection`` -- 1x1 ``conv`` from ``n_in`` to one channel (no norm,
        no relu) with every weight ``1 / n_in`` and zero bias.
    """
    n_in = self.n_in

    # A single random scalar is drawn and broadcast over the whole matrix.
    jitter = np.random.randn(1).astype(np.float32) * 1e-4
    ccm_weights = (np.identity(n_in, dtype=np.float32) + jitter).reshape(
        (n_in, n_in, 1, 1))
    ccm = conv(n_in, n_in, 1, norm=False, relu=False,
               weights_init=ccm_weights,
               bias_init=torch.zeros(n_in))

    knots = np.linspace(0, 1, self.guide_pts, endpoint=False,
                        dtype=np.float32)
    tiled_knots = np.tile(knots[None, None, None, :], (n_in, 1, 1, 1))
    shifts = nn.Parameter(data=torch.from_numpy(tiled_knots))

    slope_init = np.zeros([1, n_in, 1, 1, self.guide_pts], dtype=np.float32)
    slope_init[..., 0] = 1.0
    slopes = nn.Parameter(data=torch.from_numpy(slope_init))

    projection = conv(n_in, 1, 1, norm=False, relu=False,
                      weights_init=torch.ones(1, n_in, 1, 1) / n_in,
                      bias_init=torch.zeros(1))

    guide_params = nn.Module()
    guide_params.ccm = ccm
    guide_params.shifts = shifts
    guide_params.slopes = slopes
    guide_params.projection = projection
    return guide_params
def net(data, train=False, data_type=tf.float16):
    """Build a four-layer fully connected stack on the flattened input.

    Two ``layers.conv`` calls are issued first, but their output is not fed
    into the fully connected layers: those consume the flattened raw
    ``data``. No activation or dropout is applied between the fc layers.

    Args:
        data: input tensor; its static shape is used for flattening.
        train: unused.
        data_type: dtype forwarded to every ``layers.fc`` call.

    Returns:
        ``[fc4, fc1_weights, fc1_biases, fc2_weights, fc2_biases,
        fc3_weights, fc3_biases, fc4_weights, fc4_biases]``.
    """
    # NOTE(review): the conv results are discarded. The calls are kept
    # because they may still create graph variables -- confirm whether the
    # fc stack was meant to read the conv output instead of `data`.
    conv_out = layers.conv(data, 32, 'conv-1')
    layers.conv(conv_out, 64, 'conv-2')

    dims = data.get_shape().as_list()
    hidden = tf.reshape(data, [dims[0], dims[1] * dims[2] * dims[3]])

    fc_specs = (
        (512, 'fc1'),
        (512 // 2, 'fc2'),
        (512 // 4, 'fc3'),
        (10, 'fc4'),
    )
    params = []
    for units, scope in fc_specs:
        hidden, weights, biases = layers.fc(hidden, units, scope, data_type)
        params.extend([weights, biases])

    return [hidden] + params
def inception_conv_layers_cifar(layer_dict, inputs=None, pretrained_dict=None,
                                bn=False, wd=0, init_w=None,
                                is_training=True, trainable=True,
                                conv_stride=2):
    """Build the convolutional stem without pooling or LRN layers.

    Sequence: ``conv1_7x7_s2`` -> ``conv2_3x3_reduce`` -> ``conv2_3x3``.

    Args:
        layer_dict: dict carrying the running tensor under ``'cur_input'``.
        inputs: stem input; defaults to ``layer_dict['cur_input']``.
        pretrained_dict, bn, wd, init_w, is_training, trainable: shared
            arguments applied to every ``L.conv`` call through ``arg_scope``.
        conv_stride: stride of the first (7x7) convolution.

    Returns:
        ``layer_dict['cur_input']`` after the three convolutions.
    """
    layer_dict['cur_input'] = (
        layer_dict['cur_input'] if inputs is None else inputs)

    shared_conv_args = dict(
        layer_dict=layer_dict,
        pretrained_dict=pretrained_dict,
        bn=bn,
        nl=tf.nn.relu,
        init_w=init_w,
        trainable=trainable,
        is_training=is_training,
        wd=wd,
        add_summary=False,
    )
    # No `inputs=` is passed below: presumably L.conv chains the layers
    # through layer_dict['cur_input'] -- verify against L.conv.
    with tf.contrib.framework.arg_scope([L.conv], **shared_conv_args):
        L.conv(7, 64, name='conv1_7x7_s2', stride=conv_stride)
        L.conv(1, 64, name='conv2_3x3_reduce')
        L.conv(3, 192, name='conv2_3x3')

    return layer_dict['cur_input']
def __init__(self):
    """Create the layers of the two encoders and the shared decoder.

    Attributes are created in this order: ``d_layer1..5`` (1-channel
    input), ``rgb_layer1..5`` (3-channel input), ``up_layer5``, then
    ``conv_layerK`` / ``up_layerK`` for K = 4..1, and finally
    ``out_layer``.
    """
    super(FusionComp, self).__init__()

    # (in_channels, out_channels, kernel, <4th vgg_conv arg>) per stage; the
    # fourth value is presumably the stride -- verify against vgg_conv.
    depth_specs = (
        (1, 16, 5, 1),
        (16, 32, 3, 1),
        (32, 64, 3, 2),
        (64, 128, 3, 2),
        (128, 128, 3, 2),
    )
    rgb_specs = (
        (3, 48, 5, 1),
        (48, 96, 3, 1),
        (96, 192, 3, 2),
        (192, 384, 3, 2),
        (384, 384, 3, 2),
    )
    for prefix, specs in (('d_layer', depth_specs), ('rgb_layer', rgb_specs)):
        for stage, spec in enumerate(specs, 1):
            setattr(self, '{}{}'.format(prefix, stage), vgg_conv(*spec))

    # 512 = 128 + 384, the widths of d_layer5 and rgb_layer5.
    self.up_layer5 = up_conv(512, 256, 3)
    for level, width in ((4, 256), (3, 128), (2, 64), (1, 32)):
        # Each conv takes `width` plus `2 * width` input channels.
        setattr(self, 'conv_layer{}'.format(level),
                conv(width + 2 * width, width, 3, 1, 1, 1))
        setattr(self, 'up_layer{}'.format(level),
                up_conv(width, width // 2, 3))

    self.out_layer = self.out_conv(16, 1, 1)
def xception_block(inputs,
                   block_filters: [int],
                   is_training: bool,
                   name: str,
                   final_strides: int = 1,
                   dilation_rate: int = 1,
                   with_depth_relu: bool = False,
                   residual_type: str = None):
    """Xception block template.

    Stacks one separable convolution per entry of ``block_filters``; only
    the last one uses ``final_strides``. An optional residual branch is
    added to the stack's output.

    Arguments:
        inputs: input tensor
        block_filters: filter count of each separable conv in the block
        is_training: whether the current mode is training or not
        name: prefix for the names of all layers in the block
        final_strides: strides of the last separable conv (and of the
            residual conv when ``residual_type == "conv"``)
        dilation_rate: dilation rate of every separable conv
        with_depth_relu: forwarded to ``separable_conv``
        residual_type: ``"conv"`` (1x1 conv shortcut), ``"sum"`` (identity
            shortcut) or ``None`` (no shortcut)

    Returns:
        block output tensor

    Raises:
        ValueError: if ``residual_type`` is not one of the supported values.
    """
    # Reject typos such as "add" up front; previously any unknown value
    # silently built the block without a shortcut.
    if residual_type not in (None, "sum", "conv"):
        raise ValueError(
            "residual_type must be 'conv', 'sum' or None, got {!r}".format(
                residual_type))

    residual = None
    if residual_type == "sum":
        residual = inputs
    elif residual_type == "conv":
        residual = conv(inputs,
                        filters=block_filters[-1],
                        kernel_size=1,
                        strides=final_strides,
                        is_training=is_training,
                        name=f"{name}_residual")

    outputs = inputs
    last_index = len(block_filters) - 1
    for i, f in enumerate(block_filters):
        strides = final_strides if i == last_index else 1
        outputs = separable_conv(outputs,
                                 filters=f,
                                 kernel_size=3,
                                 strides=strides,
                                 dilation_rate=dilation_rate,
                                 with_depth_relu=with_depth_relu,
                                 is_training=is_training,
                                 name=f"{name}_separable_{i + 1}")

    if residual is not None:
        outputs = tf.add(outputs, residual)
    return outputs
def net(inputs, output, tokens, adjvec):
    """Create the network and return its loss and accuracy.

    ``tokens`` and ``adjvec`` are each split into two halves: the first
    describes the normal cell, the second the reduction cell. The first
    stage is ``FLAGS.num_cells`` normal cells; each further stage is one
    reduction cell (``downsample=True``) followed by ``FLAGS.num_cells - 1``
    normal cells.

    Args:
        inputs: input tensor of the first 3x3 convolution.
        output: labels passed to the loss and accuracy ops.
        tokens: concatenated op tokens of the normal and reduction cells.
        adjvec: concatenated packed adjacency vectors of both cells.

    Returns:
        Tuple ``(loss, accuracy)``.
    """
    num_nodes = len(tokens) // 2

    def unpack_adjacency(vec):
        """Expand a packed vector into a strictly upper-triangular matrix."""
        mat = np.zeros([num_nodes, num_nodes])
        for col in range(1, num_nodes):
            # Column `col` holds `col` entries starting at col*(col-1)/2.
            start = col * (col - 1) // 2
            stop = (col + 1) * col // 2
            mat[0:col, col] = vec[start:stop]
        return mat

    normal_to, reduce_to = np.split(tokens, 2)
    normal_ad, reduce_ad = [unpack_adjacency(half)
                            for half in np.split(adjvec, 2)]

    x = layers.conv(inputs, FLAGS.width, (3, 3))

    for _ in range(FLAGS.num_cells):
        x = cell(x, normal_to, normal_ad)
    for _ in range(1, FLAGS.num_stages):
        x = cell(x, reduce_to, reduce_ad, downsample=True)
        for _ in range(1, FLAGS.num_cells):
            x = cell(x, normal_to, normal_ad)

    x = layers.bn_relu(x)
    x = layers.global_avgpool(x)
    x = layers.dropout(x)
    # `num_classes` is not a parameter; it is resolved from the enclosing
    # scope at call time.
    logits = layers.fully_connected(x, num_classes)

    per_example = fluid.layers.softmax_with_cross_entropy(
        logits, output, numeric_stable_mode=True)
    loss = fluid.layers.reduce_mean(per_example)
    accuracy = fluid.layers.accuracy(input=logits, label=output)
    return loss, accuracy
def inception_layer(conv_11_size, conv_33_reduce_size, conv_33_size,
                    conv_55_reduce_size, conv_55_size, pool_size, layer_dict,
                    inputs=None, bn=False, wd=0, init_w=None,
                    pretrained_dict=None, trainable=True, is_training=True,
                    name='inception'):
    """Build one inception module with four parallel branches.

    Branches: 1x1 conv; 1x1 reduce -> 3x3 conv; 1x1 reduce -> 5x5 conv;
    3x3 stride-1 SAME max pool -> 1x1 projection. Their outputs are
    concatenated along axis 3.

    Args:
        conv_11_size: output channels of the 1x1 branch.
        conv_33_reduce_size, conv_33_size: channels of the 3x3 branch.
        conv_55_reduce_size, conv_55_size: channels of the 5x5 branch.
        pool_size: output channels of the pooling-branch projection.
        layer_dict: dict carrying the running tensor under ``'cur_input'``;
            the module output is also stored under ``name``.
        inputs: module input; defaults to ``layer_dict['cur_input']``.
        bn, wd, init_w, pretrained_dict, trainable, is_training: shared
            arguments applied to every ``L.conv`` call through ``arg_scope``.
        name: prefix of every layer name in the module.

    Returns:
        The concatenated output tensor.
    """
    inputs = layer_dict['cur_input'] if inputs is None else inputs
    layer_dict['cur_input'] = inputs

    def scoped(suffix):
        # Layer names follow the '<name>_<suffix>' pattern.
        return '{}_{}'.format(name, suffix)

    shared_conv_args = dict(
        layer_dict=layer_dict,
        pretrained_dict=pretrained_dict,
        bn=bn,
        nl=tf.nn.relu,
        init_w=init_w,
        trainable=trainable,
        is_training=is_training,
        wd=wd,
        add_summary=False,
    )
    # Calls without `inputs=` presumably consume the output of the call just
    # before them via layer_dict['cur_input'], so the statement order below
    # matters -- verify against L.conv / L.max_pool.
    with tf.contrib.framework.arg_scope([L.conv], **shared_conv_args):
        branch_1x1 = L.conv(filter_size=1, out_dim=conv_11_size,
                            inputs=inputs, name=scoped('1x1'))

        L.conv(filter_size=1, out_dim=conv_33_reduce_size, inputs=inputs,
               name=scoped('3x3_reduce'))
        branch_3x3 = L.conv(filter_size=3, out_dim=conv_33_size,
                            name=scoped('3x3'))

        L.conv(filter_size=1, out_dim=conv_55_reduce_size, inputs=inputs,
               name=scoped('5x5_reduce'))
        branch_5x5 = L.conv(filter_size=5, out_dim=conv_55_size,
                            name=scoped('5x5'))

        L.max_pool(layer_dict=layer_dict, inputs=inputs, stride=1,
                   filter_size=3, padding='SAME', name=scoped('pool'))
        branch_pool = L.conv(filter_size=1, out_dim=pool_size,
                             name=scoped('pool_proj'))

    branches = [branch_1x1, branch_3x3, branch_5x5, branch_pool]
    output = tf.concat(branches, 3, name=scoped('concat'))

    layer_dict['cur_input'] = output
    layer_dict[name] = output
    return output
def model_fn(x, is_training: bool):
    """Build the Xception feature extractor (entry, middle and exit flow).

    ``entry_block_stride``, ``middle_block_rate`` and ``exit_block_rates``
    are not parameters; they are resolved from the enclosing scope.

    Arguments:
        x: input tensor
        is_training: whether the current mode is training or not

    Returns:
        output tensor of the second exit-flow block
    """
    with tf.variable_scope("xception"):
        with tf.variable_scope("entry_flow"):
            x = conv(x, filters=32, kernel_size=3, strides=2, with_bn=True,
                     with_relu=True, is_training=is_training, name="conv1")
            x = conv(x, filters=64, kernel_size=3, with_bn=True,
                     with_relu=True, is_training=is_training, name="conv2")

            entry_blocks = (
                ("block1", 128, 2),
                ("block2", 256, 2),
                ("block3", 728, entry_block_stride),
            )
            for block_name, width, stride in entry_blocks:
                x = xception_block(x,
                                   block_filters=[width] * 3,
                                   final_strides=stride,
                                   residual_type="conv",
                                   is_training=is_training,
                                   name=block_name)

        with tf.variable_scope("middle_flow"):
            # 16 identical blocks with identity shortcuts.
            for index in range(1, 17):
                x = xception_block(x,
                                   block_filters=[728] * 3,
                                   dilation_rate=middle_block_rate,
                                   residual_type="sum",
                                   is_training=is_training,
                                   name="block" + str(index))

        with tf.variable_scope("exit_flow"):
            x = xception_block(x,
                               block_filters=[728, 1024, 1024],
                               dilation_rate=exit_block_rates[0],
                               residual_type="conv",
                               is_training=is_training,
                               name="block1")
            x = xception_block(x,
                               block_filters=[1536, 1536, 2048],
                               dilation_rate=exit_block_rates[1],
                               with_depth_relu=True,
                               is_training=is_training,
                               name="block2")
    return x
def make_coefficient_params(self, lowres):
    """Create the sub-networks of the coefficient predictor.

    Args:
        lowres: sequence of spatial sizes of the low-resolution input; its
            minimum divided by ``self.spatial_bin`` fixes the number of
            stride-2 splat convolutions.

    Returns:
        Bare ``nn.Module`` container with attributes ``splat``,
        ``global_conv``, ``global_fc``, ``local`` and ``prediction``.
    """
    multiplier = self.feature_multiplier

    # ---- splat params ------------------------------------------------
    num_downsamples = int(np.log2(min(lowres) / self.spatial_bin))
    num_extra = max(0, int(np.log2(self.spatial_bin) - np.log2(16)))
    # Indices of the downsampling stages that get an extra stride-1 conv.
    # The builtin `int` is used as dtype: the `np.int` alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    extra_convs = np.linspace(0, num_downsamples - 1, num_extra,
                              dtype=int).tolist()

    splat = []
    in_channels = self.n_in
    for i in range(num_downsamples):
        out_channels = (2**i) * multiplier
        # The very first convolution never uses normalisation.
        splat.append(
            conv(in_channels, out_channels, 3, stride=2,
                 norm=False if i == 0 else self.norm))
        if i in extra_convs:
            splat.append(
                conv(out_channels, out_channels, 3, norm=self.norm))
        in_channels = out_channels
    splat = nn.Sequential(*splat)
    splat_channels = in_channels

    # ---- global params -----------------------------------------------
    global_conv = []
    in_channels = splat_channels
    for _ in range(int(np.log2(self.spatial_bin / 4))):
        global_conv.append(
            conv(in_channels, 8 * multiplier, 3, stride=2, norm=self.norm))
        in_channels = 8 * multiplier
    # Pool to 4x4, so a flattened (8 * multiplier)-channel map has
    # 8 * multiplier * 16 = 128 * multiplier features.
    global_conv.append(nn.AdaptiveAvgPool2d(4))
    global_conv = nn.Sequential(*global_conv)

    global_fc = nn.Sequential(
        fc(128 * multiplier, 32 * multiplier, norm=self.norm),
        fc(32 * multiplier, 16 * multiplier, norm=self.norm),
        fc(16 * multiplier, 8 * multiplier, norm=False, relu=False))

    # ---- local params ------------------------------------------------
    local = nn.Sequential(
        conv(splat_channels, 8 * multiplier, 3),
        conv(8 * multiplier, 8 * multiplier, 3,
             bias=False, norm=False, relu=False))

    # ---- prediction params -------------------------------------------
    prediction = conv(8 * multiplier,
                      self.luma_bins * (self.n_in + 1) * self.n_out,
                      1, norm=False, relu=False)

    coefficient_params = nn.Module()
    coefficient_params.splat = splat
    coefficient_params.global_conv = global_conv
    coefficient_params.global_fc = global_fc
    coefficient_params.local = local
    coefficient_params.prediction = prediction
    return coefficient_params
def _create_model(self, input_shape):
    """Build the convolutional auto-encoder.

    The encoder and decoder are created as separate ``Model`` objects,
    stored in ``self.encoder_model`` / ``self.decoder_model``, and chained
    into the returned model.

    Args:
        input_shape: shape passed to the encoder ``Input`` layer.

    Returns:
        ``Model`` named ``'cae'`` mapping the encoder input to the decoder
        output.
    """
    droprate = 0.20
    enc_cfg, dec_cfg = configs[self.cfg_idx]

    def regularize(tensor):
        # Batch norm followed by dropout, applied after most layers.
        tensor = batchnorm()(tensor)
        return dropout(droprate)(tensor)

    # ================== encoder ==================
    with tf.name_scope('encoder_input'):
        encoder_input = Input(shape=input_shape, name="encoder_input")

    with tf.name_scope('encoder_input_noise'):
        encoder = GaussianNoise(stddev=const.NOISE_STDDEV)(encoder_input)

    # Three mandatory stride-2 conv stages (each presumably halves the
    # spatial dims -- verify against `conv`).
    for stage in range(3):
        with tf.name_scope('encoder_conv_{}'.format(stage + 1)):
            encoder = regularize(conv(enc_cfg[stage], strides=2)(encoder))

    # Two optional stages, enabled by a positive filter count.
    # NOTE(review): both use the scope name 'encoder_conv_4'; the second was
    # probably meant to be 'encoder_conv_5'. Kept as-is.
    # NOTE(review): the two checks are treated as independent of each other.
    if enc_cfg[3] > 0:
        with tf.name_scope('encoder_conv_4'):
            encoder = regularize(conv(enc_cfg[3], strides=2)(encoder))

    if enc_cfg[4] > 0:
        with tf.name_scope('encoder_conv_4'):
            encoder = regularize(conv(enc_cfg[4], strides=2)(encoder))

    with tf.name_scope('encoder_fully_connected_1'):
        encoder = Flatten()(encoder)
        encoder = dense(enc_cfg[5], activation=activations.lrelu)(encoder)
        encoder = regularize(encoder)

    with tf.name_scope('encoder_fully_connected_2'):
        encoder = dense(enc_cfg[6], activation=activations.relu)(encoder)

    encoder_model = Model(inputs=encoder_input, outputs=encoder,
                          name="encoder")

    # ================== decoder ==================
    with tf.name_scope('decoder_input'):
        decoder_input = Input(shape=encoder_model.output_shape[1:],
                              name="decoder_input")

    with tf.name_scope('decoder_fully_connected_1'):
        decoder = dense(dec_cfg[0])(decoder_input)

    with tf.name_scope('decoder_reshape_1'):
        # The dense output is folded into a 2 x 2 map.
        decoder = Reshape((2, 2, dec_cfg[0] // 4))(decoder)

    # Four stride-2 deconv stages.
    for stage in range(1, 5):
        with tf.name_scope('decoder_deconv_{}'.format(stage)):
            decoder = regularize(deconv(dec_cfg[stage], strides=2)(decoder))

    with tf.name_scope('decoder_deconv_5'):
        # Final stage: three output channels, relu, no batch norm / dropout.
        decoder = deconv(3, strides=2, activation=activations.relu)(decoder)

    decoder_model = Model(inputs=decoder_input, outputs=decoder,
                          name='decoder')

    # ================== CAE ==================
    model = Model(inputs=encoder_input,
                  outputs=decoder_model(encoder_model(encoder_input)),
                  name='cae')

    self.encoder_model = encoder_model
    self.decoder_model = decoder_model
    return model
def prediction(self):
    """Build the prediction graph on first use and return the cached tensor.

    The graph subtracts a fixed per-channel mean from ``self.data``, runs
    thirteen 3x3 convolutions in five groups (conv1_1 .. conv5_3) with 2x2
    max pools after the first four groups, concatenates ``pool3``,
    ``pool4`` and ``conv5_3`` on axis 3, and finishes with ``conv6``
    (3x3, 64 filters), ``conv7`` (1x1, 1 filter) and a leaky ReLU.
    Groups 1-3 are created with ``trainable=False``; everything from
    conv4_1 on with ``trainable=True``.

    Kernels and biases of conv1_1 .. conv6 are appended to
    ``self.parameters``.

    Returns:
        The leaky-ReLU output, cached in ``self._prediction``.
    """
    # Identity check: `== None` would dispatch to the cached object's
    # `__eq__`, which a tensor type may overload.
    if self._prediction is not None:
        return self._prediction

    def tracked_conv(inputs, ksize, filters, name, trainable, **extra):
        # Square convolution; the two literal 1s are presumably the strides
        # (verify against layers.conv). Parameters are recorded on self.
        output, kernel, biases = layers.conv(inputs, ksize, ksize, filters,
                                             1, 1, name=name,
                                             trainable=trainable, **extra)
        self.parameters += [kernel, biases]
        return output

    def max_pool(inputs, scope_name, pool_name, stride):
        # The pool op is named '<scope><pool_name>' with no separator,
        # exactly as the original graph did.
        with tf.variable_scope(scope_name) as scope:
            return tf.nn.max_pool(inputs,
                                  ksize=[1, 2, 2, 1],
                                  strides=[1, stride, stride, 1],
                                  padding='SAME',
                                  name=scope.name + pool_name)

    # zero-mean input
    with tf.name_scope('preprocess') as scope:
        mean = tf.constant([123.68, 116.779, 103.939],
                           dtype=tf.float32,
                           shape=[1, 1, 1, 3],
                           name='zero_mean')
        centered_data = self.data - mean

    conv1_1 = tracked_conv(centered_data, 3, 64, 'conv1_1', False,
                           log_weights=True)
    conv1_2 = tracked_conv(conv1_1, 3, 64, 'conv1_2', False)
    pool1 = max_pool(conv1_2, 'conv1_2', 'pool1', 2)  # TODO (kept from original)

    conv2_1 = tracked_conv(pool1, 3, 128, 'conv2_1', False)
    conv2_2 = tracked_conv(conv2_1, 3, 128, 'conv2_2', False)
    pool2 = max_pool(conv2_2, 'conv2_2', 'pool2', 2)

    conv3_1 = tracked_conv(pool2, 3, 256, 'conv3_1', False)
    conv3_2 = tracked_conv(conv3_1, 3, 256, 'conv3_2', False)
    conv3_3 = tracked_conv(conv3_2, 3, 256, 'conv3_3', False)
    pool3 = max_pool(conv3_3, 'conv3_3', 'pool3', 2)

    conv4_1 = tracked_conv(pool3, 3, 512, 'conv4_1', True, log_weights=True)
    conv4_2 = tracked_conv(conv4_1, 3, 512, 'conv4_2', True)
    conv4_3 = tracked_conv(conv4_2, 3, 512, 'conv4_3', True)
    # Stride 1 here: pool4 keeps the spatial size of pool3, which the
    # channel concatenation below requires.
    pool4 = max_pool(conv4_3, 'conv4_3', 'pool4', 1)

    conv5_1 = tracked_conv(pool4, 3, 512, 'conv5_1', True)
    conv5_2 = tracked_conv(conv5_1, 3, 512, 'conv5_2', True)
    conv5_3 = tracked_conv(conv5_2, 3, 512, 'conv5_3', True)

    concat_layer = tf.concat([pool3, pool4, conv5_3], axis=3)

    # TODO Add dropout
    conv6 = tracked_conv(concat_layer, 3, 64, 'conv6', True)
    # TODO Add batch normalization

    # NOTE(review): conv7's kernel and biases have never been appended to
    # self.parameters; that is preserved here -- confirm it is intended.
    conv7, _, _ = layers.conv(conv6, 1, 1, 1, 1, 1,
                              name='conv7',
                              trainable=True,
                              log_weights=True)
    conv7_relu = tf.nn.leaky_relu(conv7, alpha=0.01, name='conv7_relu')

    self._prediction = conv7_relu
    return self._prediction
def _aspp(inputs, filters: int, atrous_rates: [int], is_training: bool):
    """ASPP block.

    Consists of concat(conv1x1, 3x separable conv3x3, image pooling)
    -> 1x1 projection conv -> dropout.

    Arguments:
        inputs: input feature tensor
        filters: number of filters of every branch and of the projection
        atrous_rates: atrous rates to be used, must be a list of 3 integers
        is_training: whether the current mode is training or not; also
            controls whether dropout is active

    Returns:
        ASPP output
    """
    assert len(atrous_rates) == 3

    with tf.variable_scope("atrous_spatial_pyramid_pooling"):
        conv1x1 = conv(inputs,
                       filters=filters,
                       kernel_size=1,
                       with_relu=True,
                       with_bn=True,
                       is_training=is_training,
                       name="conv1x1")

        # Layer names are fixed (r6 / r12 / r18) regardless of the actual
        # rates, so variable names stay stable across configurations.
        branch_names = ("conv3x3r6", "conv3x3r12", "conv3x3r18")
        atrous_branches = [
            separable_conv(inputs,
                           filters=filters,
                           kernel_size=3,
                           dilation_rate=rate,
                           with_depth_relu=True,
                           is_training=is_training,
                           name=branch_name)
            for rate, branch_name in zip(atrous_rates, branch_names)
        ]

        # Image-level features are resized back to the input's spatial size.
        pool_size = inputs.shape.as_list()[1:3]
        pool = _image_pooling(inputs,
                              filters=filters,
                              target_size=pool_size,
                              is_training=is_training)

        outputs = tf.concat([conv1x1] + atrous_branches + [pool], axis=-1)
        outputs = conv(outputs,
                       filters=filters,
                       kernel_size=1,
                       name="projection",
                       with_relu=True,
                       with_bn=True,
                       is_training=is_training)

        # `training` defaults to False in tf.layers.dropout, which made this
        # layer a no-op even while training; pass the mode explicitly.
        outputs = tf.layers.dropout(outputs,
                                    rate=0.5,
                                    training=is_training,
                                    name="dropout")
    return outputs