def _encoder(self, image):
    """Encode ``image`` into a texture encoding and a shape encoding.

    A shared ``res_encoder`` trunk is computed first.  Each branch then
    optionally applies its own 3x3 convolution + ReLU, followed by a stack
    of residual blocks.  When ``self.use_shape_deconv`` is set, the shape
    branch is additionally passed through ``deconv2d`` and a reflect-padded
    3x3 convolution with 3 output channels.

    Args:
        image: input tensor handed to ``res_encoder``.  The padding below
            pads axes 1 and 2, so presumably NHWC -- TODO confirm.

    Returns:
        Tuple ``(texture_enc, shape_enc)``.

    Raises:
        AssertionError: if a branch convolution is disabled while that
            branch's channel count differs from ``self.encoder_dims``, or
            if ``texture_downsample`` is set without ``use_texture_conv``.
    """
    common = res_encoder(image,
                         layer_dims=self.encoder_dims,
                         num_resblk=self.num_enc_resblk)

    # Texture branch: first convolution on the common encoding (optional).
    if not self.use_texture_conv:
        assert self.texture_dims == self.encoder_dims, \
            "Texture dim ({}) must match encoder dim ({}) " \
            "if texture_conv is not used.".format(self.texture_dims,
                                                  self.encoder_dims)
        assert not self.texture_downsample, \
            "Must use texture_conv if texture_downsample."
        texture_enc = common
    else:
        # Downsampling of the texture branch is done by striding this conv.
        texture_stride = 2 if self.texture_downsample else 1
        texture_pre = conv2d(common, self.texture_dims, 3, texture_stride,
                             name='enc_texture_conv')
        texture_enc = tf.nn.relu(texture_pre)

    # Shape branch: first convolution on the common encoding (optional).
    if not self.use_shape_conv:
        assert self.shape_dims == self.encoder_dims, \
            "Shape dim ({}) must match encoder dim ({}) " \
            "if shape_conv is not used.".format(self.shape_dims,
                                                self.encoder_dims)
        shape_enc = common
    else:
        shape_pre = conv2d(common, self.shape_dims, 3, 1,
                           name='enc_shape_conv')
        shape_enc = tf.nn.relu(shape_pre)

    # Residual stacks.  The first block of each stack keeps its un-suffixed
    # name for backward compatibility; later blocks get a numeric suffix.
    for idx in range(self.num_texture_resblk):
        blk_name = 'texture_enc' if idx == 0 else 'texture_enc_{}'.format(idx)
        texture_enc = residual_block(texture_enc, self.texture_dims, 3, 1,
                                     blk_name)

    for idx in range(self.num_shape_resblk):
        blk_name = 'shape_enc' if idx == 0 else 'shape_enc_{}'.format(idx)
        shape_enc = residual_block(shape_enc, self.shape_dims, 3, 1,
                                   blk_name)

    if self.use_shape_deconv:
        # NOTE(review): the positional arguments (16, 4, 2) are presumably
        # output channels, kernel size and stride -- confirm against
        # deconv2d's signature.
        shape_enc = deconv2d(shape_enc, 16, 4, 2)
        # Reflect-pad by one pixel so the VALID 3x3 conv keeps the size.
        padded = tf.pad(shape_enc, [[0, 0], [1, 1], [1, 1], [0, 0]],
                        "REFLECT")
        shape_enc = conv2d(padded, 3, 3, 1, padding='VALID',
                           name='use_shape_deconv_conv')

    return texture_enc, shape_enc
def _encoder(self, image):
    """Encode ``image`` into a texture encoding and a shape encoding.

    Both encodings start from the same ``res_encoder`` output.  Each branch
    optionally applies a 3x3 convolution + ReLU and then runs through its
    own stack of residual blocks.

    Args:
        image: input tensor handed to ``res_encoder``.

    Returns:
        Tuple ``(texture_enc, shape_enc)``.

    Raises:
        AssertionError: if a branch convolution is disabled while that
            branch's channel count differs from ``self.encoder_dims``, or
            if ``texture_downsample`` is set without ``use_texture_conv``.
    """
    shared = res_encoder(image,
                         layer_dims=self.encoder_dims,
                         num_resblk=self.num_enc_resblk)

    # First convolution on the common encoding -- texture branch.
    if not self.use_texture_conv:
        assert self.texture_dims == self.encoder_dims, \
            "Texture dim ({}) must match encoder dim ({}) " \
            "if texture_conv is not used.".format(self.texture_dims,
                                                  self.encoder_dims)
        assert not self.texture_downsample, \
            "Must use texture_conv if texture_downsample."
        texture_enc = shared
    else:
        # A stride of 2 is what implements the texture downsampling.
        conv_stride = 2 if self.texture_downsample else 1
        texture_enc = tf.nn.relu(
            conv2d(shared, self.texture_dims, 3, conv_stride,
                   name='enc_texture_conv'))

    # First convolution on the common encoding -- shape branch.
    if not self.use_shape_conv:
        assert self.shape_dims == self.encoder_dims, \
            "Shape dim ({}) must match encoder dim ({}) " \
            "if shape_conv is not used.".format(self.shape_dims,
                                                self.encoder_dims)
        shape_enc = shared
    else:
        shape_enc = tf.nn.relu(
            conv2d(shared, self.shape_dims, 3, 1, name='enc_shape_conv'))

    # Block 0 of each stack keeps the legacy un-suffixed name (backward
    # compatibility); the remaining blocks are suffixed with their index.
    for block_no in range(self.num_texture_resblk):
        if block_no == 0:
            scope = 'texture_enc'
        else:
            scope = 'texture_enc_{}'.format(block_no)
        texture_enc = residual_block(texture_enc, self.texture_dims, 3, 1,
                                     scope)

    for block_no in range(self.num_shape_resblk):
        if block_no == 0:
            scope = 'shape_enc'
        else:
            scope = 'shape_enc_{}'.format(block_no)
        shape_enc = residual_block(shape_enc, self.shape_dims, 3, 1, scope)

    return texture_enc, shape_enc
def _decoder(self, texture_enc, shape_enc):
    """Decode a texture/shape encoding pair with ``res_decoder``.

    Args:
        texture_enc: texture encoding.  If ``self.texture_downsample`` is
            set, it is first resized to twice its size along axes 1 and 2
            and passed through a reflect-padded 3x3 conv + ReLU.
        shape_enc: shape encoding.  If ``self.use_shape_deconv`` is set, it
            is first passed through a reflect-padded 7x7 conv and a
            stride-2 3x3 conv, each followed by ReLU.

    Returns:
        The output of ``res_decoder`` applied to the two encodings
        concatenated along axis 3.
    """
    if self.texture_downsample:
        # Nearest-neighbour upsampling by a factor of 2 on axes 1 and 2.
        doubled_size = tf.shape(texture_enc)[1:3] * 2
        upsampled = tf.image.resize_nearest_neighbor(texture_enc,
                                                     doubled_size)
        # Reflect-pad by one pixel so the VALID 3x3 conv keeps the size.
        upsampled = tf.pad(upsampled, [[0, 0], [1, 1], [1, 1], [0, 0]],
                           "REFLECT")
        texture_enc = tf.nn.relu(
            conv2d(upsampled, self.texture_dims, 3, 1, padding='VALID',
                   name='texture_upsample'))

    if self.use_shape_deconv:
        # Reflect-pad by three pixels to match the VALID 7x7 conv.
        padded_shape = tf.pad(shape_enc, [[0, 0], [3, 3], [3, 3], [0, 0]],
                              "REFLECT")
        hidden = tf.nn.relu(
            conv2d(padded_shape, int(self.encoder_dims / 2), 7, 1,
                   padding='VALID', name='shape_de_conv1'))
        shape_enc = tf.nn.relu(
            conv2d(hidden, int(self.encoder_dims), 3, 2,
                   name='shape_de_conv2'))

    # The decoder input needs double the channels because the two
    # encodings are concatenated.
    merged = tf.concat([texture_enc, shape_enc], axis=3)
    return res_decoder(merged,
                       layer_dims=self.decoder_dims,
                       out_channels=self.n_channels,
                       num_resblk=self.num_dec_resblk)
def __init__(self, args):
    """Define the architecture of the model and its training operations.

    Builds the input placeholders, a 28-layer conv/pool stack, four fully
    connected layers, the init/restore helpers, a session, the loss and an
    Adam optimizer.

    Args:
        args: options object; only ``args.a`` is read here and is forwarded
            as ``alpha`` to every conv / fully connected layer.
    """
    self.options = Options()
    self.alpha = args.a
    self.threshold = self.options.det_threshold
    self.iou_threshold = self.options.iou_threshold
    self.classes = self.options.custom_labels
    self.image_file = self.options.image_file
    self.learning_rate = self.options.learning_rate

    # Input to the model: flattened 3-channel images, reshaped to 4-D.
    self.x = tf.placeholder(
        tf.float32,
        shape=[None, self.options.img_x * self.options.img_y * 3])
    # NOTE(review): this placeholder is not used by the optimizer built at
    # the end of this method (which uses self.learning_rate) -- confirm
    # whether it is consumed elsewhere.
    self.lr = tf.placeholder(tf.float32)
    input_data = tf.reshape(
        self.x, [-1, self.options.img_x, self.options.img_y, 3])
    self.utils = util.Utilities(self.options.annotations_dir, self.classes,
                                self.options)

    # Stack the layers of the network
    print(" Stacking layers of the network")

    # (layer index, conv kernel, conv stride); kernel None means max-pool.
    # Layers are created strictly in this order and each one is stored on
    # ``self`` as ``conv_NN`` / ``pool_NN``.
    layer_specs = (
        (1, (7, 7, 3, 64), 2),
        (2, None, None),
        (3, (3, 3, 64, 192), 1),
        (4, None, None),
        (5, (1, 1, 192, 128), 1),
        (6, (3, 3, 128, 256), 1),
        (7, (1, 1, 256, 256), 1),
        (8, (3, 3, 256, 512), 1),
        (9, None, None),
        (10, (1, 1, 512, 256), 1),
        (11, (3, 3, 256, 512), 1),
        (12, (1, 1, 512, 256), 1),
        (13, (3, 3, 256, 512), 1),
        (14, (1, 1, 512, 256), 1),
        (15, (3, 3, 256, 512), 1),
        (16, (1, 1, 512, 256), 1),
        (17, (3, 3, 256, 512), 1),
        (18, (1, 1, 512, 512), 1),
        (19, (3, 3, 512, 1024), 1),
        (20, None, None),
        (21, (1, 1, 1024, 512), 1),
        (22, (3, 3, 512, 1024), 1),
        (23, (1, 1, 1024, 512), 1),
        (24, (3, 3, 512, 1024), 1),
        (25, (3, 3, 1024, 1024), 1),
        (26, (3, 3, 1024, 1024), 2),
        (27, (3, 3, 1024, 1024), 1),
        (28, (3, 3, 1024, 1024), 1),
    )
    net = input_data
    for idx, kernel, stride in layer_specs:
        if kernel is None:
            layer_name = 'pool_{:02d}'.format(idx)
            net = model.max_pool(idx, net, name=layer_name)
        else:
            layer_name = 'conv_{:02d}'.format(idx)
            net = model.conv2d(idx, net, kernel=list(kernel), stride=stride,
                               name=layer_name, alpha=self.alpha,
                               is_training=True)
        setattr(self, layer_name, net)

    # Reshape 'self.conv_28' from 4D to 2D.  Axis 3 is moved in front of
    # axes 1 and 2 before flattening.
    shape = self.conv_28.get_shape().as_list()
    flat_shape = int(shape[1]) * int(shape[2]) * int(shape[3])
    inputs_transposed = tf.transpose(self.conv_28, (0, 3, 1, 2))
    fully_flat = tf.reshape(inputs_transposed, [-1, flat_shape])

    self.fc_29 = model.fully_connected(29, fully_flat, 512, name='fc_29',
                                       alpha=self.alpha, is_training=True,
                                       activation=tf.nn.relu)
    self.fc_30 = model.fully_connected(30, self.fc_29, 4096, name='fc_30',
                                       alpha=self.alpha, is_training=True,
                                       activation=tf.nn.relu)
    # fc_31 does not feed the predictions (fc_32 reads fc_30), but it is
    # still built, so the set and order of created variables is unchanged.
    # NOTE(review): presumably kept so checkpoint variables line up with
    # the index thresholds used below -- confirm.
    self.fc_31 = model.fully_connected(31, self.fc_30, 1470, name='fc_31',
                                       alpha=self.alpha, is_training=True,
                                       activation=None)
    self.fc_32 = model.fully_connected(32, self.fc_30, self.options.O,
                                       name='fc_32', alpha=self.alpha,
                                       is_training=True, activation=None)
    self.predictions = self.fc_32

    def _name_index(var):
        # Numeric part between the first '_' and the ':' of the variable
        # name.  Assumes names of the form '<prefix>_<n>:<k>' -- raises
        # IndexError / ValueError otherwise, as the inline parsing did.
        return int(str(var.name).split('_')[1].split(':')[0])

    all_vars = tf.global_variables()

    # Variables with index >= 54 are initialized with random weights.  The
    # first variable is skipped (presumably its name carries no '_<n>'
    # suffix -- TODO confirm).
    var_to_init = [var for var in all_vars[1:] if _name_index(var) >= 54]

    # Variables without a numeric suffix, or with index <= 53, have their
    # weights restored.
    var_to_restore = [
        var for var in all_vars
        if '_' not in str(var.name) or _name_index(var) <= 53
    ]

    self.init_operation = tf.variables_initializer(var_to_init)
    self.saver1 = tf.train.Saver(var_to_restore)
    self.saver2 = tf.train.Saver()
    self.sess = tf.Session()

    # Build the loss operation
    self.loss(self.predictions)
    # NOTE(review): any variables the optimizer creates here were not part
    # of ``all_vars`` above, so ``init_operation`` does not cover them --
    # confirm that the training code initializes them.
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self._loss)
def Discriminator(self, data, reuse=False):
    """Discriminator part of GAN.

    Builds one of two architectures inside the ``"discriminator"`` variable
    scope, selected by ``self.opts.dataset``.

    Args:
        data: input tensor fed to the first convolution.
        reuse: forwarded as ``reuse=`` to every ``model.conv2d`` /
            ``model.fully_connected`` call.  NOTE(review): it is not passed
            to ``tf.variable_scope`` itself -- confirm the layer helpers
            handle variable sharing.

    Returns:
        Output of the last fully connected layer, which has
        ``self.opts.label_len`` units and no activation.
    """
    dims = self.opts.dims

    if self.opts.dataset == "CIFAR":
        with tf.variable_scope("discriminator"):
            # This branch used a bare ``is_training`` (not defined in this
            # scope) and ``self.dims``; it now uses ``self.is_training``
            # and the local ``dims`` like the other branch.
            conv1 = model.conv2d(data, [5, 5, 3, dims], 2, "conv1",
                                 self.is_training, False, reuse=reuse)
            conv2 = model.conv2d(conv1, [3, 3, dims, dims * 2], 2, "conv2",
                                 self.is_training, True, reuse=reuse,
                                 use_batch_norm=True)
            conv3 = model.conv2d(conv2, [3, 3, dims * 2, dims * 4], 2,
                                 "conv3", self.is_training, True,
                                 reuse=reuse, use_batch_norm=True)
            # Flatten per sample using the fixed batch size from opts.
            conv3_flat = tf.reshape(conv3, [self.opts.batch_size, -1])
            full4 = model.fully_connected(conv3_flat, self.opts.label_len,
                                          self.is_training, None, "full4",
                                          False, reuse=reuse)
        return full4

    with tf.variable_scope("discriminator"):
        # The trailing shape notes come from the original comments and
        # presumably assume 28x28 inputs with dims == 64 -- TODO confirm.
        conv1 = model.conv2d(data, [5, 5, self.c, dims], 2, "conv1",
                             alpha=0.2, use_leak=True, bias_constant=0.01,
                             reuse=reuse, use_batch_norm=False,
                             is_training=self.is_training)  # 14x14x64
        conv2 = model.conv2d(conv1, [5, 5, dims, dims * 2], 2, "conv2",
                             alpha=0.2, use_leak=True, bias_constant=0.01,
                             reuse=reuse, use_batch_norm=False,
                             is_training=self.is_training)  # 7x7x128
        conv3 = model.conv2d(conv2, [3, 3, dims * 2, dims * 4], 2, "conv3",
                             alpha=0.2, use_leak=True, bias_constant=0.01,
                             reuse=reuse, use_batch_norm=True,
                             is_training=self.is_training)  # 4x4x256
        # The flatten hard-codes a 4x4 spatial size for conv3's output.
        conv3_flat = tf.reshape(conv3, [-1, 4 * 4 * dims * 4])
        full1 = model.fully_connected(
            conv3_flat, dims * 4 * 4 * 2, activation=tf.nn.relu,
            use_leak=True, name="full1", bias_constant=0.01, reuse=reuse,
            use_batch_norm=True, is_training=self.is_training)
        full2 = model.fully_connected(
            full1, dims * 4 * 4, activation=tf.nn.relu, name="full2",
            bias_constant=0.01, reuse=reuse, use_leak=True,
            use_batch_norm=True, is_training=self.is_training)
        output = model.fully_connected(
            full2, self.opts.label_len, activation=None, name="output",
            bias_constant=0.01, reuse=reuse, use_leak=True,
            use_batch_norm=True, is_training=self.is_training)
    return output
def encoder(self):
    """Encoder to generate the `latent vector`.

    Builds a convolutional feature extractor on ``self.images`` inside the
    ``"encoder"`` variable scope (architecture selected by
    ``self.opts.dataset``), flattens it, and attaches two fully connected
    heads of ``self.opts.encoder_vec_size`` units each.

    Side effects:
        For the ``"CIFAR"`` dataset only, ``self.conv3_flat_len`` is set to
        the flattened size of the last convolution's output.

    Returns:
        Tuple ``(mean, stds)``: outputs of the ``"full3_mean"`` and
        ``"full3_stds"`` layers.
    """
    base_dims = self.opts.dims
    latent_len = self.opts.encoder_vec_size

    with tf.variable_scope("encoder"):
        if self.opts.dataset == "CIFAR":
            # Four stride-2 3x3 convolutions, doubling the channel count
            # each time.  Shape notes are from the original comments and
            # presumably assume 32x32 inputs with dims == 64 -- confirm.
            feat = model.conv2d(self.images, [3, 3, self.c, base_dims], 2,
                                "conv1", alpha=0.01)  # 16x16x64
            feat = model.conv2d(feat, [3, 3, base_dims, base_dims * 2], 2,
                                "conv2", alpha=0.01)  # 8x8x128
            feat = model.conv2d(feat, [3, 3, base_dims * 2, base_dims * 4],
                                2, "conv3", alpha=0.01)  # 4x4x256
            feat = model.conv2d(feat, [3, 3, base_dims * 4, base_dims * 8],
                                2, "conv4", alpha=0.01)  # 2x2x512
            self.conv3_flat_len = int(np.prod(feat.get_shape()[1:]))
            flat = tf.reshape(feat, [-1, self.conv3_flat_len])
        else:
            # Smaller network with a fixed width of 16 channels; the
            # flatten hard-codes a 7x7x32 feature map.
            small_dims = 16
            feat = model.conv2d(self.images, [3, 3, self.c, small_dims], 2,
                                "conv1", alpha=0.2, use_leak=True,
                                bias_constant=0.01)  # 14x14x16
            feat = model.conv2d(feat, [3, 3, small_dims, small_dims * 2], 2,
                                "conv2", alpha=0.2, use_leak=True,
                                bias_constant=0.01)  # 7x7x32
            flat = tf.reshape(feat, [-1, 7 * 7 * 32])

        # Both architectures share the same pair of heads.
        mean = model.fully_connected(flat, latent_len, self.is_training,
                                     None, "full3_mean", use_leak=True,
                                     bias_constant=0.01)
        stds = model.fully_connected(flat, latent_len, self.is_training,
                                     None, "full3_stds", use_leak=True,
                                     bias_constant=0.01)

    return mean, stds
def __init__(self, alpha):
    """Define the architecture of the model and initialize its variables.

    Builds the input placeholder, a 28-layer conv/pool stack, three fully
    connected layers, a global initializer op and a saver.

    Args:
        alpha: alpha used for leaky relu; forwarded to every conv / fully
            connected layer.
    """
    self.options = Options()
    self.alpha = alpha
    self.threshold = 0.15
    self.iou_threshold = 0.5
    self.classes = [
        "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car",
        "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
        "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"
    ]
    self.image_file = self.options.image_file

    # Input to the model
    self.x = tf.placeholder(tf.float32, shape=[None, 448, 448, 3])

    # Stack the layers of the network
    print(" Stacking layers of the network")

    # (layer index, conv kernel, conv stride); kernel None means max-pool.
    # Layers are created strictly in this order and each one is stored on
    # ``self`` as ``conv_NN`` / ``pool_NN``.
    layer_specs = (
        (1, (7, 7, 3, 64), 2),
        (2, None, None),
        (3, (3, 3, 64, 192), 1),
        (4, None, None),
        (5, (1, 1, 192, 128), 1),
        (6, (3, 3, 128, 256), 1),
        (7, (1, 1, 256, 256), 1),
        (8, (3, 3, 256, 512), 1),
        (9, None, None),
        (10, (1, 1, 512, 256), 1),
        (11, (3, 3, 256, 512), 1),
        (12, (1, 1, 512, 256), 1),
        (13, (3, 3, 256, 512), 1),
        (14, (1, 1, 512, 256), 1),
        (15, (3, 3, 256, 512), 1),
        (16, (1, 1, 512, 256), 1),
        (17, (3, 3, 256, 512), 1),
        (18, (1, 1, 512, 512), 1),
        (19, (3, 3, 512, 1024), 1),
        (20, None, None),
        (21, (1, 1, 1024, 512), 1),
        (22, (3, 3, 512, 1024), 1),
        (23, (1, 1, 1024, 512), 1),
        (24, (3, 3, 512, 1024), 1),
        (25, (3, 3, 1024, 1024), 1),
        (26, (3, 3, 1024, 1024), 2),
        (27, (3, 3, 1024, 1024), 1),
        (28, (3, 3, 1024, 1024), 1),
    )
    net = self.x
    for idx, kernel, stride in layer_specs:
        if kernel is None:
            layer_name = 'pool_{:02d}'.format(idx)
            net = model.max_pool(idx, net, name=layer_name)
        else:
            layer_name = 'conv_{:02d}'.format(idx)
            net = model.conv2d(idx, net, kernel=list(kernel), stride=stride,
                               name=layer_name, alpha=self.alpha)
        setattr(self, layer_name, net)

    # Reshape 'self.conv_28' from 4D to 2D.  Axis 3 is moved in front of
    # axes 1 and 2 before flattening.
    shape = self.conv_28.get_shape().as_list()
    flat_shape = int(shape[1]) * int(shape[2]) * int(shape[3])
    inputs_transposed = tf.transpose(self.conv_28, (0, 3, 1, 2))
    fully_flat = tf.reshape(inputs_transposed, [-1, flat_shape])

    self.fc_29 = model.fully_connected(29, fully_flat, 512, name='fc_29',
                                       alpha=self.alpha,
                                       activation=tf.nn.relu)
    self.fc_30 = model.fully_connected(30, self.fc_29, 4096, name='fc_30',
                                       alpha=self.alpha,
                                       activation=tf.nn.relu)
    # skip the dropout layer
    self.fc_31 = model.fully_connected(31, self.fc_30, 1470, name='fc_31',
                                       alpha=self.alpha, activation=None)

    # tf.initialize_all_variables() is deprecated; this is its documented
    # replacement.
    self.init_operation = tf.global_variables_initializer()
    self.saver = tf.train.Saver()