def discriminator(self, inp, cond, stages, t, reuse=False):
    alpha_trans = self.alpha_tra
    with tf.variable_scope("d_net", reuse=reuse):
        x_iden = None
        if t:
            x_iden = pool(inp, 2)
            x_iden = self.from_rgb(x_iden, stages - 2)

        x = self.from_rgb(inp, stages - 1)

        for i in range(stages - 1, 0, -1):
            with tf.variable_scope(self.get_conv_scope_name(i), reuse=reuse):
                x = conv2d(x, f=self.get_dnf(i), ks=(3, 3), s=(1, 1), act=lrelu_act())
                x = conv2d(x, f=self.get_dnf(i - 1), ks=(3, 3), s=(1, 1), act=lrelu_act())
                x = pool(x, 2)
            # Blend in the downscaled identity path during the fade-in transition
            if i == stages - 1 and t:
                x = tf.multiply(alpha_trans, x) + tf.multiply(tf.subtract(1., alpha_trans), x_iden)

        with tf.variable_scope(self.get_conv_scope_name(0), reuse=reuse):
            # Real/fake branch, conditioned on the compressed embedding
            cond_compress = fc(cond, units=128, act=lrelu_act())
            concat = self.concat_cond4(x, cond_compress)
            x_b1 = conv2d(concat, f=self.get_dnf(0), ks=(3, 3), s=(1, 1), act=lrelu_act())
            x_b1 = conv2d(x_b1, f=self.get_dnf(0), ks=(4, 4), s=(1, 1), padding='VALID', act=lrelu_act())
            output_b1 = fc(x_b1, units=1)

        return output_b1
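
# `concat_cond4` is not defined in this snippet. A minimal sketch of what such a
# helper typically does (assumption, inferred from the 4x4 feature map at the
# final stage): broadcast the compressed conditioning vector over the spatial
# grid and concatenate it along the channel axis.
def concat_cond4(self, x, cond):
    # x: [batch, 4, 4, c_x]; cond: [batch, c_cond]
    cond = tf.expand_dims(tf.expand_dims(cond, 1), 1)  # [batch, 1, 1, c_cond]
    cond = tf.tile(cond, [1, 4, 4, 1])                 # [batch, 4, 4, c_cond]
    return tf.concat([x, cond], axis=3)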
def construct_bottom_block(self, inputs, name):
    num_outputs = inputs.shape[self.channel_axis].value
    conv1 = ops.conv2d(inputs, 2 * num_outputs, self.conv_size, name + '/conv1')
    conv2 = ops.conv2d(conv1, num_outputs, self.conv_size, name + '/conv2')
    return conv2
def discriminator(self, image, is_training, reuse=False):
    with tf.variable_scope("discriminator"):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        # [batch, 256, 256, 1] -> [batch, 128, 128, 64]
        h0 = lrelu(conv2d(image, self.discriminator_dim, scope="d_h0_conv"))
        # [batch, 128, 128, 64] -> [batch, 64, 64, 64*2]
        h1 = lrelu(batch_norm(conv2d(h0, self.discriminator_dim * 2, scope="d_h1_conv"),
                              is_training, scope="d_bn_1"))
        # [batch, 64, 64, 64*2] -> [batch, 32, 32, 64*4]
        h2 = lrelu(batch_norm(conv2d(h1, self.discriminator_dim * 4, scope="d_h2_conv"),
                              is_training, scope="d_bn_2"))
        # [batch, 32, 32, 64*4] -> [batch, 31, 31, 64*8] (stride 1)
        h3 = lrelu(batch_norm(conv2d(h2, self.discriminator_dim * 8, sh=1, sw=1, scope="d_h3_conv"),
                              is_training, scope="d_bn_3"))
        # Real/fake binary logit
        fc1 = fc(tf.reshape(h3, [self.batch_size, -1]), 1, scope="d_fc1")
        return tf.sigmoid(fc1), fc1
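
# `lrelu` is assumed to be a leaky-ReLU helper along these lines (not shown in
# this snippet; the standard TF1 formulation):
def lrelu(x, leak=0.2):
    # max(x, leak*x) == leaky ReLU with slope `leak` on the negative side
    return tf.maximum(x, leak * x)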
def generator(self, z, embed, is_training=True, reuse=False, cond_noise=True):
    s = self.output_size
    s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)
    with tf.variable_scope("g_net", reuse=reuse):
        # Sample from the multivariate normal distribution of the embeddings
        mean, log_sigma = self.generate_conditionals(embed)
        net_embed = self.sample_normal_conditional(mean, log_sigma, cond_noise)

        # --------------------------------------------------------
        # Concatenate the sampled embedding with the z vector
        net_input = tf.concat([z, net_embed], 1)
        net_h0 = tf.layers.dense(net_input, units=self.gf_dim * 8 * s16 * s16, activation=None,
                                 kernel_initializer=self.w_init)
        net_h0 = batch_norm(net_h0, train=is_training, init=self.batch_norm_init, act=None)
        net_h0 = tf.reshape(net_h0, [-1, s16, s16, self.gf_dim * 8])

        # Residual layer
        net = conv2d(net_h0, self.gf_dim * 2, ks=(1, 1), s=(1, 1), padding='valid', init=self.w_init)
        net = batch_norm(net, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)
        net = conv2d(net, self.gf_dim * 2, ks=(3, 3), s=(1, 1), init=self.w_init)
        net = batch_norm(net, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)
        net = conv2d(net, self.gf_dim * 8, ks=(3, 3), s=(1, 1), padding='same', init=self.w_init)
        net = batch_norm(net, train=is_training, init=self.batch_norm_init, act=None)
        net_h1 = tf.add(net_h0, net)
        net_h1 = tf.nn.relu(net_h1)

        # --------------------------------------------------------
        net_h2 = conv2d_transpose(net_h1, self.gf_dim * 4, ks=(4, 4), s=(2, 2), init=self.w_init)
        net_h2 = conv2d(net_h2, self.gf_dim * 4, ks=(3, 3), s=(1, 1), init=self.w_init)
        net_h2 = batch_norm(net_h2, train=is_training, init=self.batch_norm_init, act=None)

        # --------------------------------------------------------
        # Residual layer
        net = conv2d(net_h2, self.gf_dim, ks=(1, 1), s=(1, 1), padding='valid', init=self.w_init)
        net = batch_norm(net, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)
        net = conv2d(net, self.gf_dim, ks=(3, 3), s=(1, 1), init=self.w_init)
        net = batch_norm(net, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)
        net = conv2d(net, self.gf_dim * 4, ks=(3, 3), s=(1, 1), init=self.w_init)
        net = batch_norm(net, train=is_training, init=self.batch_norm_init, act=None)
        net_h3 = tf.add(net_h2, net)
        net_h3 = tf.nn.relu(net_h3)

        # --------------------------------------------------------
        net_h4 = conv2d_transpose(net_h3, self.gf_dim * 2, ks=(4, 4), s=(2, 2), init=self.w_init)
        net_h4 = conv2d(net_h4, self.gf_dim * 2, ks=(3, 3), s=(1, 1), init=self.w_init)
        net_h4 = batch_norm(net_h4, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)

        net_h5 = conv2d_transpose(net_h4, self.gf_dim, ks=(4, 4), s=(2, 2), init=self.w_init)
        net_h5 = conv2d(net_h5, self.gf_dim, ks=(3, 3), s=(1, 1), init=self.w_init)
        net_h5 = batch_norm(net_h5, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)

        net_logits = conv2d_transpose(net_h5, self.image_dims[-1], ks=(4, 4), s=(2, 2), init=self.w_init)
        net_logits = conv2d(net_logits, self.image_dims[-1], ks=(3, 3), s=(1, 1), init=self.w_init)
        net_output = tf.nn.tanh(net_logits)
        return net_output, mean, log_sigma
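
# `generate_conditionals` / `sample_normal_conditional` are not shown here. A
# minimal sketch of the usual conditioning-augmentation pair (assumption based
# on the StackGAN-style usage above; `self.compressed_embed_dim` is borrowed
# from the discriminator snippet below): a dense layer predicts mean and
# log-sigma, and a reparameterized sample is drawn when noise is enabled.
def generate_conditionals(self, embed):
    out = tf.layers.dense(embed, units=2 * self.compressed_embed_dim,
                          activation=lambda l: tf.nn.leaky_relu(l, 0.2))
    mean = out[:, :self.compressed_embed_dim]
    log_sigma = out[:, self.compressed_embed_dim:]
    return mean, log_sigma

def sample_normal_conditional(self, mean, log_sigma, cond_noise=True):
    if not cond_noise:
        return mean
    eps = tf.random_normal(tf.shape(mean))
    # Reparameterization trick: mean + sigma * eps
    return mean + tf.exp(log_sigma) * eps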
def generator_encode_image(self, image, is_training=True):
    net_h0 = conv2d(image, self.gf_dim, ks=(3, 3), s=(1, 1), act=tf.nn.relu)
    net_h1 = conv2d(net_h0, self.gf_dim * 2, ks=(4, 4), s=(2, 2))
    net_h1 = batch_norm(net_h1, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)
    output_tensor = conv2d(net_h1, self.gf_dim * 4, ks=(4, 4), s=(2, 2))
    output_tensor = batch_norm(output_tensor, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)
    return output_tensor
def generator_residual_layer(self, input_layer, is_training=True):
    net_h0 = input_layer
    net_h1 = conv2d(net_h0, self.gf_dim * 4, ks=(4, 4), s=(1, 1))
    net_h1 = batch_norm(net_h1, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)
    net_h2 = conv2d(net_h1, self.gf_dim * 4, ks=(4, 4), s=(1, 1))
    net_h2 = batch_norm(net_h2, train=is_training, init=self.batch_norm_init)
    return tf.nn.relu(tf.add(net_h0, net_h2))
def construct_down_block(self, inputs, name, down_outputs, first=False):
    num_outputs = self.conf.start_channel_num if first \
        else 2 * inputs.shape[self.channel_axis].value
    conv1 = ops.conv2d(inputs, num_outputs, self.conv_size, name + '/conv1')
    conv2 = ops.conv2d(conv1, num_outputs, self.conv_size, name + '/conv2')
    down_outputs.append(conv2)
    pool = ops.pool2d(conv2, self.pool_size, name + '/pool')
    return pool
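
# The `ops.conv2d` / `ops.pool2d` wrappers used throughout these U-Net blocks
# are not shown. A minimal sketch of what they might look like in TF1
# (hypothetical signatures matched to the call sites above; padding, activation,
# and data_format choices are assumptions):
def conv2d(inputs, num_outputs, kernel_size, scope):
    return tf.layers.conv2d(inputs, num_outputs, kernel_size, padding='same',
                            activation=tf.nn.relu, name=scope)

def pool2d(inputs, pool_size, scope):
    return tf.layers.max_pooling2d(inputs, pool_size, strides=pool_size, name=scope)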
def _make_descriminator(self, input, phase_train):
    conv1 = ops.batch_norm(ops.conv2d(input, self.df_dim, name='d_h0_conv'),
                           name='d_bn0', phase_train=phase_train)
    h0 = ops.lrelu(conv1)
    h1 = ops.lrelu(ops.batch_norm(ops.conv2d(h0, self.df_dim * 2, name='d_h1_conv'),
                                  name='d_bn1', phase_train=phase_train))
    # h2 = ops.lrelu(ops.batch_norm(ops.conv2d(h1, self.df_dim*4, name='d_h2_conv'), name='d_bn2'))
    # h3 = ops.lrelu(ops.batch_norm(ops.conv2d(h2, self.df_dim*8, name='d_h3_conv'), name='d_bn3'))
    # Final linear layer to a single logit. The original applied `ops.lrelu`
    # with linear-layer arguments, which cannot be right; `ops.linear` matches
    # the (inputs, output_size, scope) call signature used here (assumption).
    h2 = ops.linear(tf.reshape(h1, [self.batch_size, -1]), 1, 'd_h1_lin')
    return h2
def __init__(self, num_actions, state, action=None, target=None, learning_rate=None, scope='DQN'):
    # State  - input state to pass through the network
    # Action - action for which the Q value should be predicted (only required for training)
    # Target - target Q value (only required for training)
    self.input = state
    self.action = action
    self.target = target
    self.num_actions = num_actions
    self.scope = scope

    if learning_rate is not None:
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=0.95, epsilon=0.01)

    with tf.variable_scope(self.scope):
        with tf.variable_scope('input_layers'):
            self.input_float = tf.to_float(self.input)
            self.input_norm = tf.divide(self.input_float, 255.0)
        self.conv1 = conv2d(self.input_norm, 8, 32, 4, tf.nn.relu, scope='conv1')
        self.conv2 = conv2d(self.conv1, 4, 64, 2, tf.nn.relu, scope='conv2')
        self.conv3 = conv2d(self.conv2, 3, 64, 1, tf.nn.relu, scope='conv3')
        self.flatten = flatten(self.conv3, scope='flatten')
        self.dense = dense(self.flatten, 512, tf.nn.relu, scope='dense')
        self.output = dense(self.dense, self.num_actions, scope='output')

    self.network_params = tf.trainable_variables(scope=self.scope)
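
# A minimal sketch of how `action`/`target` and the optimizer above would be
# wired into a training op (assumption; the standard DQN loss, not shown in
# this snippet): select the Q value of the taken action and regress it to the
# target with a Huber loss.
def build_train_op(self):
    action_one_hot = tf.one_hot(self.action, self.num_actions)
    q_selected = tf.reduce_sum(self.output * action_one_hot, axis=1)
    loss = tf.losses.huber_loss(self.target, q_selected)
    return self.optimizer.minimize(loss, var_list=self.network_params)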
def generator(self, z_var, cond_inp, stages, t, reuse=False, cond_noise=True):
    alpha_trans = self.alpha_tra
    with tf.variable_scope('g_net', reuse=reuse):
        with tf.variable_scope(self.get_conv_scope_name(0), reuse=reuse):
            mean_lr, log_sigma_lr = self.generate_conditionals(cond_inp)
            cond = self.sample_normal_conditional(mean_lr, log_sigma_lr, cond_noise)

            x = tf.concat([z_var, cond], axis=1)
            x = fc(x, units=4 * 4 * self.get_nf(0))
            x = layer_norm(x)
            x = tf.reshape(x, [-1, 4, 4, self.get_nf(0)])
            x = conv2d(x, f=self.get_nf(0), ks=(3, 3), s=(1, 1))
            x = layer_norm(x, act=tf.nn.relu)
            x = conv2d(x, f=self.get_nf(0), ks=(3, 3), s=(1, 1))
            x = layer_norm(x, act=tf.nn.relu)

        x_iden = None
        for i in range(1, stages):
            if (i == stages - 1) and t:
                # Identity path for the fade-in transition: RGB output of the
                # previous stage, upscaled to the new resolution
                x_iden = self.to_rgb(x, stages - 2)
                x_iden = upscale(x_iden, 2)

            with tf.variable_scope(self.get_conv_scope_name(i), reuse=reuse):
                x = upscale(x, 2)
                x = conv2d(x, f=self.get_nf(i), ks=(3, 3), s=(1, 1))
                x = layer_norm(x, act=tf.nn.relu)
                x = conv2d(x, f=self.get_nf(i), ks=(3, 3), s=(1, 1))
                x = layer_norm(x, act=tf.nn.relu)

        x = self.to_rgb(x, stages - 1)

        if t:
            x = tf.multiply(tf.subtract(1., alpha_trans), x_iden) + tf.multiply(alpha_trans, x)

        return x, mean_lr, log_sigma_lr
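
# `upscale` is assumed to be a nearest-neighbour resize helper along these
# lines (hypothetical; matched to the `upscale(x, 2)` call sites above):
def upscale(x, factor):
    _, h, w, _ = x.get_shape().as_list()
    return tf.image.resize_nearest_neighbor(x, [h * factor, w * factor])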
def encoder(self, images, is_training, reuse=False):
    with tf.variable_scope("generator"):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        encode_layers = dict()

        def encode_layer(x, output_filters, layer):
            act = lrelu(x)
            conv = conv2d(act, output_filters=output_filters, scope="g_e%d_conv" % layer)
            enc = batch_norm(conv, is_training, scope="g_e%d_bn" % layer)
            encode_layers["e%d" % layer] = enc
            return enc

        e1 = conv2d(images, self.generator_dim, scope="g_e1_conv")
        encode_layers["e1"] = e1
        e2 = encode_layer(e1, self.generator_dim * 2, 2)
        e3 = encode_layer(e2, self.generator_dim * 4, 3)
        e4 = encode_layer(e3, self.generator_dim * 8, 4)
        e5 = encode_layer(e4, self.generator_dim * 8, 5)
        e6 = encode_layer(e5, self.generator_dim * 8, 6)
        e7 = encode_layer(e6, self.generator_dim * 8, 7)
        e8 = encode_layer(e7, self.generator_dim * 8, 8)
        return e8, encode_layers
def generator(self, image, embed, is_training=True, reuse=False, sampler=False):
    s = 64
    s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)
    with tf.variable_scope("stageII_g_net", reuse=reuse):
        encoded_img = self.generator_encode_image(image, is_training=is_training)

        # Sample from the multivariate normal distribution of the embeddings
        mean, log_sigma = self.generate_conditionals(embed)
        net_embed = self.sample_normal_conditional(mean, log_sigma)

        # --------------------------------------------------------
        # Concatenate the encoded image and the embeddings
        net_embed = tf.expand_dims(tf.expand_dims(net_embed, 1), 1)
        net_embed = tf.tile(net_embed, [1, s4, s4, 1])
        imgenc_embed = tf.concat([encoded_img, net_embed], 3)

        pre_res = conv2d(imgenc_embed, self.gf_dim * 4, ks=(3, 3), s=(1, 1))
        pre_res = batch_norm(pre_res, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)

        r_block1 = self.generator_residual_layer(pre_res, is_training=is_training)
        r_block2 = self.generator_residual_layer(r_block1, is_training=is_training)
        r_block3 = self.generator_residual_layer(r_block2, is_training=is_training)
        r_block4 = self.generator_residual_layer(r_block3, is_training=is_training)

        return self.generator_upsample(r_block4, is_training=is_training), mean, log_sigma
def from_rgb(self, x, stage):
    with tf.variable_scope(self.get_rgb_name(stage)):
        return conv2d(x, f=self.get_dnf(stage), ks=(1, 1), s=(1, 1), act=lrelu_act())
def build_down_block(self, inputs, name, first=False, last=False):
    if first:
        num_outputs = self.conf.start_channel_num
    elif last:
        num_outputs = inputs.shape[self.channel_axis].value
    else:
        num_outputs = 2 * inputs.shape[self.channel_axis].value
    conv1 = ops.conv2d(inputs, num_outputs, self.conv_size, name + '/conv1')
    conv2 = ops.conv2d(conv1, num_outputs, self.conv_size, name + '/conv2')
    pool = ops.pool2d(conv2, self.pool_size, name + '/pool')
    return pool
def generator_upsample(self, input_layer, is_training=True):
    net_h0 = conv2d_transpose(input_layer, self.gf_dim * 2, ks=(4, 4), init=self.w_init)
    net_h0 = conv2d(net_h0, self.gf_dim * 2, ks=(3, 3), s=(1, 1))
    net_h0 = batch_norm(net_h0, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)

    net_h1 = conv2d_transpose(net_h0, self.gf_dim, ks=(4, 4), init=self.w_init)
    net_h1 = conv2d(net_h1, self.gf_dim, ks=(3, 3), s=(1, 1))
    net_h1 = batch_norm(net_h1, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)

    net_h2 = conv2d_transpose(net_h1, self.gf_dim // 2, ks=(4, 4), init=self.w_init)
    net_h2 = conv2d(net_h2, self.gf_dim // 2, ks=(3, 3), s=(1, 1))
    net_h2 = batch_norm(net_h2, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)

    net_h3 = conv2d_transpose(net_h2, self.gf_dim // 4, ks=(4, 4), init=self.w_init)
    net_h3 = conv2d(net_h3, self.gf_dim // 4, ks=(3, 3), s=(1, 1))
    net_h3 = batch_norm(net_h3, train=is_training, init=self.batch_norm_init, act=tf.nn.relu)

    return conv2d(net_h3, self.image_dims[-1], ks=(3, 3), s=(1, 1), act=tf.nn.tanh)
def encode_layer(x, output_filters, layer):
    act = lrelu(x)
    conv = conv2d(act, output_filters=output_filters, scope="g_e%d_conv" % layer)
    enc = batch_norm(conv, is_training, scope="g_e%d_bn" % layer)
    encode_layers["e%d" % layer] = enc
    return enc
def inference(self, images):
    cur_out_num = self.conf.ch_num
    outs = ops.conv2d(
        images, cur_out_num, (3, 3), 'conv_s', train=self.conf.is_train,
        stride=2, act_fn=None, data_format=self.conf.data_format)
    cur_out_num *= 2
    cur_outs = ops.dw_block(  # 112 * 112 * 64
        outs, cur_out_num, 1, 'conv_1_0', self.conf.keep_r,
        self.conf.is_train, data_format=self.conf.data_format)
    outs = tf.concat([outs, cur_outs], axis=1, name='add0')
    cur_out_num *= 2
    outs = ops.dw_block(  # 56 * 56 * 128
        outs, cur_out_num, 2, 'conv_1_1', self.conf.keep_r,
        self.conf.is_train, data_format=self.conf.data_format)
    cur_outs = ops.dw_block(  # 56 * 56 * 128
        outs, cur_out_num, 1, 'conv_1_2', self.conf.keep_r,
        self.conf.is_train, data_format=self.conf.data_format)
    outs = tf.concat([outs, cur_outs], axis=1, name='add1')
    cur_out_num *= 2
    outs = ops.dw_block(  # 28 * 28 * 256
        outs, cur_out_num, 2, 'conv_1_3', self.conf.keep_r,
        self.conf.is_train, data_format=self.conf.data_format)
    cur_outs = ops.dw_block(  # 28 * 28 * 256
        outs, cur_out_num, 1, 'conv_1_4', self.conf.keep_r,
        self.conf.is_train, data_format=self.conf.data_format)
    outs = tf.concat([outs, cur_outs], axis=1, name='add2')
    cur_out_num *= 2
    outs = ops.dw_block(  # 14 * 14 * 512
        outs, cur_out_num, 2, 'conv_1_5', self.conf.keep_r,
        self.conf.is_train, data_format=self.conf.data_format)
    cur_outs = ops.simple_group_block(  # 14 * 14 * 512
        outs, self.conf.block_num, self.conf.keep_r, self.conf.is_train,
        'conv_2_1', self.conf.data_format, self.conf.group_num)
    outs = tf.add(outs, cur_outs, name='add21')
    outs = self.get_block_func()(  # 14 * 14 * 512
        outs, self.conf.block_num, self.conf.keep_r, self.conf.is_train,
        'conv_2_2', self.conf.data_format, self.conf.group_num)
    cur_outs = self.get_block_func()(  # 14 * 14 * 512
        outs, self.conf.block_num, self.conf.keep_r, self.conf.is_train,
        'conv_2_3', self.conf.data_format, self.conf.group_num)
    outs = tf.add(outs, cur_outs, name='add23')
    cur_out_num *= 2
    outs = ops.dw_block(  # 7 * 7 * 1024
        outs, cur_out_num, 2, 'conv_3_0', self.conf.keep_r,
        self.conf.is_train, data_format=self.conf.data_format)
    outs = ops.dw_block(  # 7 * 7 * 1024
        outs, cur_out_num, 1, 'conv_3_1', self.conf.keep_r,
        self.conf.is_train, self.conf.use_rev_conv, self.conf.rev_kernel_size,
        data_format=self.conf.data_format)
    outs = self.get_out_func()(
        outs, 'out', self.conf.class_num, self.conf.is_train,
        data_format=self.conf.data_format)
    return outs
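
# `ops.dw_block` is not shown. A minimal sketch of a depthwise-separable block
# consistent with the call sites above (hypothetical; the dropout keep rate is
# accepted but not applied in this sketch, and NCHW is assumed to match the
# axis=1 concatenations): a depthwise 3x3 convolution at the given stride,
# followed by a pointwise 1x1 convolution to `num_outputs` channels.
def dw_block(inputs, num_outputs, stride, scope, keep_r, is_train,
             data_format='NCHW'):
    with tf.variable_scope(scope):
        out = tf.contrib.layers.separable_conv2d(
            inputs, None, (3, 3), depth_multiplier=1, stride=stride,
            data_format=data_format, scope='depthwise')
        out = tf.contrib.layers.conv2d(
            out, num_outputs, (1, 1), data_format=data_format,
            activation_fn=tf.nn.relu, scope='pointwise')
        return out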
def construct_up_block(self, inputs, down_inputs, name, final=False):
    num_outputs = inputs.shape[self.channel_axis].value
    conv1 = self.deconv_func()(inputs, num_outputs, self.conv_size, name + '/conv1')
    conv1 = tf.concat([conv1, down_inputs], self.channel_axis, name=name + '/concat')
    conv2 = self.conv_func()(conv1, num_outputs, self.conv_size, name + '/conv2')
    # Halve the channel count (integer division) unless this is the final block
    num_outputs = self.conf.class_num if final else num_outputs // 2
    conv3 = ops.conv2d(conv2, num_outputs, self.conv_size, name + '/conv3')
    return conv3
def encode_layer(x, output_filters, layer, keep_rate=1.0):
    # act = lrelu(x)
    enc = tf.nn.relu(x)
    enc = tf.nn.dropout(enc, keep_rate)
    enc = conv2d(enc, output_filters=output_filters, scope="g_e%d_conv" % layer)
    # Batch norm is important for the autoencoder; without it the network
    # outputs nothing useful.
    enc = batch_norm(enc, is_training, scope="g_e%d_bn" % layer)
    encode_layers["e%d" % layer] = enc
    return enc
def __init__(self, sess, img_height, img_width, c_dim, a_dim, reuse=False):
    self.sess = sess
    with tf.variable_scope("policy", reuse=reuse):
        # ob_ph: (mb_size, img_height, img_width, c_dim)
        self.ob_ph = tf.placeholder(tf.uint8, [None, img_height, img_width, c_dim], name="observation")
        ob_normalized = tf.cast(self.ob_ph, tf.float32) / 255.0

        # conv1: (mb_size, img_height1, img_width1, 32)
        h = ops.conv2d(ob_normalized, 32, 8, 8, 4, 4, name="conv1")
        h = tf.nn.relu(h)
        # conv2: (mb_size, img_height2, img_width2, 64)
        h = ops.conv2d(h, 64, 4, 4, 2, 2, name="conv2")
        h = tf.nn.relu(h)
        # conv3: (mb_size, img_height3, img_width3, 64)
        h = ops.conv2d(h, 64, 3, 3, 1, 1, name="conv3")
        h = tf.nn.relu(h)
        # fc: (mb_size, 512)
        h = ops.fc(tf.reshape(h, [-1, h.shape[1] * h.shape[2] * h.shape[3]]), 512, name="fc1")
        h = tf.nn.relu(h)

        # pi:    (mb_size, a_dim)
        # value: (mb_size, 1)
        pi = ops.fc(h, a_dim, name="fc_pi")
        value = ops.fc(h, 1, name="fc_value")

        # value:  (mb_size)
        # action: (mb_size)
        self.value = value[:, 0]
        self.cat_dist = tf.distributions.Categorical(pi)
        self.action = self.cat_dist.sample(1)[0]
        self.pi = pi
def __init__(self, sess, img_height, img_width, c_dim, a_dim, name="policy", reuse=False):
    self.sess = sess
    with tf.variable_scope(name, reuse=reuse):
        # ob_ph: (mb_size, img_height, img_width, c_dim)
        self.ob_ph = tf.placeholder(tf.uint8, [None, img_height, img_width, c_dim], name="observation")
        ob_normalized = tf.cast(self.ob_ph, tf.float32) / 255.0

        # conv1: (mb_size, img_height1, img_width1, 32)
        h = ops.conv2d(ob_normalized, 32, 8, 8, 4, 4, name="conv1")
        h = tf.nn.relu(h)
        # conv2: (mb_size, img_height2, img_width2, 64)
        h = ops.conv2d(h, 64, 4, 4, 2, 2, name="conv2")
        h = tf.nn.relu(h)
        # conv3: (mb_size, img_height3, img_width3, 64)
        h = ops.conv2d(h, 64, 3, 3, 1, 1, name="conv3")
        h = tf.nn.relu(h)
        # fc: (mb_size, 512)
        h = ops.fc(tf.reshape(h, [-1, h.shape[1] * h.shape[2] * h.shape[3]]), 512, name="fc1")
        h = tf.nn.relu(h)

        with tf.variable_scope("actor", reuse=reuse):
            # fc_logits: (mb_size, a_dim)
            logits = ops.fc(h, a_dim, name="a_fc_logits")

        with tf.variable_scope("critic", reuse=reuse):
            # value: (mb_size, 1)
            value = ops.fc(h, 1, name="c_fc_value")

        # value:       (mb_size)
        # action:      (mb_size)
        # neg_logprob: (mb_size)
        self.value = value[:, 0]
        self.distrib = distribs.CategoricalDistrib(logits)
        self.action = self.distrib.sample()
        self.neg_logprob = self.distrib.neg_logp(self.action)
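
# A minimal usage sketch for the policy above (assumptions: the enclosing class
# is called `Policy`, and `obs` is a uint8 NHWC observation batch):
policy = Policy(sess, img_height=84, img_width=84, c_dim=4, a_dim=6)
actions, values, neg_logps = sess.run(
    [policy.action, policy.value, policy.neg_logprob],
    feed_dict={policy.ob_ph: obs})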
def build_down_block(self, inputs, name, layer_index=0, first=False, last=False):
    if first:
        num_outputs = self.conf.start_channel_num
    elif last:
        num_outputs = inputs.shape[self.channel_axis].value
    else:
        num_outputs = 2 * inputs.shape[self.channel_axis].value
    print("dropout keep probabilities on layer %d: " % layer_index,
          self.conf.drop_outs[layer_index])
    conv1 = ops.conv2d(inputs, num_outputs, self.conv_size, name + '/conv1')
    conv1 = ops.dropout(conv1, self.conf.drop_outs[layer_index][0], name + '/dropout1')
    conv2 = ops.conv2d(conv1, num_outputs, self.conv_size, name + '/conv2')
    conv2 = ops.dropout(conv2, self.conf.drop_outs[layer_index][1], name + '/dropout2')
    if layer_index > 1:
        conv3 = ops.conv2d(conv2, num_outputs, self.conv_size, name + '/conv3')
        conv3 = ops.dropout(conv3, self.conf.drop_outs[layer_index][2], name + '/dropout3')
        pool = ops.pool2d(conv3, self.pool_size, name + '/pool')
        pool = ops.dropout(pool, self.conf.drop_outs[layer_index][3], name + '/dropout_pool')
    else:
        pool = ops.pool2d(conv2, self.pool_size, name + '/pool')
        pool = ops.dropout(pool, self.conf.drop_outs[layer_index][2], name + '/dropout_pool')
    return pool
def discriminator(self, inputs, embed, is_training=True, reuse=False):
    # Integer division: s16 is used below as a kernel/stride size
    s16 = self.output_size // 16
    lrelu = lambda l: tf.nn.leaky_relu(l, 0.2)
    with tf.variable_scope("d_net", reuse=reuse):
        net_ho = conv2d(inputs, self.df_dim, ks=(4, 4), s=(2, 2), act=lrelu, init=self.w_init)
        net_h1 = conv2d(net_ho, self.df_dim * 2, ks=(4, 4), s=(2, 2), init=self.w_init)
        net_h1 = batch_norm(net_h1, train=is_training, init=self.batch_norm_init, act=lrelu)
        net_h2 = conv2d(net_h1, self.df_dim * 4, ks=(4, 4), s=(2, 2), init=self.w_init)
        net_h2 = batch_norm(net_h2, train=is_training, init=self.batch_norm_init, act=lrelu)
        net_h3 = conv2d(net_h2, self.df_dim * 8, ks=(4, 4), s=(2, 2), init=self.w_init)
        net_h3 = batch_norm(net_h3, train=is_training, init=self.batch_norm_init)

        # --------------------------------------------------------
        # Residual layer
        net = conv2d(net_h3, self.df_dim * 2, ks=(1, 1), s=(1, 1), padding='valid', init=self.w_init)
        net = batch_norm(net, train=is_training, init=self.batch_norm_init, act=lrelu)
        net = conv2d(net, self.df_dim * 2, ks=(3, 3), s=(1, 1), init=self.w_init)
        net = batch_norm(net, train=is_training, init=self.batch_norm_init, act=lrelu)
        net = conv2d(net, self.df_dim * 8, ks=(3, 3), s=(1, 1), init=self.w_init)
        net = batch_norm(net, train=is_training, init=self.batch_norm_init)
        net_h4 = tf.add(net_h3, net)
        net_h4 = tf.nn.leaky_relu(net_h4, 0.2)

        # --------------------------------------------------------
        # Compress embeddings
        net_embed = tf.layers.dense(embed, units=self.compressed_embed_dim, activation=lrelu)

        # Append embeddings in depth
        net_embed = tf.expand_dims(tf.expand_dims(net_embed, 1), 1)
        net_embed = tf.tile(net_embed, [1, 4, 4, 1])
        net_h4_concat = tf.concat([net_h4, net_embed], 3)

        net_h4 = conv2d(net_h4_concat, self.df_dim * 8, ks=(1, 1), s=(1, 1), padding='valid', init=self.w_init)
        net_h4 = batch_norm(net_h4, train=is_training, init=self.batch_norm_init, act=lrelu)
        net_logits = conv2d(net_h4, 1, ks=(s16, s16), s=(s16, s16), padding='valid', init=self.w_init)
        return tf.nn.sigmoid(net_logits), net_logits
def encoder(self, images, is_training, reuse=False):
    """Encoder network.

    :param images: input image batch
    :param is_training: whether batch norm runs in training mode
    :param reuse: reuse the variables of an existing scope
    :return: the bottleneck encoding and a dict of all encoder layers
    """
    with tf.variable_scope("generator"):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        encode_layers = dict()

        def encode_layer(x, output_filters, layer, keep_rate=1.0):
            # act = lrelu(x)
            enc = tf.nn.relu(x)
            enc = tf.nn.dropout(enc, keep_rate)
            enc = conv2d(enc, output_filters=output_filters, scope="g_e%d_conv" % layer)
            # Batch norm is important for the autoencoder; without it the
            # network outputs nothing useful.
            enc = batch_norm(enc, is_training, scope="g_e%d_bn" % layer)
            encode_layers["e%d" % layer] = enc
            return enc

        e1 = conv2d(images, self.network_dim, scope="g_e1_env")  # 128 x 128
        encode_layers["e1"] = e1
        e2 = encode_layer(e1, self.network_dim * 2, 2)  # 64 x 64
        e3 = encode_layer(e2, self.network_dim * 4, 3)  # 32 x 32
        e4 = encode_layer(e3, self.network_dim * 8, 4)  # 16 x 16
        e5 = encode_layer(e4, self.network_dim * 8, 5)  # 8 x 8
        e6 = encode_layer(e5, self.network_dim * 8, 6)  # 4 x 4
        e7 = encode_layer(e6, self.network_dim * 8, 7)  # 2 x 2
        e8 = encode_layer(e7, self.network_dim * 8, 8)  # 1 x 1
        return e8, encode_layers
def __call__(self, speech_features, is_training, reuse=False):
    with tf.variable_scope(self.scope, reuse=reuse):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        batch_norm = BatchNorm(epsilon=1e-5, momentum=0.9)
        speech_features = batch_norm(speech_features, reuse=reuse, is_training=is_training)
        speech_feature_shape = speech_features.get_shape().as_list()
        # Reshape to [batch, time, 1, features] so the temporal convolutions
        # below can be expressed as 2-D convolutions with k_w = 1
        speech_features_reshaped = tf.reshape(
            tensor=speech_features,
            shape=[-1, speech_feature_shape[1], 1, speech_feature_shape[2]])

        factor = self._speech_encoder_size_factor

        with tf.name_scope('conv1_time'):
            conv1_time = conv2d(inputs=speech_features_reshaped, n_filters=int(32 * factor),
                                k_h=3, k_w=1, stride_h=2, stride_w=1,
                                activation=tf.identity, scope='conv1')
            conv1_time = tf.nn.relu(tf.contrib.layers.batch_norm(
                conv1_time, decay=0.9, updates_collections=None, epsilon=1e-5,
                center=True, scale=True, is_training=is_training, reuse=reuse, scope='BN1'))

        with tf.name_scope('conv2_time'):
            conv2_time = conv2d(inputs=conv1_time, n_filters=int(32 * factor),
                                k_h=3, k_w=1, stride_h=2, stride_w=1,
                                activation=tf.identity, scope='conv2')
            conv2_time = tf.nn.relu(tf.contrib.layers.batch_norm(
                conv2_time, decay=0.9, updates_collections=None, epsilon=1e-5,
                center=True, scale=True, is_training=is_training, reuse=reuse, scope='BN2'))

        with tf.name_scope('conv3_time'):
            conv3_time = conv2d(inputs=conv2_time, n_filters=int(64 * factor),
                                k_h=3, k_w=1, stride_h=2, stride_w=1,
                                activation=tf.identity, scope='conv3')
            conv3_time = tf.nn.relu(tf.contrib.layers.batch_norm(
                conv3_time, decay=0.9, updates_collections=None, epsilon=1e-5,
                center=True, scale=True, is_training=is_training, reuse=reuse, scope='BN3'))

        with tf.name_scope('conv4_time'):
            conv4_time = tf.nn.relu(conv2d(inputs=conv3_time, n_filters=int(64 * factor),
                                           k_h=3, k_w=1, stride_h=2, stride_w=1,
                                           activation=tf.identity, scope='conv4'))

        previous_shape = conv4_time.get_shape().as_list()
        time_conv_flattened = tf.reshape(
            conv4_time,
            [-1, previous_shape[1] * previous_shape[2] * previous_shape[3]])

        # Conditioning the audio encoding on speaker style is disabled here;
        # the flattened convolution output is used directly
        with tf.name_scope('concat_audio_embedding'):
            concatenated = time_conv_flattened

        units_in = concatenated.get_shape().as_list()[1]
        with tf.name_scope('fc1'):
            fc1 = tf.nn.relu(fc_layer(concatenated, num_units_in=units_in,
                                      num_units_out=128, scope='fc1'))
            fc1 = tf.layers.dropout(fc1, rate=0.2, training=is_training)
        with tf.name_scope('fc2'):
            fc2 = tf.nn.relu(fc_layer(fc1, num_units_in=128,
                                      num_units_out=self._speech_encoding_dim, scope='fc2'))
            fc2 = tf.layers.dropout(fc2, rate=0.2, training=is_training)

        return fc2
def generator(input_, angles, reuse=False):
    """Generator.

    Parameters
    ----------
    input_: tensor, input images.
    angles: tensor, target gaze direction.
    reuse: bool, reuse the net if True.

    Returns
    -------
    x: tensor, generated image.
    """
    channel = 64
    style_dim = angles.get_shape().as_list()[-1]
    angles_reshaped = tf.reshape(angles, [-1, 1, 1, style_dim])
    angles_tiled = tf.tile(angles_reshaped,
                           [1, tf.shape(input_)[1], tf.shape(input_)[2], 1])
    x = tf.concat([input_, angles_tiled], axis=3)

    with tf.compat.v1.variable_scope('generator', reuse=reuse):
        # input layer
        x = conv2d(x, channel, d_h=1, d_w=1, scope='conv2d_input',
                   use_bias=False, pad=3, conv_filters_dim=7)
        x = instance_norm(x, scope='in_input')
        x = relu(x)

        # encoder
        for i in range(2):
            x = conv2d(x, 2 * channel, d_h=2, d_w=2, scope='conv2d_%d' % i,
                       use_bias=False, pad=1, conv_filters_dim=4)
            x = instance_norm(x, scope='in_conv_%d' % i)
            x = relu(x)
            channel = 2 * channel

        # bottleneck
        for i in range(6):
            x_a = conv2d(x, channel, conv_filters_dim=3, d_h=1, d_w=1, pad=1,
                         use_bias=False, scope='conv_res_a_%d' % i)
            x_a = instance_norm(x_a, 'in_res_a_%d' % i)
            x_a = relu(x_a)
            x_b = conv2d(x_a, channel, conv_filters_dim=3, d_h=1, d_w=1, pad=1,
                         use_bias=False, scope='conv_res_b_%d' % i)
            x_b = instance_norm(x_b, 'in_res_b_%d' % i)
            x = x + x_b

        # decoder
        for i in range(2):
            x = deconv2d(x, int(channel / 2), conv_filters_dim=4, d_h=2, d_w=2,
                         use_bias=False, scope='deconv_%d' % i)
            x = instance_norm(x, scope='in_decon_%d' % i)
            x = relu(x)
            channel = int(channel / 2)

        x = conv2d(x, 3, conv_filters_dim=7, d_h=1, d_w=1, pad=3,
                   use_bias=False, scope='output')
        x = tanh(x)
    return x
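
# `instance_norm` is assumed to be a standard instance-normalization helper
# (a minimal sketch; variable names are hypothetical): normalize each sample
# and channel over its spatial dimensions, then apply a learned scale/offset.
def instance_norm(x, scope='instance_norm'):
    with tf.compat.v1.variable_scope(scope):
        depth = x.get_shape().as_list()[-1]
        scale = tf.compat.v1.get_variable('scale', [depth],
                                          initializer=tf.ones_initializer())
        offset = tf.compat.v1.get_variable('offset', [depth],
                                           initializer=tf.zeros_initializer())
        mean, variance = tf.nn.moments(x, axes=[1, 2], keep_dims=True)
        inv = tf.math.rsqrt(variance + 1e-5)
        return scale * (x - mean) * inv + offset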
def discriminator(params, x_init, reuse=False):
    """Discriminator.

    Parameters
    ----------
    params: dict.
    x_init: input tensor.
    reuse: bool, reuse the net if True.

    Returns
    -------
    x_gan: tensor, outputs for adversarial training.
    x_reg: tensor, outputs for gaze estimation.
    """
    layers = 5
    channel = 64
    image_size = params.image_size
    with tf.compat.v1.variable_scope('discriminator', reuse=reuse):
        # 64 3 -> 32 64 -> 16 128 -> 8 256 -> 4 512 -> 2 1024
        x = conv2d(x_init, channel, conv_filters_dim=4, d_h=2, d_w=2,
                   scope='conv_0', pad=1, use_bias=True)
        x = lrelu(x)

        for i in range(1, layers):
            x = conv2d(x, channel * 2, conv_filters_dim=4, d_h=2, d_w=2,
                       scope='conv_%d' % i, pad=1, use_bias=True)
            x = lrelu(x)
            channel = channel * 2

        filter_size = int(image_size / 2 ** layers)
        x_gan = conv2d(x, 1, conv_filters_dim=filter_size, d_h=1, d_w=1, pad=1,
                       scope='conv_logit_gan', use_bias=False)
        x_reg = conv2d(x, 2, conv_filters_dim=filter_size, d_h=1, d_w=1, pad=0,
                       scope='conv_logit_reg', use_bias=False)
        x_reg = tf.reshape(x_reg, [-1, 2])

        return x_gan, x_reg
def logits(self, x_onehot):
    batch_size = self.batch_size
    seq_len = self.seq_len
    vocab_size = self.vocab_size
    dis_emb_dim = self.dis_emb_dim
    num_rep = self.num_rep
    sn = self.sn

    # Embedding dimension for each representation
    emb_dim_single = int(dis_emb_dim / num_rep)
    assert isinstance(emb_dim_single, int) and emb_dim_single > 0

    filter_sizes = [2, 3, 4, 5]
    num_filters = [300, 300, 300, 300]
    dropout_keep_prob = 0.75

    d_embeddings = tf.get_variable('d_emb', shape=[vocab_size, dis_emb_dim],
                                   initializer=create_linear_initializer(vocab_size))
    input_x_re = tf.reshape(x_onehot, [-1, vocab_size])
    emb_x_re = tf.matmul(input_x_re, d_embeddings)
    # batch_size x seq_len x dis_emb_dim
    emb_x = tf.reshape(emb_x_re, [batch_size, seq_len, dis_emb_dim])
    # batch_size x seq_len x dis_emb_dim x 1
    emb_x_expanded = tf.expand_dims(emb_x, -1)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for filter_size, num_filter in zip(filter_sizes, num_filters):
        conv = conv2d(emb_x_expanded, num_filter, k_h=filter_size, k_w=emb_dim_single,
                      d_h=1, d_w=emb_dim_single, sn=sn, stddev=None, padding='VALID',
                      scope="conv-%s" % filter_size)
        # batch_size x (seq_len-k_h+1) x num_rep x num_filter
        out = tf.nn.relu(conv, name="relu_new")
        pooled = tf.nn.max_pool(out, ksize=[1, seq_len - filter_size + 1, 1, 1],
                                strides=[1, 1, 1, 1], padding='VALID', name="pool_new")
        # batch_size x 1 x num_rep x num_filter
        pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = sum(num_filters)
    # batch_size x 1 x num_rep x num_filters_total
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

    # Add highway: (batch_size*num_rep) x num_filters_total
    h_highway = highway(h_pool_flat, h_pool_flat.get_shape()[1], 1, 0)
    # Add dropout
    h_drop = tf.nn.dropout(h_highway, dropout_keep_prob, name='dropout_new')

    # Fully connected layers down to a single logit per representation
    fc_out = linear(h_drop, output_size=100, use_bias=True, sn=sn, scope='fc_new')
    logits = linear(fc_out, output_size=1, use_bias=True, sn=sn, scope='logits_new')
    logits = tf.squeeze(logits, -1)  # batch_size * num_rep
    return logits
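
# `highway` is assumed to be the standard highway layer; a minimal sketch, with
# the (input_, size, num_layers, bias) signature inferred from the call above:
# y = t * g(Wx + b) + (1 - t) * x, where the transform gate is t = sigmoid(W_T x + b_T).
def highway(input_, size, num_layers=1, bias=-2.0):
    output = input_
    for idx in range(num_layers):
        g = tf.nn.relu(linear(output, size, scope='highway_lin_%d' % idx))
        t = tf.sigmoid(linear(output, size, scope='highway_gate_%d' % idx) + bias)
        output = t * g + (1. - t) * output
    return output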
def forward_pass(self, state_in, reshape=True, sigmoid_out=False, reuse=None):
    self.state_in = state_in
    shape_in = self.state_in.get_shape().as_list()
    # Get number of input channels for weight/bias init
    channels_in = shape_in[-1]

    def uniform_init(fan_in):
        # Uniform initializer in [-1/sqrt(fan_in), 1/sqrt(fan_in)]
        bound = 1.0 / tf.sqrt(float(fan_in))
        return tf.random_uniform_initializer(-bound, bound)

    with tf.variable_scope(self.scope, reuse=reuse):
        if reshape:
            # Reshape [batch_size, traj_len, H, W, C] into [batch_size*traj_len, H, W, C]
            self.state_in = tf.reshape(self.state_in, [-1, shape_in[2], shape_in[3], shape_in[4]])

        init1 = uniform_init(channels_in * self.kernels[0] * self.kernels[0])
        self.conv1 = conv2d(self.state_in, self.num_filters, self.kernels[0], self.strides[0],
                            kernel_init=init1, bias_init=init1, scope='conv1')
        self.conv1 = lrelu(self.conv1, self.lrelu_alpha, scope='conv1')

        init2 = uniform_init(self.num_filters * self.kernels[1] * self.kernels[1])
        self.conv2 = conv2d(self.conv1, self.num_filters, self.kernels[1], self.strides[1],
                            kernel_init=init2, bias_init=init2, scope='conv2')
        self.conv2 = lrelu(self.conv2, self.lrelu_alpha, scope='conv2')

        init3 = uniform_init(self.num_filters * self.kernels[2] * self.kernels[2])
        self.conv3 = conv2d(self.conv2, self.num_filters, self.kernels[2], self.strides[2],
                            kernel_init=init3, bias_init=init3, scope='conv3')
        self.conv3 = lrelu(self.conv3, self.lrelu_alpha, scope='conv3')

        init4 = uniform_init(self.num_filters * self.kernels[3] * self.kernels[3])
        self.conv4 = conv2d(self.conv3, self.num_filters, self.kernels[3], self.strides[3],
                            kernel_init=init4, bias_init=init4, scope='conv4')
        self.conv4 = lrelu(self.conv4, self.lrelu_alpha, scope='conv4')

        self.flatten = flatten(self.conv4)

        init_dense = uniform_init(self.num_filters)
        self.dense = dense(self.flatten, self.dense_size,
                           kernel_init=init_dense, bias_init=init_dense)

        init_out = uniform_init(self.dense_size)
        self.output = dense(self.dense, 1, kernel_init=init_out, bias_init=init_out,
                            scope='output')

        if sigmoid_out:
            self.output = tf.nn.sigmoid(self.output)

        if reshape:
            # Reshape 1-D reward output [batch_size*traj_len] into batches [batch_size, traj_len]
            self.output = tf.reshape(self.output, [-1, shape_in[1]])

    self.network_params = tf.trainable_variables(scope=self.scope)

    return self.output
def to_rgb(self, x, stage):
    with tf.variable_scope(self.get_rgb_name(stage)):
        x = conv2d(x, f=9, ks=(2, 2), s=(1, 1), act=tf.nn.relu)
        x = conv2d(x, f=3, ks=(1, 1), s=(1, 1))
        return x