def build_layers(self, state, action, c_names, units_1, units_2, w_i, b_i, reg=None):
    with tf.variable_scope('conv1'):
        conv1 = conv(state, [5, 5, 3, 6], [6], [1, 2, 2, 1], w_i, b_i)
    with tf.variable_scope('conv2'):
        conv2 = conv(conv1, [3, 3, 6, 12], [12], [1, 2, 2, 1], w_i, b_i)
    with tf.variable_scope('flatten'):
        flatten = tf.contrib.layers.flatten(conv2)
    with tf.variable_scope('dense1'):
        dense1 = dense(flatten, units_1, [units_1], w_i, b_i)
    with tf.variable_scope('dense2'):
        dense2 = dense(dense1, units_2, [units_2], w_i, b_i)
    with tf.variable_scope('concat'):
        concatenated = tf.concat([dense2, tf.cast(action, tf.float32)], 1)
    with tf.variable_scope('dense3'):
        dense3 = dense(concatenated, self.atoms, [self.atoms], w_i, b_i)
    # Return the softmax distribution over the atoms.
    return tf.nn.softmax(dense3)

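# A minimal usage sketch (not part of the original code) of how a per-atom softmax
# like the one returned above is typically reduced to a scalar Q-value in
# distributional RL: take the expectation over a fixed support. The names
# `z_support`, `v_min`, `v_max`, and `atoms` below are assumptions for illustration.
import tensorflow as tf

atoms = 51
v_min, v_max = -10.0, 10.0
z_support = tf.linspace(v_min, v_max, atoms)          # fixed atom support
dist = tf.nn.softmax(tf.random_normal([32, atoms]))   # stands in for build_layers(...)
q_values = tf.reduce_sum(dist * z_support, axis=-1)   # expected value per sample
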
def _make_discriminator(inputs, data_shape, nplanes=128):
    diff_input = inputs['diff_img']
    keep_prob = inputs['keep_prob']

    g1_weights = tf.get_variable('g1_weights', [5, 5, 3, nplanes])
    g1_bias = tf.get_variable('g1_bias', [nplanes])
    g1 = tf.nn.relu(utils.conv2d(g1_weights, bias=g1_bias)(diff_input))

    g2_weights = tf.get_variable('g2_weights', [5, 5, nplanes, nplanes])
    g2_bias = tf.get_variable('g2_bias', [nplanes])
    g2 = tf.nn.relu(utils.conv2d(g2_weights, bias=g2_bias)(g1))

    flattened = tf.reshape(g2, [-1, data_shape[0] * data_shape[1] * nplanes])
    dropout = tf.nn.dropout(flattened, keep_prob)

    prob_weights = tf.get_variable(
        'prob_weights', [data_shape[0] * data_shape[1] * nplanes, 1])
    prob_biases = tf.get_variable('prob_bias', [1])
    y_logits = utils.dense(prob_weights, bias=prob_biases)(dropout)
    y = tf.nn.sigmoid(y_logits, name='prob_real')

    class_weights = tf.get_variable(
        'class_weights', [data_shape[0] * data_shape[1] * nplanes, 10])
    class_biases = tf.get_variable('class_bias', [1])
    class_logits = utils.dense(class_weights, bias=class_biases)(dropout)
    class_prob = tf.nn.softmax(class_logits, name='logits_class')

    return {
        'prob_real': y,
        'logits_real': y_logits,
        'prob_class': class_prob,
        'logits_class': class_logits
    }

def __call__(self, x, y=None, sn=False, is_training=True, reuse=False):
    with tf.variable_scope(self.name, reuse=reuse):
        batch_size = x.get_shape().as_list()[0]
        if y is not None:
            ydim = y.get_shape().as_list()[-1]
            y = tf.reshape(y, [batch_size, 1, 1, ydim])
            x = conv_cond_concat(x, y)  # [bz, 28, 28, 11]
        x = tf.reshape(x, (batch_size, -1))
        net = lrelu(dense(x, 512, sn=sn, name='d_fc1'), name='d_l1')
        net = lrelu(bn(dense(net, 256, sn=sn, name='d_fc2'), is_training, name='d_bn2'), name='d_l2')
        net = lrelu(bn(dense(net, 128, sn=sn, name='d_fc3'), is_training, name='d_bn3'), name='d_l3')
        yd = dense(net, 1, sn=sn, name="D_dense")
        if self.class_num:
            yc = dense(net, self.class_num, sn=sn, name='C_dense')
            return yd, net, yc
        else:
            return yd, net

def forward(x):
    # input shape (b, d)
    h_d0 = tf.expand_dims(x, 1)
    h_d1 = ff.dense(h_d0, V_d1[0] + V_d1[1] * V_d1[2],
                    activation=tf.nn.leaky_relu)  # *V_d1[2]
    h_d2 = ff.dense(h_d1, V_d2[0] + V_d2[1] * V_d2[2])  # *V_d2[2]
    return tf.reduce_sum(h_d2, 1)

def build_disc0(x, testing=False, reuse=False):
    disc0_l1 = x + tf.random_normal(shape=tf.shape(x), stddev=0.05)
    disc0_l1 = utils.lrelu(
        utils.conv2d(disc0_l1, (3, 3, 3, 96), name='disc0_conv1'))

    # 32 x 32 --> 16 x 16
    disc0_l2 = utils.batch_normalization(utils.lrelu(
        utils.conv2d(disc0_l1, (3, 3, 96, 96), stride=[1, 2, 2, 1],
                     name='disc0_conv2')),
        name='bn1', reuse=reuse)
    disc0_l2 = tf.nn.dropout(disc0_l2, 0.1 if testing else 1)

    # 16 x 16 --> 8 x 8
    disc0_l3 = utils.batch_normalization(utils.lrelu(
        utils.conv2d(disc0_l2, (3, 3, 96, 192), stride=[1, 2, 2, 1],
                     name='disc0_conv3')),
        name='bn2', reuse=reuse)

    # 8 x 8 --> 8 x 8
    disc0_l4 = utils.batch_normalization(utils.lrelu(
        utils.conv2d(disc0_l3, (3, 3, 192, 192), name='disc0_conv4')),
        name='bn3', reuse=reuse)
    disc0_l4 = tf.nn.dropout(disc0_l4, 0.1 if testing else 1)

    # 8 x 8 --> 6 x 6
    disc0_l5 = tf.layers.batch_normalization(utils.lrelu(
        utils.conv2d(disc0_l4, (3, 3, 192, 192), padding='VALID',
                     name='disc0_conv5')),
        name='bn4', reuse=reuse)
    disc0_l5 = tf.reshape(disc0_l5, [100, 6, 6, 192])

    disc0_shared = utils.lrelu(
        utils.network_in_network(disc0_l5, 192, num_units=192,
                                 name='disc0_shared'))
    disc0_shared_flat = tf.reshape(disc0_shared, [-1, 192 * 6 * 6])
    disc0_z_recon = utils.dense(disc0_shared_flat, num_inputs=192 * 6 * 6,
                                num_units=16, name='disc0_z_recon')
    disc0_shared_pool = tf.reduce_mean(disc0_shared, [1, 2])
    disc0_adv = utils.dense(disc0_shared_pool, num_inputs=192, num_units=10,
                            name='disc1_z_adv')

    # disc0_adv is the pre-softmax classification output for the discriminator
    return disc0_adv, disc0_z_recon

def build_gan(self):
    print('source_Y', self.source_Y.get_shape().as_list())
    print('enX_outputs', self.enX_outputs.get_shape().as_list())
    _, D_state_real = self.discriminator(self.source_Y, self.len_Y, reuse=False)
    _, D_state_fake = self.discriminator(self.enX_outputs, self.len_X, reuse=True)
    self.D_logit_real = dense(D_state_real[-1].c, 1, name="D_dense")
    self.D_logit_fake = dense(D_state_fake[-1].c, 1, name="D_dense", reuse=True)

def discriminator(inputs, reuse=False):
    with tf.variable_scope('discriminator', reuse=reuse) as d_vs:
        layer = dense(inputs, h_dim, activation=tf.nn.elu)
        layer = dense(layer, h_dim, activation=tf.nn.elu, batch_residual=True)
        layer = dense(layer, h_dim, activation=tf.nn.elu, batch_residual=True)
        d_logit = dense(layer, 1, activation=None)
        d_prob = tf.nn.sigmoid(d_logit)
    d_vars = tf.contrib.framework.get_variables(d_vs)
    return d_prob, d_logit, d_vars

def Generator(z, reuse=False):
    with tf.variable_scope('generator', reuse=reuse) as g_vs:
        layer = dense(z, h_dim, activation=tf.nn.elu)
        layer = dense(layer, h_dim, activation=tf.nn.elu, batch_residual=False)
        layer = dense(layer, h_dim, activation=tf.nn.elu, batch_residual=False, dropout=0.9)
        g = dense(layer, data_dim, activation=None)  # Outputting xy pairs.
    g_vars = tf.contrib.framework.get_variables(g_vs)
    return g, g_vars

def generator(z, reuse=False):
    # inputs = tf.concat(axis=1, values=[z, x])
    with tf.variable_scope('generator', reuse=reuse) as g_vs:
        layer = dense(z, h_dim, activation=tf.nn.elu)
        layer = dense(layer, h_dim, activation=tf.nn.elu, batch_residual=True)
        layer = dense(layer, h_dim, activation=tf.nn.elu, batch_residual=True)
        g = dense(layer, data_dim, activation=None)  # Outputting xy pairs.
    g_vars = tf.contrib.framework.get_variables(g_vs)
    return g, g_vars

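# A minimal wiring sketch (an assumption, not part of the original code) showing how a
# generator/discriminator pair like the ones above is commonly combined into the standard
# GAN sigmoid-cross-entropy losses. The placeholders and `z_dim` are hypothetical;
# `data_dim` is assumed to match the generator's output size.
x = tf.placeholder(tf.float32, [None, data_dim])
z = tf.placeholder(tf.float32, [None, z_dim])

g_sample, g_vars = generator(z)
d_prob_real, d_logit_real, d_vars = discriminator(x)
d_prob_fake, d_logit_fake, _ = discriminator(g_sample, reuse=True)

d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logit_real,
                                            labels=tf.ones_like(d_logit_real)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logit_fake,
                                            labels=tf.zeros_like(d_logit_fake)))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logit_fake,
                                            labels=tf.ones_like(d_logit_fake)))

d_opt = tf.train.AdamOptimizer(1e-4).minimize(d_loss, var_list=d_vars)
g_opt = tf.train.AdamOptimizer(1e-4).minimize(g_loss, var_list=g_vars)
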
def __call__(self, x, is_training=True, reuse=False):
    # Note: reuse must be passed by keyword; positionally it would be taken as default_name.
    with tf.variable_scope(self.name, reuse=reuse):
        net = lrelu(bn(dense(x, 64, name='c_fc1'), is_training, name='c_bn1'), name='c_l1')
        out_logit = dense(net, self.class_num, name='c_l2')
        out = tf.nn.softmax(out_logit)
        return out_logit, out

def build_ae(self):
    self.enX_outputs, _ = self.encoder_X(self.source_X, self.len_X)
    self.enX_outputs = tf.reshape(self.enX_outputs, (-1, self.enX_outputs.get_shape().as_list()[-1]))
    self.enX_outputs = dense(self.enX_outputs, 3, name='encoder_X_output')
    self.enX_outputs = tf.reshape(self.enX_outputs, (-1, self.batch_size, 3))

    self.deX_outputs, _ = self.decoder_X(self.enX_outputs, self.len_X)
    self.deX_outputs = tf.reshape(self.deX_outputs, (-1, self.deX_outputs.get_shape().as_list()[-1]))
    self.deX_outputs = dense(self.deX_outputs, 6, name='decoder_X_output')
    self.deX_outputs = tf.reshape(self.deX_outputs, (-1, self.batch_size, 6))

def discriminator_from_params(x, params, isize=28 * 28, n_hid=100):
    # bn1 = batch_norm(name='bn1')
    # bn2 = batch_norm(name='bn2')
    hid = dense(x, n_hid, scope='l1', params=params[:2], normalized=True)
    hid = tf.nn.relu(hid)
    # hid = tf.tanh(hid)
    hid = dense(hid, n_hid, scope='l2', params=params[2:4], normalized=True)
    hid = tf.nn.relu(hid)
    # hid = tf.tanh(hid)
    out = dense(hid, 1, scope='d_out', params=params[4:])
    return out

def discriminator(x, z_size, n_hid=500, isize=28 * 28, reuse=False):
    # bn1 = batch_norm(name='bn1')
    # bn2 = batch_norm(name='bn2')
    hid = dense(x, n_hid, scope='l1', reuse=reuse, normalized=True)
    hid = tf.nn.relu(hid)
    # hid = tf.tanh(hid)
    hid = dense(hid, n_hid, scope='l2', reuse=reuse, normalized=True)
    # hid = tf.nn.dropout(hid, 0.2)
    hid = tf.nn.relu(hid)
    # hid = tf.tanh(hid)
    out = dense(hid, 1, scope='d_out', reuse=reuse)
    return out

def build_disc1(h1, testing=False, reuse=False):
    # 16 x 16 --> 8 x 8
    disc1_conv1 = utils.batch_normalization(utils.lrelu(
        utils.conv2d(h1, (3, 3, 3, 32), stride=[1, 2, 2, 1],
                     name='disc1_conv1')),
        name='disc1_bn1', reuse=reuse)
    disc1_conv2 = utils.batch_normalization(utils.lrelu(
        utils.conv2d(disc1_conv1, (3, 3, 32, 64), name='disc1_conv2')),
        name='disc1_bn2', reuse=reuse)

    # 8 x 8 --> 8 x 8
    disc1_conv3 = utils.batch_normalization(utils.lrelu(
        utils.conv2d(disc1_conv2, (3, 3, 64, 64), name='disc1_conv3')),
        name='disc1_bn3', reuse=reuse)
    disc1_conv3 = tf.nn.dropout(disc1_conv3, 0.1 if testing else 1)

    # 8 x 8 --> 6 x 6
    disc1_conv4 = tf.layers.batch_normalization(utils.lrelu(
        utils.conv2d(disc1_conv3, (3, 3, 64, 64), padding='VALID',
                     name='disc1_conv4')),
        name='bn4', reuse=reuse)
    disc1_l5 = tf.reshape(disc1_conv4, [100, 6, 6, 64])

    disc1_shared = utils.lrelu(
        utils.network_in_network(disc1_l5, 64, num_units=64,
                                 name='disc1_shared'))
    disc1_shared_flat = tf.reshape(disc1_shared, [-1, 64 * 6 * 6])
    disc1_z_recon = utils.dense(disc1_shared_flat, num_inputs=64 * 6 * 6,
                                num_units=50, name='disc1_z_recon')
    disc1_shared_pool = tf.reduce_mean(disc1_shared, [1, 2])
    disc1_adv = utils.dense(disc1_shared_pool, num_inputs=64, num_units=1,
                            name='disc1_z_adv')

    # disc1_adv is the pre-sigmoid output of the discriminator
    return disc1_adv, disc1_z_recon

def build_gen1(y, z1):
    # y is of dimension (batch_size, 8, 8, 3)
    gen1_z_embed = utils.batch_normalization(
        tf.nn.relu(
            utils.dense(z1, num_inputs=50, num_units=256, bias=True,
                        name='gen1_z_embed')))

    y_flatten = tf.reshape(y, (-1, 8 * 8 * 3))
    gen1_y_embed = tf.nn.relu(
        utils.bias(utils.batch_normalization(
            utils.dense(y_flatten, num_inputs=192, num_units=512, bias=False,
                        name='gen1_y_embed')),
            (512,), name='gen1_y_embed_bias'))

    gen1_in = tf.concat([gen1_z_embed, gen1_y_embed], axis=1)

    gen1_l1 = tf.transpose(
        tf.reshape(
            tf.nn.relu(
                utils.bias(utils.batch_normalization(
                    utils.dense(gen1_in, num_inputs=768, num_units=1024,
                                bias=False, name='gen1_l1')),
                    (1024,), name='gen1_l1_bias')),
            (-1, 64, 4, 4)),
        [0, 2, 3, 1])

    gen1_l2 = tf.nn.relu(
        utils.bias(utils.batch_normalization(
            utils.conv2d_transpose(gen1_l1, (4, 4, 64, 64), (100, 11, 11, 64),
                                   bias=False, padding='VALID',
                                   stride=(1, 2, 2, 1), name='gen1_l3')),
            (64,), name='gen1_l3_bias'))

    gen1_l3 = tf.sigmoid(
        utils.conv2d_transpose(gen1_l2, (6, 6, 3, 64), (100, 16, 16, 3),
                               padding='VALID', name='gen1_l4'))

    return gen1_l3

def generator(z, n_hid=500, isize=28 * 28, reuse=False, use_bn=False):
    bn1 = batch_norm(name='bn1')
    bn2 = batch_norm(name='bn2')
    hid = dense(z, n_hid, scope='l1', reuse=reuse)
    if use_bn:
        hid = tf.nn.relu(bn1(hid, train=True))
    else:
        hid = tf.nn.relu(hid)
    hid = dense(hid, n_hid, scope='l2', reuse=reuse)
    if use_bn:
        hid = tf.nn.relu(bn2(hid, train=True))
    else:
        hid = tf.nn.relu(hid)
    out = tf.nn.sigmoid(dense(hid, isize, scope='g_out', reuse=reuse))
    return out

def vgg(inputs, struct, dbb_biases, dbb=False):
    def _block(inputs, filters, dbb, dbb_bias):
        inputs, mem1 = conv(inputs, filters, 3, strides=1, padding='SAME',
                            dbb=dbb, dbb_bias=dbb_bias)
        inputs, mem2 = batch_norm(inputs)
        # print(inputs)
        return inputs, mem1 + mem2

    total_mem = 0
    inputs, mem1 = _block(inputs, struct[0], dbb, dbb_biases[0])
    inputs, mem2 = _block(inputs, struct[1], dbb, dbb_biases[1])
    inputs = pool(inputs, 2, strides=2, padding='VALID')
    inputs, mem3 = _block(inputs, struct[2], dbb, dbb_biases[2])
    inputs, mem4 = _block(inputs, struct[3], dbb, dbb_biases[3])
    inputs = pool(inputs, 2, strides=2, padding='VALID')
    inputs, mem5 = _block(inputs, struct[4], dbb, dbb_biases[4])
    inputs, mem6 = _block(inputs, struct[5], dbb, dbb_biases[5])
    inputs, mem7 = _block(inputs, struct[6], dbb, dbb_biases[6])
    inputs = pool(inputs, 2, strides=2, padding='VALID')
    inputs, mem8 = _block(inputs, struct[7], dbb, dbb_biases[7])
    inputs, mem9 = _block(inputs, struct[8], dbb, dbb_biases[8])
    inputs, mem10 = _block(inputs, struct[9], dbb, dbb_biases[9])
    inputs = pool(inputs, 2, strides=2, padding='VALID')
    inputs, mem11 = _block(inputs, struct[10], dbb, dbb_biases[10])
    inputs, mem12 = _block(inputs, struct[11], dbb, dbb_biases[11])
    inputs, mem13 = _block(inputs, struct[12], dbb, dbb_biases[12])
    inputs = pool(inputs, 2, strides=2, padding='VALID')
    inputs, mem_flat = flatten(inputs, struct[13], dbb, dbb_biases[13])
    inputs, mem14 = dense(inputs, struct[14], dbb=dbb, dbb_bias=dbb_biases[14])
    inputs, mem15 = batch_norm(inputs)
    inputs, mem16 = dense(inputs, struct[15], dbb=False)

    total_mem = mem1 + mem2 + mem3 + mem4 + \
        mem5 + mem6 + mem7 + mem8 + \
        mem9 + mem10 + mem11 + mem12 + \
        mem13 + mem14 + mem15 + mem16 + mem_flat
    return inputs, total_mem

def _build_discriminator(inputs, data_shape, nplanes):
    diff_input = inputs['diff_img']
    keep_prob = inputs['keep_prob']

    g1_weights = tf.get_variable('g1_weights', [5, 5, 3, nplanes])
    g1_bias = tf.get_variable('g1_bias', [nplanes])
    g1 = tf.nn.relu(utils.conv2d(g1_weights, bias=g1_bias)(diff_input))

    g2_weights = tf.get_variable('g2_weights', [5, 5, nplanes, nplanes])
    g2_bias = tf.get_variable('g2_bias', [nplanes])
    g2 = tf.nn.relu(utils.conv2d(g2_weights, bias=g2_bias)(g1))

    flattened = tf.reshape(g2, [-1, data_shape[0] * data_shape[1] * nplanes])
    dropout = tf.nn.dropout(flattened, keep_prob)

    prob_weights = tf.get_variable('prob_weights',
                                   [data_shape[0] * data_shape[1] * nplanes, 1])
    prob_biases = tf.get_variable('prob_bias', [1])
    y_logits = utils.dense(prob_weights, bias=prob_biases)(dropout)
    y = tf.nn.sigmoid(y_logits, name='prob_real')

    # class_weights = tf.get_variable('class_weights', [data_shape[0] * data_shape[1] * nplanes, 10])
    # class_biases = tf.get_variable('class_bias', [1])
    # class_logits = utils.dense(class_weights, bias=class_biases)(dropout)
    # class_prob = tf.nn.softmax(class_logits, name='logits_class')

    return {'prob': y, 'prob_logits': y_logits}  # , 'prob_class': class_prob, 'logits_class': class_logits}

def train_output(self, encoder_output, Y, teacher_probs, reuse):
    """Calculate loss and accuracy."""
    with tf.variable_scope(self.decoder_scope, reuse=reuse):
        logits = dense(encoder_output, self._config.dst_vocab_size, use_bias=False,
                       name="dst_embedding" if self._config.tie_embedding_and_softmax else "softmax",
                       reuse=True if self._config.tie_embedding_and_softmax else None)  # 2D to 3D
        preds = tf.to_int32(tf.argmax(logits, axis=-1))
        mask = tf.to_float(tf.not_equal(Y, 0))

        # Token-level accuracy
        acc = tf.reduce_sum(tf.to_float(tf.equal(preds, Y)) * mask) / tf.reduce_sum(mask)
        if not tf.get_variable_scope().reuse:
            tf.summary.scalar('accuracy', acc)

        if teacher_probs is not None:
            # Knowledge distillation
            loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=teacher_probs)
        else:
            # Smoothed loss
            loss = common_layers.smoothing_cross_entropy(
                logits=logits, labels=Y, vocab_size=self._config.dst_vocab_size,
                confidence=1 - self._config.train.label_smoothing)
        loss = tf.reduce_sum(loss * mask) / tf.reduce_sum(mask)

        return loss

def test_layers2_7_conv_weight(self):
    dnn = self.model(self.config_channels, self.anchors, len(self.category))
    output = dnn(self.image)
    state_dict = dnn.state_dict()
    name = '.'.join(self.id().split('.')[-1].split('_')[1:])
    closure = Closure(name, state_dict, self.model.scope)
    closure(output.grad_fn)
    self.assertDictEqual(closure.output, {
        'layers2.7.conv.weight': 0,
        'layers2.7.bn.weight': 0,
        'layers2.7.bn.bias': 0,
        'layers2.7.bn.running_mean': 0,
        'layers2.7.bn.running_var': 0,
    })
    self.assertDictEqual(closure.input, {
        'layers3.0.conv.weight': 64,
    })
    d = utils.dense(state_dict[name])
    channels = torch.LongTensor(np.argsort(d)[int(len(d) * 0.5):])
    prune(closure, channels)
    config_channels = model.ConfigChannels(self.config_channels.config, state_dict)
    dnn = self.model(config_channels, self.anchors, len(self.category))
    dnn(self.image)
    self.assertEqual(len(channels), len(dnn.state_dict()[name]))

def train_output(self, decoder_output, Y, X, reuse):
    """Calculate loss and accuracy."""
    with tf.variable_scope(self.decoder_scope, reuse=reuse):
        logits_gen = dense(
            decoder_output,
            self._config.dst_vocab_size,
            use_bias=False,
            name="dst_embedding" if self._config.tie_embedding_and_softmax else "softmax",
            reuse=True if self._config.tie_embedding_and_softmax else None)  # 2D to 3D
        preds_gen = tf.to_int32(tf.argmax(logits_gen, axis=-1))
        mask = tf.to_float(tf.not_equal(Y, 0))
        acc_gen = tf.reduce_sum(
            tf.to_float(tf.equal(preds_gen, Y)) * mask) / tf.reduce_sum(mask)

        # Smoothed loss
        loss_gen = common_layers.smoothing_cross_entropy(
            logits=logits_gen,
            labels=Y,
            vocab_size=self._config.dst_vocab_size,
            confidence=1 - self._config.train.label_smoothing)
        mean_loss_gen = tf.reduce_sum(loss_gen * mask) / (tf.reduce_sum(mask))
        return acc_gen, mean_loss_gen

def test_layers3_0_conv_weight(self):
    dnn = self.model(self.config_channels, self.anchors, len(self.category))
    output = dnn(self.image)
    state_dict = dnn.state_dict()
    name = '.'.join(self.id().split('.')[-1].split('_')[1:])
    d = utils.dense(state_dict[name])
    keep = torch.LongTensor(np.argsort(d)[int(len(d) * 0.5):])
    modifier = Modifier(
        name, state_dict, dnn,
        lambda name, var: var[keep],
        lambda name, var, mapper: var[mapper(keep, len(d))],
    )
    modifier(output.grad_fn)
    # check channels
    scope = dnn.scope(name)
    self.assertEqual(state_dict[name].size(0), len(keep))
    self.assertEqual(state_dict[scope + '.bn.weight'].size(0), len(keep))
    self.assertEqual(state_dict[scope + '.bn.bias'].size(0), len(keep))
    self.assertEqual(state_dict[scope + '.bn.running_mean'].size(0), len(keep))
    self.assertEqual(state_dict[scope + '.bn.running_var'].size(0), len(keep))
    # check if runnable
    config_channels = model.ConfigChannels(self.config_channels.config, state_dict)
    dnn = self.model(config_channels, self.anchors, len(self.category))
    dnn.load_state_dict(state_dict)
    dnn(self.image)

def _build_generator(inputs, data_shape, nplanes):
    class_cond = inputs['class_cond']
    base_img = inputs['base_img']
    noise = inputs['noise']

    # We don't actually need a bias here as we are learning a bitplane per class anyways
    class_weights = tf.get_variable('class_weights', [10, data_shape[0] * data_shape[1]])
    class_vec = utils.dense(class_weights)(class_cond)
    class_plane = tf.nn.relu(
        tf.reshape(class_vec, [-1, data_shape[0], data_shape[1], 1]))

    # Reshape the noise
    noise_plane = tf.reshape(noise, [-1, data_shape[0], data_shape[1], 1])

    # Now concatenate the tensors
    stacked_input = tf.concat([base_img, noise_plane, class_plane], axis=3)

    g1_weights = tf.get_variable('g1_weights', [7, 7, data_shape[2] + 2, nplanes])
    g1_bias = tf.get_variable('g1_bias', [nplanes])
    g1 = tf.nn.relu(utils.conv2d(g1_weights, bias=g1_bias)(stacked_input))

    g2_weights = tf.get_variable('g2_weights', [7, 7, nplanes, nplanes])
    g2_bias = tf.get_variable('g2_bias', [nplanes])
    g2 = tf.nn.relu(utils.conv2d(g2_weights, bias=g2_bias)(g1))

    g3_weights = tf.get_variable('g3_weights', [5, 5, nplanes, data_shape[2]])
    g3_bias = tf.get_variable('g3_bias', data_shape[2])
    g3 = tf.nn.tanh(
        utils.conv2d(g3_weights, bias=g3_bias, name='diff_img')(g2))

    return {'diff_img': g3}

def train_output(self, decoder_output, Y, reuse, decoder_scope):
    """Calculate loss and accuracy."""
    with tf.variable_scope(decoder_scope, reuse=reuse):
        if self._config.is_lsoftmax is None:
            self._config.is_lsoftmax = False
        if not self._config.is_lsoftmax:
            logits = dense(decoder_output, self._config.dst_vocab_size, use_bias=False,
                           name="dst_embedding" if self._config.tie_embedding_and_softmax else "softmax",
                           reuse=True if self._config.tie_embedding_and_softmax else None)
        else:
            with tf.variable_scope("dst_embedding" if self._config.tie_embedding_and_softmax else "softmax",
                                   "dense", reuse=reuse):
                input_size = decoder_output.get_shape().as_list()[-1]
                inputs_shape = tf.unstack(tf.shape(decoder_output))
                decoder_output_tmp = tf.reshape(decoder_output, [-1, input_size])
                Y_tmp = tf.reshape(Y, [-1])
                with tf.variable_scope(tf.get_variable_scope(),
                                       reuse=True if self._config.tie_embedding_and_softmax else None):
                    weights = tf.get_variable("kernel", [self._config.dst_vocab_size, input_size])
                    weights = tf.transpose(weights)
                    logits = lsoftmax(decoder_output_tmp, weights, Y_tmp)
                logits = tf.reshape(logits, inputs_shape[:-1] + [self._config.dst_vocab_size])

        preds = tf.to_int32(tf.argmax(logits, axis=-1))
        mask = tf.to_float(tf.not_equal(Y, 0))
        acc = tf.reduce_sum(tf.to_float(tf.equal(preds, Y)) * mask) / tf.reduce_sum(mask)

        # Smoothed loss
        loss = common_layers.smoothing_cross_entropy(
            logits=logits, labels=Y, vocab_size=self._config.dst_vocab_size,
            confidence=1 - self._config.train.label_smoothing)
        mean_loss = tf.reduce_sum(loss * mask) / (tf.reduce_sum(mask))
        return acc, mean_loss

def __init__(self, ob_dim, ac_dim):  # pylint: disable=W0613
    X = tf.placeholder(tf.float32, shape=[None, ob_dim * 2 + ac_dim * 2 + 2])  # batch of observations
    vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')
    wd_dict = {}
    h1 = tf.nn.elu(
        dense(X, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0,
              weight_loss_dict=wd_dict))
    h2 = tf.nn.elu(
        dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0,
              weight_loss_dict=wd_dict))
    vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0), bias_init=0,
                    weight_loss_dict=wd_dict)[:, 0]
    sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))
    wd_loss = tf.get_collection("vf_losses", None)
    loss = U.mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
    loss_sampled = U.mean(tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))
    self._predict = U.function([X], vpred_n)
    optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001 * (1 - 0.9), momentum=0.9, \
                               clip_kl=0.3, epsilon=0.1, stats_decay=0.95, \
                               async=1, kfac_update=2, cold_iter=50, \
                               weight_decay_dict=wd_dict, max_grad_norm=None)
    vf_var_list = []
    for var in tf.trainable_variables():
        if "vf" in var.name:
            vf_var_list.append(var)
    update_op, self.q_runner = optim.minimize(loss, loss_sampled, var_list=vf_var_list)
    self.do_update = U.function([X, vtarg_n], update_op)  # pylint: disable=E1101
    U.initialize()  # Initialize uninitialized TF variables

def __call__(self, z, y=None, is_training=True, reuse=False):
    with tf.variable_scope(self.name, reuse=reuse):
        batch_size = z.get_shape().as_list()[0]
        if y is not None:
            z = tf.concat([z, y], 1)
        net = tf.nn.relu(
            bn(dense(z, 128, name='g_fc1'), is_training, name='g_bn1'))
        net = tf.nn.relu(
            bn(dense(net, 256, name='g_fc2'), is_training, name='g_bn2'))
        net = tf.nn.relu(
            bn(dense(net, 512, name='g_fc3'), is_training, name='g_bn3'))
        net = tf.nn.relu(
            bn(dense(net, 1024, name='g_fc4'), is_training, name='g_bn4'))
        net = tf.nn.sigmoid(dense(net, 784, name='g_fc5'))
        out = tf.reshape(net, (batch_size, 28, 28, 1))
        return out

def encoder_impl(self, encoder_input, is_training):
    attention_dropout_rate = self._config.attention_dropout_rate if is_training else 0.0
    residual_dropout_rate = self._config.residual_dropout_rate if is_training else 0.0

    # Mask
    encoder_padding = tf.equal(
        tf.reduce_sum(tf.abs(encoder_input), axis=-1), 0.0)

    encoder_output = dense(encoder_input, self._config.hidden_units,
                           activation=tf.identity, use_bias=True, name="src_change")
    encoder_output = tf.contrib.layers.layer_norm(encoder_output, center=True,
                                                  scale=True, trainable=True)

    # Add positional signal
    encoder_output = common_attention.add_timing_signal_1d(encoder_output)

    # Dropout
    encoder_output = tf.layers.dropout(encoder_output,
                                       rate=residual_dropout_rate,
                                       training=is_training)

    # Blocks
    for i in range(self._config.num_blocks_enc):
        with tf.variable_scope("block_{}".format(i)):
            # Multihead Attention
            encoder_output = residual(
                encoder_output,
                multihead_attention(
                    query_antecedent=encoder_output,
                    memory_antecedent=None,
                    bias=common_attention.attention_bias_ignore_padding(
                        encoder_padding),
                    total_key_depth=self._config.hidden_units,
                    total_value_depth=self._config.hidden_units,
                    output_depth=self._config.hidden_units,
                    num_heads=self._config.num_heads,
                    dropout_rate=attention_dropout_rate,
                    name='encoder_self_attention',
                    summaries=True),
                dropout_rate=residual_dropout_rate)

            # Feed Forward
            encoder_output = residual(
                encoder_output,
                ff_hidden(inputs=encoder_output,
                          hidden_size=4 * self._config.hidden_units,
                          output_size=self._config.hidden_units,
                          activation=self._ff_activation),
                dropout_rate=residual_dropout_rate)

    # Mask padding part to zeros.
    encoder_output *= tf.expand_dims(1.0 - tf.to_float(encoder_padding), axis=-1)

    return encoder_output

def __call__(self, x, y=None, sn=False, is_training=True, reuse=False):
    with tf.variable_scope(self.name, reuse=reuse):
        batch_size = x.get_shape().as_list()[0]
        if y is not None:
            ydim = y.get_shape().as_list()[-1]
            y = tf.reshape(y, [batch_size, 1, 1, ydim])
            x = conv_cond_concat(x, y)  # [bz, 28, 28, 11]

        # [bz, 14, 14, 64]
        net = lrelu(conv2d(x, 64, 4, 4, 2, 2, sn=sn, padding="SAME", name='d_conv1'), name='d_l1')
        # [bz, 7, 7, 128]
        net = lrelu(bn(conv2d(net, 128, 4, 4, 2, 2, sn=sn, padding="SAME", name='d_conv2'),
                       is_training, name='d_bn2'), name='d_l2')
        net = tf.reshape(net, [batch_size, 7 * 7 * 128])
        # [bz, 1024]
        net = lrelu(bn(dense(net, 1024, sn=sn, name='d_fc3'), is_training, name='d_bn3'), name='d_l3')
        # [bz, 1]
        yd = dense(net, 1, sn=sn, name='D_dense')
        if self.class_num:
            yc = dense(net, self.class_num, sn=sn, name='C_dense')
            return yd, net, yc
        else:
            return yd, net

def test_output(self, decoder_output, reuse):
    """During test, we only need the last prediction at each time step."""
    with tf.variable_scope("decoder", reuse=reuse):
        last_logits = dense(decoder_output[:, -1], self._config.dst_vocab_size, use_bias=False,
                            name="dst_embedding" if self._config.tie_embedding_and_softmax else "softmax",
                            reuse=True if self._config.tie_embedding_and_softmax else None)
        last_preds = tf.to_int32(tf.argmax(last_logits, axis=-1))
        z = tf.nn.log_softmax(last_logits)
        last_k_scores, last_k_preds = tf.nn.top_k(z, k=self._config.test.beam_size, sorted=False)
        last_k_preds = tf.to_int32(last_k_preds)
        return last_preds, last_k_preds, last_k_scores

def test_loss(self, decoder_output, Y, reuse):
    """This function helps users compute PPL during test."""
    with tf.variable_scope("decoder", reuse=reuse):
        logits = dense(decoder_output, self._config.dst_vocab_size, use_bias=False,
                       name="dst_embedding" if self._config.tie_embedding_and_softmax else "softmax",
                       reuse=True if self._config.tie_embedding_and_softmax else None)
        mask = tf.to_float(tf.not_equal(Y, 0))
        labels = tf.one_hot(Y, depth=self._config.dst_vocab_size)
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        loss_sum = tf.reduce_sum(loss * mask)
    return loss_sum

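# A short sketch (an assumption, not part of the original code) of how the summed
# cross-entropy returned by test_loss is typically turned into corpus-level
# perplexity: accumulate per-batch sums and non-padding token counts, then
# exponentiate the mean. `sess`, `model`, and `batches` are hypothetical names.
import numpy as np

total_loss, total_tokens = 0.0, 0.0
for Y_batch, feed in batches:
    total_loss += sess.run(model.loss_sum, feed_dict=feed)
    total_tokens += np.sum(Y_batch != 0)   # non-padding tokens, matching the mask above
ppl = np.exp(total_loss / total_tokens)
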
def lenet_conv(inputs, struct, dbb=False):
    total_mem = 0
    if dbb:
        inputs, mem1 = conv(inputs, struct[0], 5, strides=1, padding='VALID',
                            dbb=dbb, dbb_bias=13)
        inputs = pool(inputs, 2, strides=2, padding='VALID')
        inputs, mem2 = conv(inputs, struct[1], 5, strides=1, padding='VALID',
                            dbb=dbb, dbb_bias=25)
        inputs = pool(inputs, 2, strides=2, padding='VALID')
        inputs, mem3 = flatten(inputs, struct[2], dbb=dbb, dbb_bias=156)
        inputs, mem4 = dense(inputs, struct[3], dbb=dbb, dbb_bias=54)
        inputs, mem5 = dense(inputs, 10, dbb=False)
    else:
        inputs, mem1 = conv(inputs, struct[0], 5, strides=1, padding='VALID', dbb=dbb)
        inputs = pool(inputs, 2, strides=2, padding='VALID')
        inputs, mem2 = conv(inputs, struct[1], 5, strides=1, padding='VALID', dbb=dbb)
        inputs = pool(inputs, 2, strides=2, padding='VALID')
        inputs, mem3 = flatten(inputs, struct[2], dbb=dbb)
        inputs, mem4 = dense(inputs, struct[3], dbb=dbb)
        inputs, mem5 = dense(inputs, 10, dbb=False)
    total_mem = mem1 + mem2 + mem3 + mem4 + mem5
    return inputs, total_mem

def _build_autoencoder(inputs, data_shape, num_planes, latent_dim):
    base_img = inputs['base_img']
    class_cond = inputs['class_cond']

    # We don't actually need a bias here as we are learning a bitplane per class anyways
    class_weights = tf.get_variable('class_weights', [10, data_shape[0] * data_shape[1]])
    class_vec = utils.dense(class_weights)(class_cond)
    class_plane = tf.nn.relu(tf.reshape(class_vec, [-1, data_shape[0], data_shape[1], 1]))

    # Now concatenate the tensors
    stacked_input = tf.concat([base_img, class_plane], axis=3)

    c1_weights = tf.get_variable('d1_weights', [7, 7, data_shape[2] + 1, num_planes])
    c1_bias = tf.get_variable('d1_bias', [num_planes])
    c1 = utils.conv2d(c1_weights, bias=c1_bias)(stacked_input)

    c2_weights = tf.get_variable('d2_weights', [7, 7, num_planes, num_planes])
    c2_bias = tf.get_variable('d2_bias', [num_planes])
    c2 = utils.conv2d(c2_weights, bias=c2_bias)(c1)
    c2_dropout = tf.nn.dropout(c2, inputs['keep_prob'])

    c3_weights = tf.get_variable('d3_weights', [7, 7, num_planes, num_planes])
    c3_bias = tf.get_variable('d3_bias', [num_planes])
    c3 = utils.conv2d(c3_weights, bias=c3_bias)(c2_dropout)

    c4_weights = tf.get_variable('d4_weights', [7, 7, num_planes, num_planes])
    c4_bias = tf.get_variable('d4_bias', [num_planes])
    c4 = utils.conv2d(c4_weights, bias=c4_bias)(c3)
    c4_dropout = tf.nn.dropout(c4, inputs['keep_prob'])

    c5_weights = tf.get_variable('d5_weights', [7, 7, num_planes, 3])
    c5_bias = tf.get_variable('d5_bias', [3])
    c5 = utils.conv2d(c5_weights, bias=c5_bias)(c4_dropout)

    diff_img = c5
    return {'diff_img': diff_img}