def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)

        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
def q_func_builder(input_placeholder, num_actions, scope, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        latent = network(input_placeholder)
        if isinstance(latent, tuple):
            if latent[1] is not None:
                raise NotImplementedError("DQN is not compatible with recurrent policies yet")
            latent = latent[0]

        latent = layers.flatten(latent)

        with tf.variable_scope("action_value"):
            action_out = latent
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = latent
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
def inference(self, inputs, conds, reuse=False):
    """
    Inputs
    ------
    inputs : tensor, float, shape=[batch_size, seq_length=100, features=10]
        real (from data) or fake (from G)
    conds : tensor, float, shape=[batch_size, seq_length=100, features=13]
        conditions, ball and team A
    reuse : bool, optional, default=False
        set True to share variables

    Return
    ------
    score : tensor, float
        critic score for the real (from data) or fake (from G) input
    """
    with tf.variable_scope('C_inference', reuse=reuse):
        concat_ = tf.concat([conds, inputs], axis=-1)
        if self.if_handcraft_features:
            concat_ = self.data_factory.extract_features(concat_)

        with tf.variable_scope('conv_input') as scope:
            conv_input = tf.layers.conv1d(
                inputs=concat_,
                filters=self.n_filters,
                kernel_size=5,
                strides=1,
                padding='same',
                activation=libs.leaky_relu,
                kernel_initializer=layers.xavier_initializer(),
                bias_initializer=tf.zeros_initializer())

        # residual blocks
        next_input = conv_input
        for i in range(self.n_resblock):
            res_block = libs.residual_block(
                'Res' + str(i),
                next_input,
                n_filters=self.n_filters,
                n_layers=2,
                residual_alpha=self.residual_alpha,
                leaky_relu_alpha=self.leaky_relu_alpha)
            next_input = res_block

        with tf.variable_scope('conv_output') as scope:
            normed = layers.layer_norm(next_input)
            nonlinear = libs.leaky_relu(normed)
            conv_output = tf.layers.conv1d(
                inputs=nonlinear,
                filters=1,
                kernel_size=5,
                strides=1,
                padding='same',
                activation=libs.leaky_relu,
                kernel_initializer=layers.xavier_initializer(),
                bias_initializer=tf.zeros_initializer())
            conv_output = tf.reduce_mean(conv_output, axis=1)
            final_ = tf.reshape(conv_output, shape=[-1])

        return final_
def layer_norm(input_tensor, name=None):
    """Run layer normalization on the last dimension of the tensor."""
    if input_tensor.dtype == tf.float16:
        return fused_layer_norm(
            inputs=input_tensor,
            begin_norm_axis=-1,
            begin_params_axis=-1,
            scope=name,
            use_fused_batch_norm=True)
    else:
        return contrib_layers.layer_norm(
            inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name)
def _conv_bn_relu(self, x, filters, ksize, stride):
    x = tf.layers.conv2d(x, filters=filters, kernel_size=ksize, strides=stride, padding='same')
    if self._layer_norm:
        x = clayer.layer_norm(x, scale=False)
    # else:
    #     x = tf.layers.BatchNormalization(x)
    x = tf.nn.relu(x)
    return x
def norm(data, is_training, normtype):
    if normtype is None:
        return data
    if normtype.casefold() == 'instance'.casefold():
        return layers.instance_norm(data)
    if normtype.casefold() == 'batch'.casefold():
        return layers.batch_norm(data, is_training=is_training, decay=0.9)
    if normtype.casefold() == 'layer'.casefold():
        return layers.layer_norm(data)
    return data
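# Hedged usage sketch for the norm() dispatcher above, assuming `layers` is
# tf.contrib.layers and TF 1.x graph mode; the NHWC feature shape is illustrative only.
def _example_norm_usage(is_training):
    feat = tf.placeholder(tf.float32, [None, 32, 32, 64], name="features")
    ln_out = norm(feat, is_training, 'Layer')    # casefold() makes the match case-insensitive
    bn_out = norm(feat, is_training, 'batch')    # batch norm with decay=0.9
    identity = norm(feat, is_training, None)     # None returns the input unchanged
    return ln_out, bn_out, identity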
def _norm(self, inp, scope, dtype=tf.float32):
    shape = inp.get_shape()[-1:]
    gamma_init = tf.constant_initializer(self._norm_gain)
    beta_init = tf.constant_initializer(self._norm_shift)
    with tf.variable_scope(scope):
        # Initialize beta and gamma for use by layer_norm.
        tf.get_variable("gamma", shape=shape, initializer=gamma_init, dtype=dtype)
        tf.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype)
    normalized = layers.layer_norm(inp, reuse=True, scope=scope)
    return normalized
def netD(input_images, batch_size, SELU, NORM, reuse=False):
    print('DISCRIMINATOR reuse = ' + str(reuse))
    sc = tf.get_variable_scope()
    with tf.variable_scope(sc, reuse=reuse):

        conv1 = tcl.conv2d(input_images, 64, 5, 2, activation_fn=tf.identity,
                           weights_initializer=tf.random_normal_initializer(stddev=0.02),
                           scope='d_conv1')
        if NORM:
            conv1 = tcl.layer_norm(conv1)
        if SELU:
            conv1 = selu(conv1)
        else:
            conv1 = lrelu(conv1)

        conv2 = tcl.conv2d(conv1, 128, 5, 2, activation_fn=tf.identity,
                           weights_initializer=tf.random_normal_initializer(stddev=0.02),
                           scope='d_conv2')
        if NORM:
            conv2 = tcl.layer_norm(conv2)
        if SELU:
            conv2 = selu(conv2)
        else:
            conv2 = lrelu(conv2)

        conv3 = tcl.conv2d(conv2, 256, 5, 2, activation_fn=tf.identity,
                           weights_initializer=tf.random_normal_initializer(stddev=0.02),
                           scope='d_conv3')
        if NORM:
            conv3 = tcl.layer_norm(conv3)
        if SELU:
            conv3 = selu(conv3)
        else:
            conv3 = lrelu(conv3)

        conv4 = tcl.conv2d(conv3, 512, 5, 2, activation_fn=tf.identity,
                           weights_initializer=tf.random_normal_initializer(stddev=0.02),
                           scope='d_conv4')
        if NORM:
            conv4 = tcl.layer_norm(conv4)
        if SELU:
            conv4 = selu(conv4)
        else:
            conv4 = lrelu(conv4)

        conv5 = tcl.conv2d(conv4, 1, 4, 1, activation_fn=tf.identity,
                           weights_initializer=tf.random_normal_initializer(stddev=0.02),
                           scope='d_conv5')

        print('input images:', input_images)
        print('conv1:', conv1)
        print('conv2:', conv2)
        print('conv3:', conv3)
        print('conv4:', conv4)
        print('conv5:', conv5)
        print('END D\n')

        tf.add_to_collection('vars', conv1)
        tf.add_to_collection('vars', conv2)
        tf.add_to_collection('vars', conv3)
        tf.add_to_collection('vars', conv4)
        tf.add_to_collection('vars', conv5)
        return conv5
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        for hidden in hiddens:
            out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
            if layer_norm:
                out = layers.layer_norm(out, center=True, scale=True)
            out = tf.nn.relu(out)
        q_out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return q_out
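# Minimal usage sketch for _mlp above, assuming `layers` is tf.contrib.layers and
# TF 1.x graph mode; the 4-dim observation and 2-action sizes are illustrative only.
def _example_mlp_q_network():
    obs_ph = tf.placeholder(tf.float32, [None, 4], name="observation")
    q_values = _mlp(hiddens=[64, 64], inpt=obs_ph, num_actions=2,
                    scope="q_func", layer_norm=True)
    return q_values  # shape [None, 2]: one Q-value per action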
def _encode_conv(self, x, conv_params, scope='obs_conv_encoding',
                 layer_norm=False, nonlinearity='swish',
                 spatial_softmax=False, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = x
        for num_outputs, kernel_size, stride, padding in conv_params:
            out = layers.convolution2d(out,
                                       num_outputs=num_outputs,
                                       kernel_size=kernel_size,
                                       stride=stride,
                                       padding=padding,
                                       activation_fn=None)
            if layer_norm is True:
                # out = layers.layer_norm(out, center=True, scale=True)
                out = layers.layer_norm(out)
            # Apply the non-linearity after layer-norm
            if nonlinearity == 'swish':
                out = tf.nn.sigmoid(out) * out  # swish non-linearity
            elif nonlinearity == 'relu':
                out = tf.nn.relu(out)

        if spatial_softmax:
            shape = tf.shape(out)
            static_shape = out.shape
            height, width, num_channels = shape[1], shape[2], static_shape[3]
            pos_x, pos_y = tf.meshgrid(tf.linspace(-1., 1., num=height),
                                       tf.linspace(-1., 1., num=width),
                                       indexing='ij')
            pos_x = tf.reshape(pos_x, [height * width])
            pos_y = tf.reshape(pos_y, [height * width])
            out = tf.reshape(tf.transpose(out, [0, 3, 1, 2]), [-1, height * width])
            softmax_attention = tf.nn.softmax(out)
            expected_x = tf.reduce_sum(pos_x * softmax_attention, [1], keep_dims=True)
            expected_y = tf.reduce_sum(pos_y * softmax_attention, [1], keep_dims=True)
            expected_xy = tf.concat([expected_x, expected_y], 1)
            feature_keypoints = tf.reshape(expected_xy, [-1, num_channels.value * 2])
            feature_keypoints.set_shape([None, num_channels.value * 2])
            return feature_keypoints
        else:
            out = layers.flatten(out)  # flatten the conv output
            return out
def q_func_builder(input_placeholder, num_actions, scope, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        latent, _ = network(input_placeholder)
        latent = layers.flatten(latent)

        with tf.variable_scope("action_value"):
            action_out = latent
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = latent
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
def _cnn_to_mlp(convs, hiddens, dueling, input_, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = input_
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)

        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
def __init__(self, X, mask, X_decode=None, decode_mask=None, ff_layer=True):
    bs, length, ndim = [v.value for v in X.shape]
    if X_decode is None:
        self.q, self.k, self.v = [tf.tanh(tf.layers.dense(X, ndim)) for _ in range(3)]
    else:
        # cross-attention path (X_decode provided) is not implemented;
        # q, k and v are only defined for the self-attention case
        pass
    self.q_expanded = tf.expand_dims(self.q, 2)
    self.k_expanded = tf.expand_dims(self.k, 1)
    self.s_raw = tf.reduce_sum(self.q_expanded * self.k_expanded, -1)
    self.mask = tf.expand_dims(mask, 1) * tf.expand_dims(mask, 2)
    self.s = masked_softmax(self.s_raw, self.mask)
    self.a = self.s * self.v
    self.e = layer_norm(self.a + X)
    if ff_layer:
        self.output = layer_norm(tf.layers.dense(self.e, ndim) + self.e)
    else:
        self.output = self.e
def _sublayer_pre_process(layer_inputs, reuse=None):
    """Perform sublayer pre-processing steps.

    We only apply layer_norm. A note from Google's tensor2tensor repo:
    "The current settings ("", "dan") are the published version of the
    transformer. ("n", "da") seems better for harder-to-learn models, so it
    should probably be the default."
    """
    return tf_layers.layer_norm(layer_inputs, scope="LayerNorm", reuse=reuse)
def _cnn_to_dist_mlp(convs, hiddens, dueling, inpt, num_actions, nb_atoms, scope,
                     reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)

        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions * nb_atoms,
                                                   activation_fn=None)

        if dueling:
            raise ValueError('Dueling not supported')
            # with tf.variable_scope("state_value"):
            #     state_out = conv_out
            #     for hidden in hiddens:
            #         state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
            #         if layer_norm:
            #             state_out = layers.layer_norm(state_out, center=True, scale=True)
            #         state_out = tf.nn.relu(state_out)
            #     state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            # action_scores_mean = tf.reduce_mean(action_scores, 1)
            # action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            # q_out = state_score + action_scores_centered
        else:
            out = tf.reshape(action_scores, shape=[-1, num_actions, nb_atoms])
            # out = tf.map_fn(lambda x: tf.scalar_mul(100, x), out)  # lower sparsity
            # out = tf.map_fn(tf.contrib.sparsemax.sparsemax, out, name='sparsemax')
            out = tf.nn.softmax(out, dim=-1, name='softmax')
        return out
def discriminator(self, image, reuse=False):
    with tf.variable_scope("discriminator") as scope:
        # image is 256 x 256 x (input_c_dim + output_c_dim)
        if reuse:
            tf.get_variable_scope().reuse_variables()
            scope.reuse_variables()
        else:
            assert tf.get_variable_scope().reuse == False

        h0 = lrelu(conv2d(image, self.df_dim, name='d_h0_conv'))
        # h0 is (128 x 128 x self.df_dim)
        h1 = lrelu(layer_norm(conv2d(h0, self.df_dim * 2, name='d_h1_conv')))
        # h1 is (64 x 64 x self.df_dim*2)
        h2 = lrelu(layer_norm(conv2d(h1, self.df_dim * 4, name='d_h2_conv')))
        # h2 is (32 x 32 x self.df_dim*4)
        h3 = lrelu(layer_norm(conv2d(h2, self.df_dim * 8, d_h=1, d_w=1, name='d_h3_conv')))
        # h3 is (16 x 16 x self.df_dim*8)
        h4 = linear(tf.reshape(h3, [self.batch_size, -1]), 1, 'd_h3_lin')

        return tf.nn.sigmoid(h4), h4
def build_encoder(inputs, seq_lens, activation, args, is_training):
    """
    Builds the transformer encoder. The encoder consists of multiple layers; each layer
    consists of a self-attention block followed by a residual feed-forward block.

    The output is a tuple. The first item is a list of length n_layers containing the
    outputs of each layer. The second item is a list of length n_layers containing the
    alignment scores of the attention heads.

    :param inputs: Inputs
    :param seq_lens: Sequence lengths used to mask the self-attention.
    :param activation: activation function of the inner feed-forward layer
    :param args: arguments object holding hyperparameters
    :param is_training: bool indicator whether dropout should be applied
    :return: tuple(list(encoder_states), list(attention_scores))
    """
    inner_hsize = args.inner_hsize
    outer_hsize = args.outer_hsize
    keep_prob_inner = args.keep_prob_inner
    keep_prob_outer = args.keep_prob_outer
    keep_prob_attention = args.keep_prob_attention
    num_layers = args.num_layers
    num_heads = args.num_heads

    states = inputs
    encoder = []
    alignments = []
    for n in range(num_layers):
        with tf.compat.v1.variable_scope("layer_{}".format(n)):
            with tf.compat.v1.variable_scope("self_attention"):
                states, scores = self_attention_block(
                    states,
                    seq_lens=seq_lens,
                    keep_prob_attention=keep_prob_attention,
                    num_heads=num_heads,
                    is_training=is_training)
            with tf.compat.v1.variable_scope("feed_forward"):
                states = residual_feedforward_block(
                    states,
                    inner_hsize=inner_hsize,
                    outer_hsize=outer_hsize,
                    activation=activation,
                    keep_prob_inner=keep_prob_inner,
                    keep_prob_outer=keep_prob_outer,
                    is_training=is_training)
            alignments.append(scores)
            encoder.append(states)

    # normalize the output of the last layer
    # https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/layers/transformer_layers.py#L233
    encoder[-1] = layer_norm(encoder[-1], begin_norm_axis=-1)
    return encoder, alignments
def add_encoder_layer(self, input, name, training, layer_to_skip_connect, local_inner_layers,
                      num_features, dim_reduce=False, dropout_rate=0.0):
    """
    Adds a resnet encoder layer.
    :param input: The input to the encoder layer
    :param name: Name of the layer
    :param training: Flag for training or validation
    :param layer_to_skip_connect: Layer to skip-connect this layer to
    :param local_inner_layers: A list with the inner layers of the current Multi-Layer
    :param num_features: Number of feature maps for the convolutions
    :param dim_reduce: Boolean value indicating if this is a dimensionality reducing layer or not
    :param dropout_rate: A float or a placeholder for the dropout rate
    :return: The output of the encoder layer
    """
    [b1, h1, w1, d1] = input.get_shape().as_list()
    if layer_to_skip_connect is not None:
        [b0, h0, w0, d0] = layer_to_skip_connect.get_shape().as_list()
        if h0 > h1:
            skip_connect_layer = self.conv_layer(layer_to_skip_connect,
                                                 int(layer_to_skip_connect.get_shape()[3]),
                                                 [3, 3], strides=(2, 2))
        else:
            skip_connect_layer = layer_to_skip_connect
    else:
        skip_connect_layer = layer_to_skip_connect

    current_layers = [input, skip_connect_layer]
    current_layers.extend(local_inner_layers)
    current_layers = remove_duplicates(current_layers)
    outputs = tf.concat(current_layers, axis=3)

    if dim_reduce:
        outputs = self.conv_layer(outputs, num_features, [3, 3], strides=(2, 2))
        outputs = leaky_relu(features=outputs)
        outputs = layer_norm(inputs=outputs, center=True, scale=True)
        outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=training)
    else:
        outputs = self.conv_layer(outputs, num_features, [3, 3], strides=(1, 1))
        outputs = leaky_relu(features=outputs)
        outputs = layer_norm(inputs=outputs, center=True, scale=True)

    return outputs
def arch_color_embedding_dropout(processed_obs, act_fun, n_actions, dueling, layers_width=1024):
    with tf.variable_scope("action_value") as scope:
        labs = [tf_layers.layer_norm(
                    tf.layers.flatten(tf.concat([processed_obs[:, :, :, :, i],
                                                 processed_obs[:, :, :, :, i + 6]], axis=-1)),
                    center=True, scale=True)
                for i in range(6)]
        features = [tf_layers.fully_connected(labs[i], num_outputs=layers_width, activation_fn=None,
                                              reuse=(None if i == 0 else True),
                                              scope='colour_embedding_1')
                    for i in range(6)]
        features = [act_fun(feature) for feature in features]
        features = [tf_layers.fully_connected(features[i], num_outputs=layers_width, activation_fn=None,
                                              reuse=(None if i == 0 else True),
                                              scope='colour_embedding_2')
                    for i in range(6)]

        action_out = features[0] + features[1] + features[2] + features[3] + features[4] + features[5]
        action_out = tf_layers.layer_norm(action_out, center=True, scale=True)
        action_out = act_fun(action_out)
        action_out = tf_layers.dropout(action_out, keep_prob=0.9)
        action_out = tf_layers.fully_connected(action_out, num_outputs=layers_width, activation_fn=None)
        action_out = tf_layers.layer_norm(action_out, center=True, scale=True)
        action_out = act_fun(action_out)
        action_out = tf_layers.dropout(action_out, keep_prob=0.9)
        action_scores = tf_layers.fully_connected(action_out, num_outputs=n_actions, activation_fn=None)

    if dueling:
        with tf.variable_scope("state_value") as scope:
            state_out = features[0] + features[1] + features[2] + features[3] + features[4] + features[5]
            state_out = tf_layers.layer_norm(state_out, center=True, scale=True)
            state_out = act_fun(state_out)
            state_out = tf_layers.dropout(state_out, keep_prob=0.9)
            state_out = tf_layers.fully_connected(state_out, num_outputs=layers_width, activation_fn=None)
            state_out = tf_layers.layer_norm(state_out, center=True, scale=True)
            state_out = act_fun(state_out)
            state_out = tf_layers.dropout(state_out, keep_prob=0.9)
            state_score = tf_layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
        action_scores_mean = tf.reduce_mean(action_scores, axis=1)
        action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, axis=1)
        q_out = state_score + action_scores_centered
    else:
        q_out = action_scores
    return q_out
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False,
                layer_norm=False, init_mean=1.0, init_sd=20.0):
    with tf.compat.v1.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.compat.v1.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,  # number of output filters
                                           kernel_size=kernel_size,  # filter spatial dimension
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)

        with tf.compat.v1.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            bias_init = [init_mean for _ in range(int(num_actions / 2))]
            bias_init.extend([-np.log(init_sd) for _ in range(int(num_actions / 2))])
            action_scores = layers.fully_connected(
                action_out, num_outputs=num_actions, activation_fn=None,
                weights_initializer=tf.compat.v1.zeros_initializer(),
                biases_initializer=tf.compat.v1.constant_initializer(bias_init))

        if dueling:
            with tf.compat.v1.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(input_tensor=action_scores, axis=1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
def _encode(self, features, training):
    inputs_BxI = features["inputs"]
    inputs_bias_Bx1xI = attention.ids_to_bias(inputs_BxI, self._dtype)
    states_BxIxD = self._embedding_layer(inputs_BxI, True)
    states_BxIxD = self._dropout_fn(timing.add_time_signal(states_BxIxD), training)
    with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
        states_BxIxD = transformer_block.stack(self._encoder_layers, training,
                                               states_BxIxD, inputs_bias_Bx1xI,
                                               None, None)
        states_BxIxD = contrib_layers.layer_norm(states_BxIxD, begin_norm_axis=2)
    return {"memory": states_BxIxD, "memory_bias": inputs_bias_Bx1xI}
def __call__(self, features, training):
    """Create model.

    Args:
      features: dictionary of tensors including "inputs" [batch, input_len] and
        "targets" [batch, output_len]
      training: bool of whether the mode is training.

    Returns:
      Tuple of (loss, outputs): Loss is a scalar. Output is a dictionary of
        tensors, containing model's output logits.
    """
    if "inputs" not in features or "targets" not in features:
        raise ValueError("Require inputs and targets keys in features.")

    context = self._encode(features, training)
    self._context = context
    targets_BxT = features["targets"]
    bias_1xTxT = attention.upper_triangle_bias(tf.shape(targets_BxT)[1], self._dtype)
    states_BxTxD = self._embedding_layer(targets_BxT, True)
    states_BxTxD = tf.pad(states_BxTxD, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
    states_BxTxD = timing.add_time_signal(states_BxTxD)
    states_BxTxD = self._dropout_fn(states_BxTxD, training)
    with tf.variable_scope(self._decoder_scope_name, reuse=tf.AUTO_REUSE):
        states_BxTxD = transformer_block.stack(self._decoder_layers, training,
                                               states_BxTxD, bias_1xTxT,
                                               context["memory"],
                                               context["memory_bias"])
        states_BxTxD = contrib_layers.layer_norm(states_BxTxD, begin_norm_axis=2)
    logits_BxTxV = self._embedding_layer(states_BxTxD, False)
    targets_mask_BxT = tf.cast(tf.greater(targets_BxT, 0), self._dtype)
    XENT_loss = tf.losses.softmax_cross_entropy(
        tf.one_hot(targets_BxT, self._vocab_size),
        logits_BxTxV,
        label_smoothing=self._label_smoothing,
        weights=targets_mask_BxT)

    # want the one-hot targets for sampling
    one_hot_targets = tf.one_hot(targets_BxT, self._vocab_size)

    return XENT_loss, {
        "logits": logits_BxTxV,
        "targets": targets_BxT,
        "one_hot_targets": one_hot_targets,
        "hidden_states": states_BxTxD,
        "context_memory": context["memory"],
        "context_bias": context["memory_bias"]
    }
def mlp_model(input, num_outputs, scope, reuse=False, num_units=64, layer_norm=False, alpha=0.01):
    # This model takes as input an observation and returns values of all actions
    with tf.variable_scope(scope, reuse=reuse):
        out = input
        out = layers.fully_connected(out, num_outputs=num_units, activation_fn=None)
        if layer_norm:
            print("Using layer_norm for actor...")
            # layers.batch_norm()
            out = layers.layer_norm(out, center=True, scale=True)
        # nonlinear activation: Leaky ReLU
        out = tf.maximum(alpha * out, out)
        # out = tf.nn.relu(out)

        out = layers.fully_connected(out, num_outputs=num_units, activation_fn=None)
        if layer_norm:
            out = layers.layer_norm(out, center=True, scale=True)
        # nonlinear activation: Leaky ReLU
        out = tf.maximum(alpha * out, out)
        # out = tf.nn.relu(out)

        out = layers.fully_connected(out, num_outputs=num_outputs, activation_fn=None)
        return out
def separate(self, mixture_w):
    '''
    Separation Network
    :param mixture_w: [B, K, N]
    :return: mask_fc: [B, K, nspk, N]
    '''
    # 1> layer normalization [B, K, N]
    norm_mixture_w = layer_norm(mixture_w, begin_norm_axis=2)
    norm_mixture_w = tf.reshape(norm_mixture_w, (self.batch_size, self.K, N))
    self.summary_layer_norm_mix = tf.summary.histogram('separator_layer_norm_mix_w',
                                                       norm_mixture_w)

    # 2> 1-segment context window -> [B, K, context * N]
    blank_ = tf.zeros([self.batch_size, self.context_window, N], dtype=tf.float32)
    # [B, context_window + K + context_window, N]
    padded_w_ = tf.concat([blank_, norm_mixture_w, blank_], axis=1)
    idx = 0
    new_w_ = padded_w_[:, idx:idx + self.context, :]
    for idx in range(1, self.K):
        new_w_ = tf.concat([new_w_, padded_w_[:, idx:idx + self.context, :]], axis=1)
    contexted_w = tf.reshape(new_w_, [self.batch_size, self.K * self.context, N])
    contexted_w = tf.reshape(contexted_w, [self.batch_size, self.K, self.context * N])

    # 3> BLSTM layers [B*K, rnn_layer_size]
    lstm1 = self.BLSTM(contexted_w, 1)
    lstm2 = self.BLSTM(lstm1, 2)
    lstm3 = self.BLSTM(lstm2, 3)
    lstm4 = self.BLSTM(lstm3 + lstm2, 4)
    output = lstm4  # [B, hidden]
    lstm_out = tf.reshape(output, [-1, 2 * self.rnn_hidden])  # [B*K, 2 * rnn_hidden]
    self.summary_lstm_out = tf.summary.histogram('separator_lstm_out', lstm_out)

    # 4> FC layer [B, K, nspk, N]
    fc = fully_connected(inputs=lstm_out, num_outputs=self.nspk * N, activation_fn=None)
    mask_fc = tf.reshape(fc, [self.batch_size, self.K, self.nspk, N])
    mask_fc = tf.nn.softmax(mask_fc, axis=2)
    return mask_fc
def _mlp(hiddens, input_, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = input_
        for hidden in hiddens:
            out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
            if layer_norm:
                # refer to https://arxiv.org/abs/1607.06450.
                out = layers.layer_norm(out, center=True, scale=True)
            out = tf.nn.relu(out)
        q_out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return q_out
def _dist_mlp(hiddens, inpt, num_actions, nb_atoms, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        for hidden in hiddens:
            out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
            if layer_norm:
                out = layers.layer_norm(out, center=True, scale=True)
            out = tf.nn.relu(out)
        out = layers.fully_connected(out, num_outputs=num_actions * nb_atoms, activation_fn=None)
        out = tf.reshape(out, shape=[-1, num_actions, nb_atoms])
        out = tf.nn.softmax(out, dim=-1, name='softmax')
        return out
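# Hedged follow-up sketch: turning the per-action atom probabilities produced by
# _dist_mlp above into scalar Q-values by taking the expectation over a fixed
# support (C51-style). The support bounds and atom count below are illustrative
# assumptions, not values taken from the snippet.
def _example_dist_to_q(obs_ph, num_actions, nb_atoms=51, v_min=-10.0, v_max=10.0):
    p_out = _dist_mlp([64], obs_ph, num_actions, nb_atoms, scope="dist_q")  # [B, actions, atoms]
    support = tf.linspace(v_min, v_max, nb_atoms)                           # [atoms]
    q_values = tf.reduce_sum(p_out * support, axis=-1)                      # [B, actions]
    return q_values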
def eval_fc_layer(Q, shape, activation=identity(), layer_norm=False, mask=None, mask_type=None, seed=None):
    if mask is not None and mask_type is None:
        raise ValueError("You have to specify either 'dropout', 'zoneout' or 'shakeout' as the mask_type")

    if mask is not None and mask_type is not None:
        # expand mask
        exp_mask = tf.expand_dims(mask, 0)

        # define some control variables
        use_dropout = mask_type == 'dropout'
        use_zoneout = mask_type == 'zoneout'
        use_shakeout = mask_type == 'shakeout'
    else:
        use_dropout = False
        use_zoneout = False
        use_shakeout = False

    # get activation and weights initializer
    activation_fn, init = activation
    W = tf.get_variable("W", shape=shape, initializer=init(seed=seed))
    b = tf.get_variable("b", shape=shape[1], initializer=tf.zeros_initializer())

    # first of all, shakeout has to be applied
    if use_shakeout:
        q = 0.3
        W = W @ exp_mask + q * tf.sign(W) @ (exp_mask - 1)

    # create the network
    u = Q @ W + b

    # apply regularization
    if layer_norm:
        u = layers.layer_norm(u, center=True, scale=True)
    if use_dropout:
        u = u * exp_mask
    v = activation_fn(u)

    # last but not least, apply zoneout
    return exp_mask * v + (1 - exp_mask) * Q if use_zoneout else v
def _mlp(hiddens, inpt, phi_sa_dim, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope):
        out = inpt
        for hidden in hiddens:
            out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
            if layer_norm:
                out = layers.layer_norm(out, center=True, scale=True)
            out = tf.nn.tanh(out)
        mu_out = layers.fully_connected(out, num_outputs=phi_sa_dim, activation_fn=None)
        return mu_out
def self_attention_block(self, inputs, sequence_length, sublayers, scope, reuse):
    with tf.variable_scope(scope, reuse=reuse):
        l, L = sublayers
        inputs = layers.layer_norm(inputs)
        inputs = tf.layers.dropout(inputs, self.dropout)
        outputs = self.multihead_attention(inputs, sequence_length)
        outputs = self.layer_dropout(outputs, inputs, self.dropout * l / float(L))
        l += 1

        # FFN
        residual = layers.layer_norm(outputs)
        outputs = tf.layers.dropout(outputs, self.dropout)
        hiddens = tf.layers.dense(outputs, self.config.attention_size * 2, activation=tf.nn.elu)
        fc_outputs = tf.layers.dense(hiddens, self.config.attention_size, activation=None)
        outputs = self.layer_dropout(residual, fc_outputs, self.dropout * l / float(L))
        return outputs, l
def _depthwise_separable_conv(self, inputs, kernel_size, num_filters, sequence_length, scope, auto, reuse=False):
    # inputs : [b, t, d] -> [b, t, 1, d]
    dims = inputs.shape.as_list()
    padding = "SAME"
    maxlen = self.parameter["sentence_length"]
    inputs = tf.expand_dims(inputs, axis=2)
    if auto:
        # pad the inputs for auto-regressive convolution
        zeros = tf.constant([[0, 0], [kernel_size - 1, 0], [0, 0], [0, 0]])
        inputs = tf.pad(inputs, zeros)
        padding = "VALID"
        maxlen = self.parameter["sentence_length"] + 1

    with tf.variable_scope(scope, reuse=reuse):
        depthwise_filter = tf.get_variable(
            shape=[kernel_size, 1, dims[-1], 1],
            name="depth_filter",
            initializer=layers.xavier_initializer(),
            regularizer=self.regularizer)
        pointwise_filter = tf.get_variable(
            shape=[1, 1, dims[-1], num_filters],
            name="point_filter",
            initializer=layers.xavier_initializer(),
            regularizer=self.regularizer)
        outputs = tf.nn.separable_conv2d(inputs, depthwise_filter, pointwise_filter,
                                         padding=padding, strides=(1, 1, 1, 1))

        # reshape to original dim [b, t, 1, d] -> [b, t, d] and apply layer norm
        outputs = tf.squeeze(outputs, axis=2)
        mask = tf.sequence_mask(sequence_length, maxlen=maxlen, dtype=tf.float32)
        mask = tf.expand_dims(mask, axis=2)
        outputs *= mask
        outputs = layers.layer_norm(outputs, begin_norm_axis=-1, begin_params_axis=-1)
        outputs = tf.nn.relu(outputs)
        if self.parameter["use_positional_embedding"]:
            outputs += self._position_embeddings(outputs, sequence_length, maxlen)
        return outputs
def conv_layer(x, kernel_size, stride, filter_size, name, nonlinearity=None, normalize=None, phase=None):
    with tf.variable_scope(name) as scope:
        input_channels = x.get_shape()[-1]

        # determine dim
        length_input = len(x.get_shape()) - 2
        if length_input not in [2, 3]:
            print("conv layer does not support non 2d or 3d inputs")
            exit()

        weights = _variable('weights',
                            shape=length_input * [kernel_size] + [input_channels, filter_size],
                            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        biases = _variable('biases', [filter_size], initializer=tf.constant_initializer(0.0))

        if length_input == 2:
            conv = tf.nn.conv2d(x, weights, strides=[1, stride, stride, 1], padding='VALID')
        elif length_input == 3:
            conv = tf.nn.conv3d(x, weights, strides=[1, stride, stride, stride, 1], padding='VALID')

        conv = tf.nn.bias_add(conv, biases)

        # normalize
        if normalize == "batch_norm":
            conv = tf.layers.batch_normalization(conv, training=True, momentum=0.9)
        elif normalize == "layer_norm":
            conv = tcl.layer_norm(conv)

        # apply nonlinearity
        if nonlinearity is not None:
            conv = nonlinearity(conv)
        return conv
def layer_norm_fn(x, relu=True):
    x = layers.layer_norm(x, scale=True, center=True)
    if relu:
        x = tf.nn.relu(x)
    return x
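# Minimal usage sketch for layer_norm_fn above, assuming `layers` is
# tf.contrib.layers and TF 1.x graph mode; the placeholder shape is illustrative only.
def _example_layer_norm_fn_usage():
    h = tf.placeholder(tf.float32, [None, 256], name="hidden")
    normed_relu = layer_norm_fn(h)              # layer norm followed by ReLU
    normed_only = layer_norm_fn(h, relu=False)  # layer norm without the activation
    return normed_relu, normed_only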