def blstm(hidden_output, len_feas, num_cell_units, num_layers, is_train, name,
          num_cell_project=None, cell_type='cudnn_lstm', dropout=0.0,
          use_residual=False, use_layernorm=False):
    with tf.variable_scope(name):
        for i in range(num_layers):
            # build one layer: build block, connect block
            single_cell = build_cell(
                num_units=num_cell_units,
                num_layers=1,
                is_train=is_train,
                cell_type=cell_type,
                dropout=dropout,
                forget_bias=0.0,
                use_residual=use_residual,
                dim_project=num_cell_project)
            hidden_output, _ = cell_forward(
                cell=single_cell,
                inputs=hidden_output,
                index_layer=i)
            if use_layernorm:
                hidden_output = layer_norm(hidden_output)

    return hidden_output, len_feas
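# Hedged usage sketch (not part of the original repo): shows how the stacked
# recurrent encoder above might be wired up. It assumes a TF1-style
# `import tensorflow as tf` at the module top, that `blstm` and its helpers
# (`build_cell`, `cell_forward`, `layer_norm`) live in this module, and that
# the feature width (80), layer count and unit size are illustrative only.
def _demo_blstm():
    feats = tf.placeholder(tf.float32, [None, None, 80], name='feats')
    feat_lens = tf.placeholder(tf.int32, [None], name='feat_lens')
    encoded, encoded_lens = blstm(
        hidden_output=feats,
        len_feas=feat_lens,
        num_cell_units=320,
        num_layers=3,
        is_train=True,
        name='encoder',
        dropout=0.1,
        use_layernorm=True)
    return encoded, encoded_lens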
def normal_conv(inputs, filter_num, kernel, stride, padding, use_relu, name,
                w_initializer=None, norm_type="batch"):
    with tf.variable_scope(name):
        net = tf.layers.conv2d(inputs, filter_num, kernel, stride, padding,
                               kernel_initializer=w_initializer, name="conv")
        if norm_type == "batch":
            net = tf.layers.batch_normalization(net, name="bn")
        elif norm_type == "layer":
            net = layer_norm(net)
        # any other norm_type: no normalization
        output = tf.nn.relu(net) if use_relu else net

    return output
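# Hedged usage sketch: a conv block over [batch, time, freq, channel] speech
# features, as commonly used for time/frequency downsampling. The 3x3 kernel,
# stride 2 and 64 filters are illustrative assumptions, not repo defaults.
def _demo_normal_conv():
    feats = tf.placeholder(tf.float32, [None, None, 80, 1], name='conv_in')
    return normal_conv(
        inputs=feats,
        filter_num=64,
        kernel=(3, 3),
        stride=(2, 2),
        padding='SAME',
        use_relu=True,
        name='conv_block',
        norm_type='batch')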
def residual(inputs, outputs, dropout_rate):
    """Residual connection.

    Args:
        inputs: A Tensor.
        outputs: A Tensor.
        dropout_rate: A float in [0, 1).

    Returns:
        A Tensor.
    """
    outputs = inputs + tf.nn.dropout(outputs, 1 - dropout_rate)
    outputs = common_layers.layer_norm(outputs)

    return outputs
def residual(inputs, outputs, dropout_rate):
    """Residual connection, applied only when the widths of `inputs` and
    `outputs` match (e.g. skipped across projection layers that change the
    last dimension).

    Args:
        inputs: A Tensor.
        outputs: A Tensor.
        dropout_rate: A float in [0, 1).

    Returns:
        A Tensor.
    """
    if outputs.get_shape()[-1] == inputs.get_shape()[-1]:
        outputs = inputs + tf.nn.dropout(outputs, 1 - dropout_rate)
    outputs = layer_norm(outputs)

    return outputs
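# Hedged usage sketch for the shape-guarded residual variant above: the skip
# connection is added only when the last dimensions of `inputs` and `outputs`
# agree. The dense stand-in sublayer and the width of 512 are illustrative
# assumptions.
def _demo_residual():
    x = tf.placeholder(tf.float32, [None, None, 512], name='res_in')
    y = tf.layers.dense(x, 512, name='sublayer')  # stand-in for a real sublayer
    return residual(x, y, dropout_rate=0.1)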
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    """Build either the inference or the training graph on one device,
    conditioned on self.is_train.
    """
    num_cell_units = self.args.model.num_cell_units
    cell_type = self.args.model.cell_type
    dropout = self.args.model.dropout
    forget_bias = self.args.model.forget_bias
    use_residual = self.args.model.use_residual

    hidden_output = tensors_input.feature_splits[id_gpu]
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        for i in range(self.args.model.num_lstm_layers):
            # build one layer: build block, connect block
            single_cell = build_cell(
                num_units=num_cell_units,
                num_layers=1,
                is_train=self.is_train,
                cell_type=cell_type,
                dropout=dropout,
                forget_bias=forget_bias,
                use_residual=use_residual)
            hidden_output, _ = cell_forward(
                cell=single_cell,
                inputs=hidden_output,
                index_layer=i)
            hidden_output = fully_connected(
                inputs=hidden_output,
                num_outputs=num_cell_units,
                activation_fn=tf.nn.tanh,
                scope='wx_b' + str(i))
            if self.args.model.use_layernorm:
                hidden_output = layer_norm(hidden_output)

        logits = fully_connected(
            inputs=hidden_output,
            num_outputs=self.args.dim_output,
            activation_fn=tf.identity,
            scope='fully_connected')

        # Accuracy: masked count of frames whose top-1 prediction is correct
        with tf.name_scope("label_accuracy"):
            correct = tf.nn.in_top_k(
                logits, tf.reshape(tensors_input.label_splits[id_gpu], [-1]), 1)
            correct = tf.multiply(
                tf.cast(correct, tf.float32),
                tf.reshape(tensors_input.mask_splits[id_gpu], [-1]))
            label_accuracy = tf.reduce_sum(correct)

        # Cross entropy loss, masked and normalized by the number of valid frames
        with tf.name_scope("CE_loss"):
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.reshape(tensors_input.label_splits[id_gpu], [-1]),
                logits=logits)
            cross_entropy = tf.multiply(
                cross_entropy,
                tf.reshape(tensors_input.mask_splits[id_gpu], [-1]))
            cross_entropy_loss = tf.reduce_sum(cross_entropy) / \
                tf.reduce_sum(tensors_input.mask_splits[id_gpu])
            loss = cross_entropy_loss

        if self.is_train:
            with tf.name_scope("gradients"):
                gradients = self.optimizer.compute_gradients(loss)

    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Instances))

    # train mode returns (loss, gradients); infer mode returns (loss, logits)
    return loss, (gradients if self.is_train else logits)
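# Hedged sketch of the multi-tower driver this method is written for: one
# tower per GPU under a shared variable scope, collecting per-tower losses and
# gradients for later averaging. `model.gpu_devices` and the reuse pattern are
# assumptions for illustration; the real driver lives elsewhere in the repo.
def _demo_build_towers(model, tensors_input):
    loss_list, grads_list = [], []
    for id_gpu, name_gpu in enumerate(model.gpu_devices):
        # reuse variables for every tower after the first
        with tf.variable_scope(tf.get_variable_scope(), reuse=bool(id_gpu)):
            loss, gradients = model.build_single_graph(id_gpu, name_gpu, tensors_input)
            loss_list.append(loss)
            grads_list.append(gradients)
    return tf.reduce_mean(loss_list), grads_list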