def bi_sru_recurrent_network(rep_tensor, rep_mask, is_train=None, keep_prob=1., wd=0., scope=None):
    """
    :param rep_tensor: [Tensor/tf.float32] rank is 3 with shape [batch_size/bs, max_sent_len/sl, vec]
    :param rep_mask: [Tensor/tf.bool] rank is 2 with shape [bs, sl]
    :param is_train: [Scalar Tensor/tf.bool] scalar tensor to indicate whether the mode is training or not
    :param keep_prob: [float] dropout keep probability in the range of (0, 1)
    :param wd: [float] for L2 regularization, if != 0, add tensors to tf collection "reg_vars"
    :param scope: [str] variable scope name
    :return: [Tensor/tf.float32] with shape [bs, sl, 2vec] for forward and backward
    """
    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(rep_tensor)[2]
    ivec = rep_tensor.get_shape().as_list()[2]
    with tf.variable_scope(scope or 'bi_sru_recurrent_network'):
        U_d = bn_dense_layer([rep_tensor], 6 * ivec, False, 0., 'get_frc', 'linear',
                             False, wd, keep_prob, is_train)  # bs, sl, 6vec
        U_d_fw, U_d_bw = tf.split(U_d, 2, 2)
        with tf.variable_scope('forward'):
            U_fw = tf.concat([rep_tensor, U_d_fw], -1)
            fw_SRUCell = SwitchableDropoutWrapper(SRUCell(ivec, tf.nn.tanh), is_train, keep_prob)
            fw_output, _ = dynamic_rnn(
                fw_SRUCell, U_fw, tf.reduce_sum(tf.cast(rep_mask, tf.int32), -1),
                dtype=tf.float32, scope='forward_sru')  # bs, sl, vec
        with tf.variable_scope('backward'):
            U_bw = tf.concat([rep_tensor, U_d_bw], -1)
            bw_SRUCell = SwitchableDropoutWrapper(SRUCell(ivec, tf.nn.tanh), is_train, keep_prob)
            bw_output, _ = bw_dynamic_rnn(
                bw_SRUCell, U_bw, tf.reduce_sum(tf.cast(rep_mask, tf.int32), -1),
                dtype=tf.float32, scope='backward_sru')  # bs, sl, vec

        all_output = tf.concat([fw_output, bw_output], -1)  # bs, sl, 2vec
        return all_output
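# A minimal usage sketch for bi_sru_recurrent_network, not part of the original
# module: feed a batch of padded token vectors plus a boolean mask and get back
# the concatenated forward/backward SRU states. The placeholder names, the
# vector size (200), keep_prob, wd, and scope name are illustrative assumptions.
def _bi_sru_usage_sketch():
    token_rep = tf.placeholder(tf.float32, [None, None, 200], name='token_rep')  # [bs, sl, vec]
    token_mask = tf.placeholder(tf.bool, [None, None], name='token_mask')        # [bs, sl]
    is_train = tf.placeholder(tf.bool, [], name='is_train')
    # returns [bs, sl, 2*vec]: forward and backward SRU outputs concatenated
    return bi_sru_recurrent_network(token_rep, token_mask, is_train=is_train,
                                    keep_prob=0.8, wd=1e-4, scope='ctx_sru')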
def one_direction_rnn(tensor_rep, mask_rep, hn, cell_type, only_final=False,
                      wd=0., keep_prob=1., is_train=None, is_forward=True, scope=None):
    assert not is_forward  # todo: waiting to be implemented
    # note: parenthesize the conditional so the scope argument is not ignored
    with tf.variable_scope(scope or '%s_rnn' % ('forward' if is_forward else 'backward')):
        reuse = None if not tf.get_variable_scope().reuse else True
        # print(reuse)
        if cell_type == 'gru':
            cell = tf.contrib.rnn.GRUCell(hn, reuse=reuse)
        elif cell_type == 'lstm':
            cell = tf.contrib.rnn.LSTMCell(hn, reuse=reuse)
        elif cell_type == 'basic_lstm':
            cell = tf.contrib.rnn.BasicLSTMCell(hn, reuse=reuse)
        elif cell_type == 'basic_rnn':
            cell = tf.contrib.rnn.BasicRNNCell(hn, reuse=reuse)
        else:
            raise AttributeError('no cell type \'%s\'' % cell_type)

        cell_dp = SwitchableDropoutWrapper(cell, is_train, keep_prob)
        tensor_len = tf.reduce_sum(tf.cast(mask_rep, tf.int32), -1)  # [bs]
        rnn_outputs, _ = dynamic_rnn(cell_dp, tensor_rep, tensor_len, dtype=tf.float32)

        if wd > 0:
            add_reg_without_bias()
        if not only_final:
            return rnn_outputs  # [..., sl, hn]
        else:
            return get_last_state(rnn_outputs, mask_rep)  # [..., hn]
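# A minimal usage sketch for one_direction_rnn (illustrative only; placeholder
# names, the input size 128, and hn=100 are assumptions). The function currently
# asserts is_forward is False, so only the backward direction is exercised here.
def _backward_rnn_usage_sketch():
    seq_rep = tf.placeholder(tf.float32, [None, None, 128])  # [bs, sl, vec]
    seq_mask = tf.placeholder(tf.bool, [None, None])         # [bs, sl]
    is_train = tf.placeholder(tf.bool, [])
    # with only_final=True, returns the last valid state per sequence: [bs, 100]
    return one_direction_rnn(seq_rep, seq_mask, hn=100, cell_type='gru',
                             only_final=True, wd=0., keep_prob=0.9,
                             is_train=is_train, is_forward=False, scope='bw_gru')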
def bi_sru_recurrent_network(rep_tensor, rep_mask, is_train=None, keep_prob=1., wd=0., scope=None):
    bs, sl, vec = tf.shape(rep_tensor)[0], tf.shape(rep_tensor)[1], tf.shape(rep_tensor)[2]
    ivec = rep_tensor.get_shape().as_list()[2]
    with tf.variable_scope(scope or 'bi_sru_recurrent_network'):
        U_d = bn_dense_layer([rep_tensor], 6 * ivec, True, 0., 'get_frc', 'linear',
                             False, wd, keep_prob, is_train)  # bs, sl, 6vec
        U_d_fw, U_d_bw = tf.split(U_d, 2, 2)
        with tf.variable_scope('forward'):
            U_fw = tf.concat([rep_tensor, U_d_fw], -1)
            fw_SRUCell = SwitchableDropoutWrapper(SRUCell(ivec, tf.nn.tanh), is_train, keep_prob)
            fw_output, _ = dynamic_rnn(
                fw_SRUCell, U_fw, tf.reduce_sum(tf.cast(rep_mask, tf.int32), -1),
                dtype=tf.float32, scope='forward_sru')  # bs, sl, vec
        with tf.variable_scope('backward'):
            U_bw = tf.concat([rep_tensor, U_d_bw], -1)
            bw_SRUCell = SwitchableDropoutWrapper(SRUCell(ivec, tf.nn.tanh), is_train, keep_prob)
            bw_output, _ = bw_dynamic_rnn(
                bw_SRUCell, U_bw, tf.reduce_sum(tf.cast(rep_mask, tf.int32), -1),
                dtype=tf.float32, scope='backward_sru')  # bs, sl, vec

        all_output = tf.concat([fw_output, bw_output], -1)  # bs, sl, 2vec
        return all_output
def build_network(self):
    with tf.name_scope('code_embeddings'):
        if self.model_type == 'raw':
            # init_code_embed = tf.random_uniform([self.vocabulary_size, self.embedding_size], -1.0, 1.0)
            # code_embeddings = tf.Variable(init_code_embed)
            init_code_embed = tf.one_hot(self.inputs, self.vocabulary_size,
                                         on_value=1.0, off_value=0.0, axis=-1)
            inputs_embed = bn_dense_layer(init_code_embed, self.embedding_size, True, 0.,
                                          'bn_dense_map_linear', 'linear', False,
                                          wd=0., keep_prob=1., is_train=True)
        elif self.model_type in ('tesa', 'delta', 'sa', 'normal', 'cbow', 'sg'):
            # these model types share the same lookup pipeline and differ only in
            # the pre-trained embedding matrix returned by tesan_trans
            init_code_embed = tesan_trans(self.model_type)
            code_embeddings = tf.Variable(init_code_embed, dtype=tf.float32)
            inputs_embed = tf.nn.embedding_lookup(code_embeddings, self.inputs)
        elif self.model_type == 'mce':
            init_code_embed = mce_trans()
            code_embeddings = tf.Variable(init_code_embed, dtype=tf.float32)
            inputs_embed = tf.nn.embedding_lookup(code_embeddings, self.inputs)
        elif self.model_type == 'glove':
            init_code_embed = glove_trans()
            code_embeddings = tf.Variable(init_code_embed, dtype=tf.float32)
            inputs_embed = tf.nn.embedding_lookup(code_embeddings, self.inputs)
        else:
            init_code_embed = med2vec_trans()
            # code_embeddings = tf.constant(init_code_embed, dtype=tf.float32)
            code_embeddings = tf.Variable(init_code_embed, dtype=tf.float32)
            inputs_embed = tf.nn.embedding_lookup(code_embeddings, self.inputs)

    with tf.name_scope('visit_embedding'):
        # inputs_embed: bs, max_visits, max_len_visit, embed_size
        inputs_masked = mask_for_high_rank(inputs_embed, self.inputs_mask)
        inputs_reduced = tf.reduce_mean(inputs_masked, 2)  # batch_size, max_visits, embed_size

    with tf.name_scope('visit_masking'):
        visit_mask = tf.reduce_sum(tf.cast(self.inputs_mask, tf.int32), -1)  # [bs, max_visits]
        visit_mask = tf.cast(visit_mask, tf.bool)
        tensor_len = tf.reduce_sum(tf.cast(visit_mask, tf.int32), -1)  # [bs]

    with tf.name_scope('RNN_computation'):
        reuse = None if not tf.get_variable_scope().reuse else True
        if cfg.cell_type == 'gru':
            cell = tf.contrib.rnn.GRUCell(cfg.hn, reuse=reuse)
        elif cfg.cell_type == 'lstm':
            cell = tf.contrib.rnn.LSTMCell(cfg.hn, reuse=reuse)
        elif cfg.cell_type == 'basic_lstm':
            cell = tf.contrib.rnn.BasicLSTMCell(cfg.hn, reuse=reuse)
        elif cfg.cell_type == 'basic_rnn':
            cell = tf.contrib.rnn.BasicRNNCell(cfg.hn, reuse=reuse)
        else:
            raise AttributeError('no cell type \'%s\'' % cfg.cell_type)

        outputs, final_state = dynamic_rnn(cell, inputs_reduced, tensor_len, dtype=tf.float32)
    return outputs, final_state, tensor_len
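# A minimal usage sketch of build_network for a visit-sequence model
# (illustrative only; the helper name, the assumed input shapes, and the mean
# pooling readout below are assumptions, not the repo's actual training code).
def _build_network_usage_sketch(model):
    # assumed: model.inputs      int32 [bs, max_visits, max_len_visit] code ids
    #          model.inputs_mask bool  [bs, max_visits, max_len_visit] padding mask
    outputs, final_state, tensor_len = model.build_network()
    # average the RNN outputs over the valid visits to get one patient vector
    step_mask = tf.sequence_mask(tensor_len, tf.shape(outputs)[1], dtype=tf.float32)  # [bs, max_visits]
    summed = tf.reduce_sum(outputs * tf.expand_dims(step_mask, -1), 1)                # [bs, cfg.hn]
    denom = tf.cast(tf.expand_dims(tf.maximum(tensor_len, 1), -1), tf.float32)        # avoid div by zero
    return summed / denom  # [bs, cfg.hn]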