import tensorflow as tf

# NOTE: the helper layers used throughout this file (cnn_for_sentence_encoding,
# mask_for_high_rank, context_fusion_layers, multi_dimensional_attention,
# directional_attention_with_dense, bn_dense_layer, linear, highway_network,
# generate_embedding_mat, get_logits, ...) are defined elsewhere in this codebase.


def sentence_encoding_models(rep_tensor, rep_mask, method, activation_function,
                             scope=None, wd=0., is_train=None, keep_prob=1.,
                             **kwargs):
    method_name_list = [
        'cnn_kim',
        'no_ct', 'none',  # 'none' added: it is dispatched below but was missing from this list
        'lstm', 'gru', 'sru', 'sru_normal',  # rnn
        'cnn',
        'multi_head', 'multi_head_git', 'disa',
        'mlsa',
        'block'
    ]

    with tf.variable_scope(scope or 'sentence_encoding_models'):
        if method == 'cnn_kim':
            sent_coding = cnn_for_sentence_encoding(
                rep_tensor, rep_mask, (3, 4, 5), 200,
                'sent_encoding_cnn_kim', is_train, keep_prob, wd)
        elif method == 'none':
            # no context fusion and no attention: masked sum pooling over tokens
            sent_coding = tf.reduce_sum(mask_for_high_rank(rep_tensor, rep_mask), 1)
        else:
            if method == 'no_ct':
                ct_rep = tf.identity(rep_tensor)  # skip context fusion
            else:
                ct_rep = context_fusion_layers(
                    rep_tensor, rep_mask, method, activation_function,
                    None, wd, is_train, keep_prob, **kwargs)
            # pool the (context-fused) token representations into one sentence vector
            sent_coding = multi_dimensional_attention(
                ct_rep, rep_mask, 'multi_dim_attn_for_%s' % method,
                keep_prob, is_train, wd, activation_function)
        return sent_coding
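# --- Illustrative usage (added sketch, not part of the original code) ---
# A minimal, hedged example of driving sentence_encoding_models; the placeholder
# names, the 300-d embedding size, hn=100, and the 'disa' method choice are
# assumptions for illustration only. Extra keyword arguments (e.g. hn) are
# forwarded via **kwargs to the underlying context-fusion layer.
def _demo_sentence_encoding():
    demo_rep = tf.placeholder(tf.float32, [None, None, 300], name='demo_rep')  # bs,sl,dim
    demo_mask = tf.placeholder(tf.bool, [None, None], name='demo_mask')        # bs,sl
    demo_is_train = tf.placeholder(tf.bool, [], name='demo_is_train')
    # 'disa' routes through context_fusion_layers, then multi-dim attention pooling
    return sentence_encoding_models(
        demo_rep, demo_mask, 'disa', 'relu',
        scope='demo_sent_enc', wd=1e-4, is_train=demo_is_train, keep_prob=0.8,
        hn=100)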
def build_network(self):
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)
    tds, cds = self.tds, self.cds
    tl = self.tl
    tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
    hn = self.hn
    bs, sl1, sl2 = self.bs, self.sl1, self.sl2

    with tf.variable_scope('emb'):
        token_emb_mat = generate_embedding_mat(
            tds, tel, init_mat=self.token_emb_mat, extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb, scope='gene_token_emb_mat')
        s1_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent1_token)  # bs,sl1,tel
        s2_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent2_token)  # bs,sl2,tel
        self.tensor_dict['s1_emb'] = s1_emb
        self.tensor_dict['s2_emb'] = s2_emb

    with tf.variable_scope('sent_enc_attn'):
        s1_rep = multi_dimensional_attention(
            s1_emb, self.sent1_token_mask, 'multi_dimensional_attention',
            cfg.dropout, self.is_train, cfg.wd,
            tensor_dict=self.tensor_dict, name='s1_attn')
        # reuse the same attention parameters for the second sentence (Siamese)
        tf.get_variable_scope().reuse_variables()
        s2_rep = multi_dimensional_attention(
            s2_emb, self.sent2_token_mask, 'multi_dimensional_attention',
            cfg.dropout, self.is_train, cfg.wd,
            tensor_dict=self.tensor_dict, name='s2_attn')
        self.tensor_dict['s1_rep'] = s1_rep
        self.tensor_dict['s2_rep'] = s2_rep

    with tf.variable_scope('output'):
        out_rep = tf.concat([s1_rep, s2_rep, s1_rep - s2_rep, s1_rep * s2_rep], -1)
        pre_output = tf.nn.elu(linear(
            [out_rep], hn, True, 0., scope='pre_output', squeeze=False,
            wd=cfg.wd, input_keep_prob=cfg.dropout, is_train=self.is_train))
        logits = linear(
            [pre_output], self.output_class, True, 0., scope='logits',
            squeeze=False, wd=cfg.wd, input_keep_prob=cfg.dropout,
            is_train=self.is_train)
        self.tensor_dict['logits'] = logits  # fixed: the key must be the string 'logits'
    return logits
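# --- Note on the pair-interaction feature (added sketch, toy values) ---
# The output block above uses the common matching heuristic of concatenating
# the two sentence vectors with their difference and element-wise product,
# giving a 4*d feature. A toy illustration with concrete numbers:
def _demo_pair_features():
    s1 = tf.constant([[1., 2.]])  # bs=1, d=2
    s2 = tf.constant([[3., 5.]])
    feats = tf.concat([s1, s2, s1 - s2, s1 * s2], -1)
    return feats  # evaluates to [[1, 2, 3, 5, -2, -3, 3, 10]]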
def build_network(self):
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)
    tds, cds = self.tds, self.cds
    tl = self.tl
    tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
    hn = self.hn
    bs, sl1, sl2 = self.bs, self.sl1, self.sl2

    with tf.variable_scope('emb'):
        token_emb_mat = generate_embedding_mat(
            tds, tel, init_mat=self.token_emb_mat, extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb, scope='gene_token_emb_mat')
        s1_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent1_token)  # bs,sl1,tel
        s2_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent2_token)  # bs,sl2,tel
        self.tensor_dict['s1_emb'] = s1_emb
        self.tensor_dict['s2_emb'] = s2_emb

    with tf.variable_scope('hard_network'):
        # placeholder hard network: every non-padding token is "selected",
        # so s1_act/s2_act are just the token masks, the log-probabilities
        # are trivial, and the selection percentage is 1 for every example
        s1_act = self.sent1_token_mask
        s1_logpa = tf.cast(s1_act, tf.float32)
        s2_act = self.sent2_token_mask
        s2_logpa = tf.cast(s2_act, tf.float32)
        s1_percentage = tf.ones([bs], tf.float32)
        s2_percentage = tf.ones([bs], tf.float32)

    with tf.variable_scope('ct_attn'):
        s1_fw = directional_attention_with_dense(
            s1_emb, self.sent1_token_mask, 'forward', 'dir_attn_fw',
            cfg.dropout, self.is_train, cfg.wd,
            tensor_dict=self.tensor_dict, name='s1_fw_attn')
        s1_bw = directional_attention_with_dense(
            s1_emb, self.sent1_token_mask, 'backward', 'dir_attn_bw',
            cfg.dropout, self.is_train, cfg.wd,
            tensor_dict=self.tensor_dict, name='s1_bw_attn')
        s1_seq_rep = tf.concat([s1_fw, s1_bw], -1)

        tf.get_variable_scope().reuse_variables()  # share fw/bw attention with sentence 2

        s2_fw = directional_attention_with_dense(
            s2_emb, self.sent2_token_mask, 'forward', 'dir_attn_fw',
            cfg.dropout, self.is_train, cfg.wd,
            tensor_dict=self.tensor_dict, name='s2_fw_attn')
        s2_bw = directional_attention_with_dense(
            s2_emb, self.sent2_token_mask, 'backward', 'dir_attn_bw',
            cfg.dropout, self.is_train, cfg.wd,
            tensor_dict=self.tensor_dict, name='s2_bw_attn')
        s2_seq_rep = tf.concat([s2_fw, s2_bw], -1)

    with tf.variable_scope('sentence_enc'):
        s1_rep = multi_dimensional_attention(
            s1_seq_rep, self.sent1_token_mask, 'multi_dimensional_attention',
            cfg.dropout, self.is_train, cfg.wd,
            tensor_dict=self.tensor_dict, name='s1_attn')
        tf.get_variable_scope().reuse_variables()
        s2_rep = multi_dimensional_attention(
            s2_seq_rep, self.sent2_token_mask, 'multi_dimensional_attention',
            cfg.dropout, self.is_train, cfg.wd,
            tensor_dict=self.tensor_dict, name='s2_attn')

    with tf.variable_scope('output'):
        out_rep = tf.concat([s1_rep, s2_rep, s1_rep - s2_rep, s1_rep * s2_rep], -1)
        out_rep_map = bn_dense_layer(
            out_rep, hn, True, 0., 'out_rep_map', 'elu', False,
            cfg.wd, cfg.dropout, self.is_train)
        pre_output1 = highway_network(
            out_rep_map, hn, True, 0., 'pre_output1', 'elu', False,
            cfg.wd, cfg.dropout, self.is_train)
        logits = linear(
            [pre_output1], self.output_class, True, 0., scope='logits',
            squeeze=False, wd=cfg.wd, input_keep_prob=cfg.dropout,
            is_train=self.is_train)
    return logits, (s1_act, s1_logpa), (s2_act, s2_logpa), (s1_percentage, s2_percentage)
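# --- Note on Siamese weight sharing (added sketch) ---
# tf.get_variable_scope().reuse_variables() makes the second sentence reuse the
# exact variables created for the first, so 'dir_attn_fw'/'dir_attn_bw' and the
# pooling attention are Siamese. A minimal TF1 illustration with a hypothetical
# scope name 'demo_shared':
def _demo_siamese_sharing(x1, x2):
    with tf.variable_scope('demo_shared'):
        y1 = tf.layers.dense(x1, 4, name='proj')
        tf.get_variable_scope().reuse_variables()
        y2 = tf.layers.dense(x2, 4, name='proj')  # same kernel/bias as y1
    return y1, y2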
def build_network(self):
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)
    tds, cds = self.tds, self.cds
    tl = self.tl
    tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
    hn = self.hn
    bs, sl1, sl2 = self.bs, self.sl1, self.sl2

    with tf.variable_scope('emb'):
        token_emb_mat = generate_embedding_mat(
            tds, tel, init_mat=self.token_emb_mat, extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb, scope='gene_token_emb_mat')
        s1_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent1_token)  # bs,sl1,tel
        s2_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent2_token)  # bs,sl2,tel
        self.tensor_dict['s1_emb'] = s1_emb
        self.tensor_dict['s2_emb'] = s2_emb

    with tf.variable_scope('hard_network'):
        # RL-based hard selection: sample "dependent" and "head" token masks
        # for sentence 1
        s1_emb_new = sequence_conditional_feature(s1_emb, self.sent1_token_mask)
        s1_logpa_dep, s1_act_dep, s1_percentage_dep = generate_mask_with_rl(
            s1_emb_new, self.sent1_token_mask, False, 'generate_mask_with_rl_dep',
            cfg.dropout, self.is_train, cfg.wd, 'relu',
            self.disable_rl, self.global_step, cfg.mode, cfg.start_only_rl,
            hn)  # [bs, sl] & [bs, sl]
        s1_logpa_head, s1_act_head, s1_percentage_head = generate_mask_with_rl(
            s1_emb_new, self.sent1_token_mask, False, 'generate_mask_with_rl_head',
            cfg.dropout, self.is_train, cfg.wd, 'relu',
            self.disable_rl, self.global_step, cfg.mode, cfg.start_only_rl,
            hn)  # [bs, sl] & [bs, sl]
        s1_logpa = tf.concat([s1_logpa_dep, s1_logpa_head], -1)
        # pair mask [bs, sl, sl]: (i, j) kept iff j selected as dependent and i as head
        s1_act = tf.logical_and(tf.expand_dims(s1_act_dep, 1),
                                tf.expand_dims(s1_act_head, 2))
        s1_percentage = s1_percentage_dep * s1_percentage_head

        tf.get_variable_scope().reuse_variables()

        # for sentence 2 (shared parameters)
        s2_emb_new = sequence_conditional_feature(s2_emb, self.sent2_token_mask)
        s2_logpa_dep, s2_act_dep, s2_percentage_dep = generate_mask_with_rl(
            s2_emb_new, self.sent2_token_mask, False, 'generate_mask_with_rl_dep',
            cfg.dropout, self.is_train, cfg.wd, 'relu',
            self.disable_rl, self.global_step, cfg.mode, cfg.start_only_rl,
            hn)  # [bs, sl] & [bs, sl]
        s2_logpa_head, s2_act_head, s2_percentage_head = generate_mask_with_rl(
            s2_emb_new, self.sent2_token_mask, False, 'generate_mask_with_rl_head',
            cfg.dropout, self.is_train, cfg.wd, 'relu',
            self.disable_rl, self.global_step, cfg.mode, cfg.start_only_rl,
            hn)  # [bs, sl] & [bs, sl]
        s2_logpa = tf.concat([s2_logpa_dep, s2_logpa_head], -1)
        s2_act = tf.logical_and(tf.expand_dims(s2_act_dep, 1),
                                tf.expand_dims(s2_act_head, 2))
        s2_percentage = s2_percentage_dep * s2_percentage_head

    keep_unselected = True
    with tf.variable_scope('ct_attn'):
        s1_fw, s1_token_mask_new = directional_attention_with_selections(
            s1_emb, self.sent1_token_mask, s1_act_dep, s1_act_head, 'forward',
            hn, keep_unselected, 'dir_attn_fw', cfg.dropout, self.is_train,
            cfg.wd, 'relu')
        s1_bw, _ = directional_attention_with_selections(
            s1_emb, self.sent1_token_mask, s1_act_dep, s1_act_head, 'backward',
            hn, keep_unselected, 'dir_attn_bw', cfg.dropout, self.is_train,
            cfg.wd, 'relu')
        s1_seq_rep = tf.concat([s1_fw, s1_bw], -1)

        tf.get_variable_scope().reuse_variables()

        s2_fw, s2_token_mask_new = directional_attention_with_selections(
            s2_emb, self.sent2_token_mask, s2_act_dep, s2_act_head, 'forward',
            hn, keep_unselected, 'dir_attn_fw', cfg.dropout, self.is_train,
            cfg.wd, 'relu')
        s2_bw, _ = directional_attention_with_selections(
            s2_emb, self.sent2_token_mask, s2_act_dep, s2_act_head, 'backward',
            hn, keep_unselected, 'dir_attn_bw', cfg.dropout, self.is_train,
            cfg.wd, 'relu')
        s2_seq_rep = tf.concat([s2_fw, s2_bw], -1)

    with tf.variable_scope('sentence_enc'):
        s1_rep = multi_dimensional_attention(
            s1_seq_rep, s1_token_mask_new, 'multi_dimensional_attention',
            cfg.dropout, self.is_train, cfg.wd, 'relu',
            tensor_dict=self.tensor_dict, name='s1_attn')
        tf.get_variable_scope().reuse_variables()
        s2_rep = multi_dimensional_attention(
            s2_seq_rep, s2_token_mask_new, 'multi_dimensional_attention',
            cfg.dropout, self.is_train, cfg.wd, 'relu',
            tensor_dict=self.tensor_dict, name='s2_attn')

    with tf.variable_scope('output'):
        out_rep = tf.concat([s1_rep * s2_rep, tf.abs(s1_rep - s2_rep)], -1)
        out_rep_map = bn_dense_layer(
            out_rep, hn, True, 0., 'out_rep_map', 'relu', False,
            cfg.wd, cfg.dropout, self.is_train)
        if cfg.use_mse and cfg.mse_logits:
            # regression head for MSE training: scaled and shifted sigmoid score
            logits = tf.nn.sigmoid(linear(
                [out_rep_map], 1, True, 0., scope='logits', squeeze=True,
                wd=cfg.wd, input_keep_prob=cfg.dropout,
                is_train=self.is_train)) * 2. + 3.
        else:
            logits = linear(
                [out_rep_map], self.output_class, True, 0., scope='logits',
                squeeze=False, wd=cfg.wd, input_keep_prob=cfg.dropout,
                is_train=self.is_train)
    return logits, (s1_act, s1_logpa), (s2_act, s2_logpa), (s1_percentage, s2_percentage)
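# --- Note on the dependent/head pair mask (added sketch, toy values) ---
# s1_act/s2_act combine the token-level "dependent" and "head" selections into
# a [bs, sl, sl] pair mask via broadcasting: entry (i, j) is True only if token
# j was selected as a dependent and token i as a head.
def _demo_pair_mask():
    dep = tf.constant([[True, False, True]])   # bs=1, sl=3
    head = tf.constant([[True, True, False]])  # bs=1, sl=3
    pair = tf.logical_and(tf.expand_dims(dep, 1),   # [1,1,3]
                          tf.expand_dims(head, 2))  # [1,3,1]
    return pair  # broadcast result has shape [1, 3, 3]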
def build_network(self):
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)
    tds, cds = self.tds, self.cds
    tl = self.tl
    tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
    hn = self.hn
    bs, sl, ol, mc = self.bs, self.sl, self.ol, self.mc

    with tf.variable_scope('emb'):
        token_emb_mat = generate_embedding_mat(
            tds, tel, init_mat=self.token_emb_mat, extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb, scope='gene_token_emb_mat')
        emb = tf.nn.embedding_lookup(token_emb_mat, self.token_seq)  # bs,sl,tel
        self.tensor_dict['emb'] = emb

    with tf.variable_scope('ct_attn'):
        rep_fw = directional_attention_with_dense(
            emb, self.token_mask, 'forward', 'dir_attn_fw',
            cfg.dropout, self.is_train, cfg.wd, 'relu',
            tensor_dict=self.tensor_dict, name='fw_attn')
        rep_bw = directional_attention_with_dense(
            emb, self.token_mask, 'backward', 'dir_attn_bw',
            cfg.dropout, self.is_train, cfg.wd, 'relu',
            tensor_dict=self.tensor_dict, name='bw_attn')
        seq_rep = tf.concat([rep_fw, rep_bw], -1)

    with tf.variable_scope('sent_enc_attn'):
        rep = multi_dimensional_attention(
            seq_rep, self.token_mask, 'multi_dimensional_attention',
            cfg.dropout, self.is_train, cfg.wd, 'relu',
            tensor_dict=self.tensor_dict, name='attn')

    with tf.variable_scope('output'):
        pre_logits = tf.nn.relu(linear(
            [rep], hn, True, scope='pre_logits_linear',
            wd=cfg.wd, input_keep_prob=cfg.dropout,
            is_train=self.is_train))  # bs, hn
        logits = linear(
            [pre_logits], self.output_class, False, scope='get_output',
            wd=cfg.wd, input_keep_prob=cfg.dropout,
            is_train=self.is_train)  # bs, output_class
    _logger.done()
    return logits
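# --- Illustrative loss hookup (added sketch, not part of the original code) ---
# How these classification logits would typically feed a loss; 'gold_label'
# (a [bs] int tensor of class ids) is a hypothetical placeholder name.
def _demo_classification_loss(logits, gold_label):
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=gold_label, logits=logits)  # one scalar loss per example
    return tf.reduce_mean(losses)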
def build_network(self):
    tds, tel, hn = self.tds, self.tel, self.hn
    bs, sn, sl, ql = self.bs, self.sn, self.sl, self.ql

    with tf.variable_scope('emb'):
        token_emb_mat = generate_embedding_mat(
            tds, tel, init_mat=self.token_emb_mat,
            extra_mat=self.glove_emb_mat, scope='gene_token_emb_mat')
        c_emb = tf.nn.embedding_lookup(token_emb_mat, self.context_token)  # bs,sn,sl,tel
        q_emb = tf.nn.embedding_lookup(token_emb_mat, self.question_token)  # bs,ql,tel

    with tf.variable_scope('prepro'):
        q_rep = multi_dimensional_attention(
            q_emb, self.question_token_mask, 'q2coding',
            cfg.dropout, self.is_train, cfg.wd, 'relu')  # bs, hn
        q_rep_map = bn_dense_layer(
            q_rep, hn, True, 0., 'q_rep_map', 'relu', False,
            cfg.wd, cfg.dropout, self.is_train)  # bs, hn

    with tf.variable_scope('sent_emb'):
        # fold the sentence axis into the batch axis so each context sentence
        # is encoded independently, then restore it afterwards
        c_emb_rshp = tf.reshape(c_emb, [bs * sn, sl, tel], 'c_emb_rshp')  # bs*sn,sl,tel
        c_mask_rshp = tf.reshape(self.context_token_mask, [bs * sn, sl],
                                 'c_mask_rshp')  # bs*sn,sl
        # 'hn' is passed as a keyword so it reaches the context-fusion layer
        # through **kwargs (the positional form does not match the signature above)
        sent_enc_rshp = sentence_encoding_models(
            c_emb_rshp, c_mask_rshp, cfg.context_fusion_method, 'relu',
            'sent2enc', cfg.wd, self.is_train, cfg.dropout,
            hn=hn, block_len=cfg.block_len)  # bs*sn, 2*hn
        sent_enc = tf.reshape(sent_enc_rshp, [bs, sn, 2 * hn])  # bs,sn,2*hn
        sent_enc_map = bn_dense_layer(
            sent_enc, hn, True, 0., 'sent_enc_map', 'relu', False,
            cfg.wd, cfg.dropout, self.is_train)

    with tf.variable_scope('fusion'):
        q_rep_map_ex = tf.tile(tf.expand_dims(q_rep_map, 1), [1, sn, 1])  # bs,sn,hn
        fusion_rep = tf.concat(
            [sent_enc_map, q_rep_map_ex,
             sent_enc_map - q_rep_map_ex, sent_enc_map * q_rep_map_ex],
            -1)  # bs,sn,4*hn

    with tf.variable_scope('output'):
        out_cf = context_fusion_layers(
            fusion_rep, self.context_sent_mask, cfg.context_fusion_method,
            'relu', 'out_cf', cfg.wd, self.is_train, cfg.dropout,
            hn=hn, block_len=4)
        pre_output = bn_dense_layer(
            out_cf, hn, True, 0., 'pre_output', 'relu', False,
            cfg.wd, cfg.dropout, self.is_train)
        logits = get_logits(  # exp masked
            pre_output, None, True, 0., 'logits', self.context_sent_mask,
            cfg.wd, cfg.dropout, self.is_train, 'linear')
    return logits
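# --- Note on the fold-encode-unfold pattern (added sketch, toy encoder) ---
# The sent_emb block reshapes [bs, sn, sl, tel] context embeddings to
# [bs*sn, sl, tel] so the token-level encoder treats every sentence as a batch
# item, then unfolds the per-sentence codes back to [bs, sn, ...]. A shape-only
# illustration with max pooling standing in for the real sentence encoder:
def _demo_fold_unfold(c_emb, bs, sn, sl, tel):
    flat = tf.reshape(c_emb, [bs * sn, sl, tel])  # fold sentence axis into batch
    enc = tf.reduce_max(flat, axis=1)             # stand-in encoder: bs*sn, tel
    return tf.reshape(enc, [bs, sn, tel])         # unfold back to bs, sn, tel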