def build_network(self):
    """Assemble the single-sentence classification graph and return its logits.

    Stages, each under its own variable scope:

    * 'emb'           -- build the embedding matrix and look up ``self.token_seq``.
    * 'sent_encoding' -- ``sentence_encoding_models`` turns the token
      embeddings into one representation per example; the fusion method is
      taken from ``cfg.context_fusion_method``.
    * 'output'        -- a ReLU layer of width ``self.hn`` followed by a
      projection to ``self.output_class`` units.

    Returns:
        The tensor produced by the final ``linear`` call.
    """
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)

    # Size attributes are read in the original order; only tds, tel and hn
    # are consumed below.
    tds, cds, tl = self.tds, self.cds, self.tl
    tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
    hn, bs = self.hn, self.bs

    # Keyword settings shared by both layers of the output head.
    dense_kwargs = dict(wd=cfg.wd, input_keep_prob=cfg.dropout, is_train=self.is_train)

    with tf.variable_scope('emb'):
        embedding_table = generate_embedding_mat(
            tds, tel,
            init_mat=self.token_emb_mat,
            extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb,
            scope='gene_token_emb_mat')
        # Original shape note: bs,sl1,tel
        token_vectors = tf.nn.embedding_lookup(embedding_table, self.token_seq)

    with tf.variable_scope('sent_encoding'):
        sentence_vec = sentence_encoding_models(
            token_vectors, self.token_mask, cfg.context_fusion_method, 'relu',
            'ct_based_sent2vec', cfg.wd, self.is_train, cfg.dropout,
            block_len=cfg.block_len)

    with tf.variable_scope('output'):
        hidden = linear([sentence_vec], hn, True, scope='pre_logits_linear',
                        **dense_kwargs)
        hidden = tf.nn.relu(hidden)  # original shape note: bs, hn
        logits = linear([hidden], self.output_class, False, scope='get_output',
                        **dense_kwargs)  # original shape note: bs, 5

    _logger.done()
    return logits
def build_network(self):
    """Assemble the ``disan``-based single-sentence classifier and return logits.

    The token embeddings are stored in ``self.tensor_dict['emb']``, encoded by
    ``disan`` (called with scope name 'DiSAN'), then passed through a ReLU
    layer of width ``self.hn`` and a projection to ``self.output_class`` units.

    NOTE(review): the source text had lost its line structure. ``disan`` is
    placed outside the 'emb' variable scope here -- confirm against the
    variable names of any existing checkpoints.

    Returns:
        The tensor produced by the final ``linear`` call.
    """
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)

    # Size attributes are read in the original order; only tds, tel and hn
    # are consumed below.
    tds, cds, tl = self.tds, self.cds, self.tl
    tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
    hn = self.hn
    bs, sl, ol, mc = self.bs, self.sl, self.ol, self.mc

    # Keyword settings shared by both layers of the output head.
    dense_kwargs = dict(wd=cfg.wd, input_keep_prob=cfg.dropout, is_train=self.is_train)

    with tf.variable_scope('emb'):
        embedding_table = generate_embedding_mat(
            tds, tel,
            init_mat=self.token_emb_mat,
            extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb,
            scope='gene_token_emb_mat')
        # Original shape note: bs,sl,tel
        token_vectors = tf.nn.embedding_lookup(embedding_table, self.token_seq)
        self.tensor_dict['emb'] = token_vectors

    sentence_vec = disan(
        token_vectors, self.token_mask, 'DiSAN', cfg.dropout,
        self.is_train, cfg.wd, 'relu',
        tensor_dict=self.tensor_dict, name='')

    with tf.variable_scope('output'):
        hidden = linear([sentence_vec], hn, True, scope='pre_logits_linear',
                        **dense_kwargs)
        hidden = tf.nn.relu(hidden)  # original shape note: bs, hn
        logits = linear([hidden], self.output_class, False, scope='get_output',
                        **dense_kwargs)  # original shape note: bs, 5

    _logger.done()
    return logits
def build_network(self):
    """Build the sentence-pair classifier with attention pooling; return logits.

    Both sentences share one embedding matrix and one set of
    ``traditional_attention`` variables (the second call runs after
    ``reuse_variables()`` inside the 'sent_enc_attn' scope). The two sentence
    representations are combined as ``[s1, s2, s1 - s2, s1 * s2]`` and fed
    through an ELU layer of width ``self.hn`` and a projection to
    ``self.output_class`` units.

    Side effects:
        Stores tensors in ``self.tensor_dict`` under the string keys
        's1_emb', 's2_emb', 's1_rep', 's2_rep' and 'logits'.

    Returns:
        The logits tensor produced by the final ``linear`` call.
    """
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)
    tds, tel, hn = self.tds, self.tel, self.hn

    with tf.variable_scope('emb'):
        token_emb_mat = generate_embedding_mat(
            tds, tel,
            init_mat=self.token_emb_mat,
            extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb,
            scope='gene_token_emb_mat')
        # Original shape notes: bs,sl1,tel and bs,sl2,tel
        s1_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent1_token)
        s2_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent2_token)
        self.tensor_dict['s1_emb'] = s1_emb
        self.tensor_dict['s2_emb'] = s2_emb

    with tf.variable_scope('sent_enc_attn'):
        s1_rep = traditional_attention(
            s1_emb, self.sent1_token_mask, 'traditional_attention',
            cfg.dropout, self.is_train, cfg.wd,
            tensor_dict=self.tensor_dict, name='s1_attn')
        # Sentence 2 must reuse the variables created for sentence 1.
        tf.get_variable_scope().reuse_variables()
        s2_rep = traditional_attention(
            s2_emb, self.sent2_token_mask, 'traditional_attention',
            cfg.dropout, self.is_train, cfg.wd,
            tensor_dict=self.tensor_dict, name='s2_attn')
        self.tensor_dict['s1_rep'] = s1_rep
        self.tensor_dict['s2_rep'] = s2_rep

    with tf.variable_scope('output'):
        out_rep = tf.concat(
            [s1_rep, s2_rep, s1_rep - s2_rep, s1_rep * s2_rep], -1)
        pre_output = tf.nn.elu(linear(
            [out_rep], hn, True, 0., scope='pre_output', squeeze=False,
            wd=cfg.wd, input_keep_prob=cfg.dropout, is_train=self.is_train))
        logits = linear(
            [pre_output], self.output_class, True, 0., scope='logits',
            squeeze=False, wd=cfg.wd, input_keep_prob=cfg.dropout,
            is_train=self.is_train)
        # Use a string key like every other tensor_dict entry; the original
        # keyed this entry by the tensor object itself.
        self.tensor_dict['logits'] = logits
    return logits
def build_network(self):
    """Build the sentence-pair graph with a placeholder 'hard network'.

    The 'hard_network' stage creates no selection model in this variant: each
    sentence's ``act`` is its token mask, ``logpa`` is that mask cast to
    float32, and ``percentage`` is a ones vector of length ``self.bs``.

    The remaining stages are forward/backward
    ``directional_attention_with_dense`` ('ct_attn'),
    ``multi_dimensional_attention`` pooling ('sentence_enc'), and an output
    head of ``bn_dense_layer`` -> ``highway_network`` -> ``linear`` over
    ``[s1, s2, s1 - s2, s1 * s2]``. Sentence 2 reuses sentence 1's variables
    in the 'ct_attn' and 'sentence_enc' scopes.

    Returns:
        ``(logits, (s1_act, s1_logpa), (s2_act, s2_logpa),
        (s1_percentage, s2_percentage))``.
    """
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)

    # Size attributes are read in the original order; tds, tel, hn and bs
    # are the ones consumed below.
    tds, cds, tl = self.tds, self.cds, self.tl
    tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
    hn = self.hn
    bs, sl1, sl2 = self.bs, self.sl1, self.sl2

    mask1, mask2 = self.sent1_token_mask, self.sent2_token_mask

    with tf.variable_scope('emb'):
        embedding_table = generate_embedding_mat(
            tds, tel,
            init_mat=self.token_emb_mat,
            extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb,
            scope='gene_token_emb_mat')
        # Original shape notes: bs,sl1,tel and bs,sl2,tel
        s1_emb = tf.nn.embedding_lookup(embedding_table, self.sent1_token)
        s2_emb = tf.nn.embedding_lookup(embedding_table, self.sent2_token)
        self.tensor_dict['s1_emb'] = s1_emb
        self.tensor_dict['s2_emb'] = s2_emb

    with tf.variable_scope('hard_network'):
        # Produces: s1_act, s1_logpa, s2_act, s2_logpa, choose_percentage
        s1_act = mask1
        s1_logpa = tf.cast(s1_act, tf.float32)
        s2_act = mask2
        s2_logpa = tf.cast(s2_act, tf.float32)
        s1_percentage = tf.ones([bs], tf.float32)
        s2_percentage = tf.ones([bs], tf.float32)

    # Positional settings shared by every attention call below.
    attn_args = (cfg.dropout, self.is_train, cfg.wd)

    with tf.variable_scope('ct_attn'):
        s1_forward = directional_attention_with_dense(
            s1_emb, mask1, 'forward', 'dir_attn_fw', *attn_args,
            tensor_dict=self.tensor_dict, name='s1_fw_attn')
        s1_backward = directional_attention_with_dense(
            s1_emb, mask1, 'backward', 'dir_attn_bw', *attn_args,
            tensor_dict=self.tensor_dict, name='s1_bw_attn')
        s1_seq_rep = tf.concat([s1_forward, s1_backward], -1)

        # Sentence 2 shares the attention variables built for sentence 1.
        tf.get_variable_scope().reuse_variables()

        s2_forward = directional_attention_with_dense(
            s2_emb, mask2, 'forward', 'dir_attn_fw', *attn_args,
            tensor_dict=self.tensor_dict, name='s2_fw_attn')
        s2_backward = directional_attention_with_dense(
            s2_emb, mask2, 'backward', 'dir_attn_bw', *attn_args,
            tensor_dict=self.tensor_dict, name='s2_bw_attn')
        s2_seq_rep = tf.concat([s2_forward, s2_backward], -1)

    with tf.variable_scope('sentence_enc'):
        s1_rep = multi_dimensional_attention(
            s1_seq_rep, mask1, 'multi_dimensional_attention', *attn_args,
            tensor_dict=self.tensor_dict, name='s1_attn')
        tf.get_variable_scope().reuse_variables()
        s2_rep = multi_dimensional_attention(
            s2_seq_rep, mask2, 'multi_dimensional_attention', *attn_args,
            tensor_dict=self.tensor_dict, name='s2_attn')

    with tf.variable_scope('output'):
        pair_diff = s1_rep - s2_rep
        pair_prod = s1_rep * s2_rep
        out_rep = tf.concat([s1_rep, s2_rep, pair_diff, pair_prod], -1)

        # Trailing positional arguments common to both 'elu' layers.
        elu_tail = ('elu', False, cfg.wd, cfg.dropout, self.is_train)
        out_rep_map = bn_dense_layer(
            out_rep, hn, True, 0., 'out_rep_map', *elu_tail)
        pre_output1 = highway_network(
            out_rep_map, hn, True, 0., 'pre_output1', *elu_tail)
        logits = linear(
            [pre_output1], self.output_class, True, 0., scope='logits',
            squeeze=False, wd=cfg.wd, input_keep_prob=cfg.dropout,
            is_train=self.is_train)

    selection1 = (s1_act, s1_logpa)
    selection2 = (s2_act, s2_logpa)
    percentages = (s1_percentage, s2_percentage)
    return logits, selection1, selection2, percentages
def build_network(self):
    """Build the sentence-pair classifier on ``sentence_encoding_models``.

    Both sentences share one embedding matrix and one encoder (the second
    encoder call runs after ``reuse_variables()`` inside 'sent_encoding').
    The activation is ELU when ``cfg.context_fusion_method`` is 'block' or
    'disa' and ReLU otherwise; the same choice is passed to the encoder (as a
    string) and applied in the output head. The pair features
    ``[s1, s2, s1 - s2, s1 * s2]`` go through one hidden layer of width
    ``self.hn`` and a projection to ``self.output_class`` units.

    Side effects:
        Stores tensors in ``self.tensor_dict`` under the string keys
        's1_emb', 's2_emb', 's1_rep', 's2_rep' and 'logits'.

    Returns:
        The logits tensor produced by the final ``linear`` call.
    """
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)
    tds, tel, hn = self.tds, self.tel, self.hn

    # Decide the activation once so the encoder and the output head agree.
    use_elu = cfg.context_fusion_method in ['block', 'disa']
    act_func_str = 'elu' if use_elu else 'relu'
    act_func = tf.nn.elu if use_elu else tf.nn.relu

    with tf.variable_scope('emb'):
        token_emb_mat = generate_embedding_mat(
            tds, tel,
            init_mat=self.token_emb_mat,
            extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb,
            scope='gene_token_emb_mat')
        # Original shape notes: bs,sl1,tel and bs,sl2,tel
        s1_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent1_token)
        s2_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent2_token)
        self.tensor_dict['s1_emb'] = s1_emb
        self.tensor_dict['s2_emb'] = s2_emb

    with tf.variable_scope('sent_encoding'):
        s1_rep = sentence_encoding_models(
            s1_emb, self.sent1_token_mask, cfg.context_fusion_method,
            act_func_str, 'ct_based_sent2vec', cfg.wd, self.is_train,
            cfg.dropout, block_len=cfg.block_len)
        # Sentence 2 must reuse the encoder variables created for sentence 1.
        tf.get_variable_scope().reuse_variables()
        s2_rep = sentence_encoding_models(
            s2_emb, self.sent2_token_mask, cfg.context_fusion_method,
            act_func_str, 'ct_based_sent2vec', cfg.wd, self.is_train,
            cfg.dropout, block_len=cfg.block_len)
        self.tensor_dict['s1_rep'] = s1_rep
        self.tensor_dict['s2_rep'] = s2_rep

    with tf.variable_scope('output'):
        out_rep = tf.concat(
            [s1_rep, s2_rep, s1_rep - s2_rep, s1_rep * s2_rep], -1)
        pre_output = act_func(linear(
            [out_rep], hn, True, 0., scope='pre_output', squeeze=False,
            wd=cfg.wd, input_keep_prob=cfg.dropout, is_train=self.is_train))
        logits = linear(
            [pre_output], self.output_class, True, 0., scope='logits',
            squeeze=False, wd=cfg.wd, input_keep_prob=cfg.dropout,
            is_train=self.is_train)
        # Use a string key like every other tensor_dict entry; the original
        # keyed this entry by the tensor object itself.
        self.tensor_dict['logits'] = logits
    return logits
def build_network(self):
    """Build the sentence-pair graph whose attention uses learned token selections.

    Stages, in graph-construction order:

    1. 'emb': shared embedding lookup for both sentences.
    2. 'hard_network': per sentence, ``sequence_conditional_feature`` followed
       by two ``generate_mask_with_rl`` calls ('..._dep' and '..._head').
       Their outputs are merged into a concatenated ``logpa``, a pairwise
       ``act`` mask (logical AND of the two masks expanded on axes 1 and 2)
       and a ``percentage`` (product of the two percentages).
    3. 'ct_attn': forward and backward
       ``directional_attention_with_selections``.
    4. 'sentence_enc': ``multi_dimensional_attention`` pooling, masked with
       the token mask returned by the forward attention call.
    5. 'output': ``bn_dense_layer`` over ``[s1 * s2, |s1 - s2|]`` followed by
       the prediction head.

    Sentence 2 reuses sentence 1's variables in stages 2-4 via
    ``reuse_variables()``.

    Returns:
        ``(logits, (s1_act, s1_logpa), (s2_act, s2_logpa),
        (s1_percentage, s2_percentage))``. When ``cfg.use_mse`` and
        ``cfg.mse_logits`` are both truthy, ``logits`` is a single sigmoid
        output rescaled by ``* 2. + 3.``; otherwise it is a projection to
        ``self.output_class`` units.
    """
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)

    # Size attributes are read in the original order; only tds, tel and hn
    # are consumed below.
    tds, cds, tl = self.tds, self.cds, self.tl
    tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
    hn = self.hn
    bs, sl1, sl2 = self.bs, self.sl1, self.sl2

    mask1, mask2 = self.sent1_token_mask, self.sent2_token_mask

    with tf.variable_scope('emb'):
        embedding_table = generate_embedding_mat(
            tds, tel,
            init_mat=self.token_emb_mat,
            extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb,
            scope='gene_token_emb_mat')
        # Original shape notes: bs,sl1,tel and bs,sl2,tel
        s1_emb = tf.nn.embedding_lookup(embedding_table, self.sent1_token)
        s2_emb = tf.nn.embedding_lookup(embedding_table, self.sent2_token)
        self.tensor_dict['s1_emb'] = s1_emb
        self.tensor_dict['s2_emb'] = s2_emb

    # Trailing positional arguments common to all generate_mask_with_rl calls.
    rl_tail = (cfg.dropout, self.is_train, cfg.wd, 'relu', self.disable_rl,
               self.global_step, cfg.mode, cfg.start_only_rl, hn)

    with tf.variable_scope('hard_network'):
        # ---- sentence 1 ----
        s1_feat = sequence_conditional_feature(s1_emb, mask1)
        # Original shape note for each call's outputs: [bs, sl] & [bs, sl]
        s1_logpa_dep, s1_act_dep, s1_pct_dep = generate_mask_with_rl(
            s1_feat, mask1, False, 'generate_mask_with_rl_dep', *rl_tail)
        s1_logpa_head, s1_act_head, s1_pct_head = generate_mask_with_rl(
            s1_feat, mask1, False, 'generate_mask_with_rl_head', *rl_tail)
        s1_logpa = tf.concat([s1_logpa_dep, s1_logpa_head], -1)
        s1_dep_axis1 = tf.expand_dims(s1_act_dep, 1)
        s1_head_axis2 = tf.expand_dims(s1_act_head, 2)
        s1_act = tf.logical_and(s1_dep_axis1, s1_head_axis2)
        s1_percentage = s1_pct_dep * s1_pct_head

        # Sentence 2 shares the selection variables built for sentence 1.
        tf.get_variable_scope().reuse_variables()

        # ---- sentence 2 ----
        s2_feat = sequence_conditional_feature(s2_emb, mask2)
        s2_logpa_dep, s2_act_dep, s2_pct_dep = generate_mask_with_rl(
            s2_feat, mask2, False, 'generate_mask_with_rl_dep', *rl_tail)
        s2_logpa_head, s2_act_head, s2_pct_head = generate_mask_with_rl(
            s2_feat, mask2, False, 'generate_mask_with_rl_head', *rl_tail)
        s2_logpa = tf.concat([s2_logpa_dep, s2_logpa_head], -1)
        s2_dep_axis1 = tf.expand_dims(s2_act_dep, 1)
        s2_head_axis2 = tf.expand_dims(s2_act_head, 2)
        s2_act = tf.logical_and(s2_dep_axis1, s2_head_axis2)
        s2_percentage = s2_pct_dep * s2_pct_head

    keep_unselected = True
    # Trailing positional arguments common to every attention call below.
    attn_tail = (cfg.dropout, self.is_train, cfg.wd, 'relu')

    with tf.variable_scope('ct_attn'):
        s1_forward, s1_token_mask_new = directional_attention_with_selections(
            s1_emb, mask1, s1_act_dep, s1_act_head, 'forward', hn,
            keep_unselected, 'dir_attn_fw', *attn_tail)
        s1_backward, _ = directional_attention_with_selections(
            s1_emb, mask1, s1_act_dep, s1_act_head, 'backward', hn,
            keep_unselected, 'dir_attn_bw', *attn_tail)
        s1_seq_rep = tf.concat([s1_forward, s1_backward], -1)

        tf.get_variable_scope().reuse_variables()

        s2_forward, s2_token_mask_new = directional_attention_with_selections(
            s2_emb, mask2, s2_act_dep, s2_act_head, 'forward', hn,
            keep_unselected, 'dir_attn_fw', *attn_tail)
        s2_backward, _ = directional_attention_with_selections(
            s2_emb, mask2, s2_act_dep, s2_act_head, 'backward', hn,
            keep_unselected, 'dir_attn_bw', *attn_tail)
        s2_seq_rep = tf.concat([s2_forward, s2_backward], -1)

    with tf.variable_scope('sentence_enc'):
        s1_rep = multi_dimensional_attention(
            s1_seq_rep, s1_token_mask_new, 'multi_dimensional_attention',
            *attn_tail, tensor_dict=self.tensor_dict, name='s1_attn')
        tf.get_variable_scope().reuse_variables()
        s2_rep = multi_dimensional_attention(
            s2_seq_rep, s2_token_mask_new, 'multi_dimensional_attention',
            *attn_tail, tensor_dict=self.tensor_dict, name='s2_attn')

    with tf.variable_scope('output'):
        pair_prod = s1_rep * s2_rep
        pair_abs_diff = tf.abs(s1_rep - s2_rep)
        out_rep = tf.concat([pair_prod, pair_abs_diff], -1)
        out_rep_map = bn_dense_layer(
            out_rep, hn, True, 0., 'out_rep_map', 'relu', False,
            cfg.wd, cfg.dropout, self.is_train)

        dense_kwargs = dict(wd=cfg.wd, input_keep_prob=cfg.dropout,
                            is_train=self.is_train)
        if cfg.use_mse and cfg.mse_logits:
            # Single squeezed output; sigmoid * 2 + 3 lies in (3, 5).
            raw_score = linear(out_rep_map, 1, True, 0., scope='logits',
                               squeeze=True, **dense_kwargs)
            logits = tf.nn.sigmoid(raw_score) * 2. + 3.
        else:
            logits = linear([out_rep_map], self.output_class, True, 0.,
                            scope='logits', squeeze=False, **dense_kwargs)

    selection1 = (s1_act, s1_logpa)
    selection2 = (s2_act, s2_logpa)
    percentages = (s1_percentage, s2_percentage)
    return logits, selection1, selection2, percentages
def build_network(self):
    """Assemble the directional-attention single-sentence classifier.

    Stages, each under its own variable scope:

    * 'emb'           -- embedding lookup, also stored in
      ``self.tensor_dict['emb']``.
    * 'ct_attn'       -- forward and backward
      ``directional_attention_with_dense``, concatenated on the last axis.
    * 'sent_enc_attn' -- ``multi_dimensional_attention`` pooling.
    * 'output'        -- a ReLU layer of width ``self.hn`` followed by a
      projection to ``self.output_class`` units.

    Returns:
        The tensor produced by the final ``linear`` call.
    """
    _logger.add()
    _logger.add('building %s neural network structure...' % cfg.network_type)

    # Size attributes are read in the original order; only tds, tel and hn
    # are consumed below.
    tds, cds, tl = self.tds, self.cds, self.tl
    tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
    hn = self.hn
    bs, sl, ol, mc = self.bs, self.sl, self.ol, self.mc

    token_mask = self.token_mask
    # Trailing positional arguments common to every attention call below.
    attn_tail = (cfg.dropout, self.is_train, cfg.wd, 'relu')
    # Keyword settings shared by both layers of the output head.
    dense_kwargs = dict(wd=cfg.wd, input_keep_prob=cfg.dropout, is_train=self.is_train)

    with tf.variable_scope('emb'):
        embedding_table = generate_embedding_mat(
            tds, tel,
            init_mat=self.token_emb_mat,
            extra_mat=self.glove_emb_mat,
            extra_trainable=self.finetune_emb,
            scope='gene_token_emb_mat')
        # Original shape note: bs,sl,tel
        token_vectors = tf.nn.embedding_lookup(embedding_table, self.token_seq)
        self.tensor_dict['emb'] = token_vectors

    with tf.variable_scope('ct_attn'):
        forward_ctx = directional_attention_with_dense(
            token_vectors, token_mask, 'forward', 'dir_attn_fw', *attn_tail,
            tensor_dict=self.tensor_dict, name='fw_attn')
        backward_ctx = directional_attention_with_dense(
            token_vectors, token_mask, 'backward', 'dir_attn_bw', *attn_tail,
            tensor_dict=self.tensor_dict, name='bw_attn')
        seq_rep = tf.concat([forward_ctx, backward_ctx], -1)

    with tf.variable_scope('sent_enc_attn'):
        sentence_vec = multi_dimensional_attention(
            seq_rep, token_mask, 'multi_dimensional_attention', *attn_tail,
            tensor_dict=self.tensor_dict, name='attn')

    with tf.variable_scope('output'):
        hidden = linear([sentence_vec], hn, True, scope='pre_logits_linear',
                        **dense_kwargs)
        hidden = tf.nn.relu(hidden)  # original shape note: bs, hn
        logits = linear([hidden], self.output_class, False, scope='get_output',
                        **dense_kwargs)  # original shape note: bs, 5

    _logger.done()
    return logits
def build_network(self):
    """Build the question-conditioned sentence scoring graph; return logits.

    Stages, each under its own variable scope:

    * 'emb'      -- shared embedding lookup for context and question tokens.
    * 'prepro'   -- ``multi_dimensional_attention`` pools the question, then
      ``bn_dense_layer`` maps it to width ``hn``.
    * 'sent_emb' -- the context is flattened to ``[bs * sn, sl, tel]``,
      encoded per sentence, reshaped back to ``[bs, sn, 2 * hn]`` and mapped
      to width ``hn``.
    * 'fusion'   -- the question vector is tiled ``sn`` times along axis 1 and
      combined with each sentence vector as ``[s, q, s - q, s * q]``.
    * 'output'   -- ``context_fusion_layers`` over the sentence axis, a dense
      layer, and ``get_logits`` masked by ``self.context_sent_mask``.

    Returns:
        The tensor returned by ``get_logits``.
    """
    tds, tel, hn = self.tds, self.tel, self.hn
    bs, sn, sl, ql = self.bs, self.sn, self.sl, self.ql

    # Trailing positional arguments common to every 'relu' bn_dense_layer call.
    relu_tail = ('relu', False, cfg.wd, cfg.dropout, self.is_train)

    with tf.variable_scope('emb'):
        embedding_table = generate_embedding_mat(
            tds, tel,
            init_mat=self.token_emb_mat,
            extra_mat=self.glove_emb_mat,
            scope='gene_token_emb_mat')
        # Presumably bs,sn,sl,tel (context) and bs,ql,tel (question) -- the
        # original note for the question read "s,ql,tel"; TODO confirm.
        c_emb = tf.nn.embedding_lookup(embedding_table, self.context_token)
        q_emb = tf.nn.embedding_lookup(embedding_table, self.question_token)

    with tf.variable_scope('prepro'):
        # Original shape note for both tensors: bs, hn
        q_rep = multi_dimensional_attention(
            q_emb, self.question_token_mask, 'q2coding',
            cfg.dropout, self.is_train, cfg.wd, 'relu')
        q_rep_map = bn_dense_layer(q_rep, hn, True, 0., 'q_rep_map', *relu_tail)

    with tf.variable_scope('sent_emb'):
        # Fold the sentence axis into the batch axis so each sentence is
        # encoded independently.
        flat_tokens_shape = [bs * sn, sl, tel]
        c_emb_rshp = tf.reshape(c_emb, flat_tokens_shape, 'c_emb_rshp')  # bs*sn,sl,tel
        flat_mask_shape = [bs * sn, sl]
        c_mask_rshp = tf.reshape(
            self.context_token_mask, flat_mask_shape, 'c_mask_rshp')  # bs*sn,sl
        sent_enc_rshp = sentence_encoding_models(
            c_emb_rshp, c_mask_rshp, cfg.context_fusion_method, 'relu',
            'sent2enc', cfg.wd, self.is_train, cfg.dropout, hn,
            block_len=cfg.block_len)  # original shape note: bs*sn, 2*hn
        sent_enc = tf.reshape(sent_enc_rshp, [bs, sn, 2 * hn])  # bs,sn,2*hn
        sent_enc_map = bn_dense_layer(
            sent_enc, hn, True, 0., 'sent_enc_map', *relu_tail)

    with tf.variable_scope('fusion'):
        q_with_sent_axis = tf.expand_dims(q_rep_map, 1)
        q_rep_map_ex = tf.tile(q_with_sent_axis, [1, sn, 1])  # original note: bs, sn, hn
        sent_minus_q = sent_enc_map - q_rep_map_ex
        sent_times_q = sent_enc_map * q_rep_map_ex
        fusion_rep = tf.concat(
            [sent_enc_map, q_rep_map_ex, sent_minus_q, sent_times_q],
            -1)  # original note: bs,sn,4hn

    with tf.variable_scope('output'):
        out_cf = context_fusion_layers(
            fusion_rep, self.context_sent_mask, cfg.context_fusion_method,
            'relu', 'out_cf', cfg.wd, self.is_train, cfg.dropout, hn,
            block_len=4)
        pre_output = bn_dense_layer(
            out_cf, hn, True, 0., 'pre_output', *relu_tail)
        # Original note on this call: "exp masked"
        logits = get_logits(
            pre_output, None, True, 0., 'logits', self.context_sent_mask,
            cfg.wd, cfg.dropout, self.is_train, 'linear')
    return logits