Example #1
File: interface.py Project: byd789/MPSAN
def sentence_encoding_models(rep_tensor,
                             rep_mask,
                             method,
                             activation_function,
                             scope=None,
                             wd=0.,
                             is_train=None,
                             keep_prob=1.,
                             **kwargs):
    # supported values for `method`: 'cnn_kim' and 'none' are special-cased
    # below, 'no_ct' skips context fusion, and the rest are passed through
    # to context_fusion_layers
    method_name_list = [
        'cnn_kim',
        'none',
        'no_ct',
        'lstm',
        'gru',
        'sru',
        'sru_normal',  # rnn
        'cnn',
        'multi_head',
        'multi_head_git',
        'disa',
        'mlsa',
        'block'
    ]
    with tf.variable_scope(scope or 'sentence_encoding_models'):
        if method == 'cnn_kim':
            sent_coding = cnn_for_sentence_encoding(rep_tensor, rep_mask,
                                                    (3, 4, 5), 200,
                                                    'sent_encoding_cnn_kim',
                                                    is_train, keep_prob, wd)
        elif method == 'none':
            sent_coding = tf.reduce_sum(
                mask_for_high_rank(rep_tensor, rep_mask), 1)
        else:
            if method == 'no_ct':
                ct_rep = tf.identity(rep_tensor)
            else:
                ct_rep = context_fusion_layers(rep_tensor, rep_mask, method,
                                               activation_function, None, wd,
                                               is_train, keep_prob, **kwargs)

            sent_coding = multi_dimensional_attention(
                ct_rep, rep_mask, 'multi_dim_attn_for_%s' % method, keep_prob,
                is_train, wd, activation_function)

        return sent_coding
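
A minimal call sketch for the function above, assuming the project's helpers (context_fusion_layers and friends) are importable; the tensor shapes, method choice, and hyperparameter values here are illustrative, not from the source:

import tensorflow as tf

# hypothetical inputs: a token-representation batch plus its boolean mask
rep_tensor = tf.placeholder(tf.float32, [None, 40, 300])  # bs, sl, dim
rep_mask = tf.placeholder(tf.bool, [None, 40])            # bs, sl
sent_coding = sentence_encoding_models(
    rep_tensor, rep_mask, 'disa', 'relu',
    scope='sent_enc', wd=1e-4, is_train=tf.constant(True), keep_prob=0.9)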
Example #2
    def build_network(self):
        _logger.add()
        _logger.add('building %s neural network structure...' %
                    cfg.network_type)
        tds, cds = self.tds, self.cds
        tl = self.tl
        tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
        hn = self.hn
        bs, sl1, sl2 = self.bs, self.sl1, self.sl2

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                tds,
                tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                extra_trainable=self.finetune_emb,
                scope='gene_token_emb_mat')
            s1_emb = tf.nn.embedding_lookup(token_emb_mat,
                                            self.sent1_token)  # bs,sl1,tel
            s2_emb = tf.nn.embedding_lookup(token_emb_mat,
                                            self.sent2_token)  # bs,sl2,tel
            self.tensor_dict['s1_emb'] = s1_emb
            self.tensor_dict['s2_emb'] = s2_emb

        with tf.variable_scope('sent_enc_attn'):
            s1_rep = multi_dimensional_attention(s1_emb,
                                                 self.sent1_token_mask,
                                                 'multi_dimensional_attention',
                                                 cfg.dropout,
                                                 self.is_train,
                                                 cfg.wd,
                                                 tensor_dict=self.tensor_dict,
                                                 name='s1_attn')
            tf.get_variable_scope().reuse_variables()
            s2_rep = multi_dimensional_attention(s2_emb,
                                                 self.sent2_token_mask,
                                                 'multi_dimensional_attention',
                                                 cfg.dropout,
                                                 self.is_train,
                                                 cfg.wd,
                                                 tensor_dict=self.tensor_dict,
                                                 name='s2_attn')

            self.tensor_dict['s1_rep'] = s1_rep
            self.tensor_dict['s2_rep'] = s2_rep

        with tf.variable_scope('output'):
            out_rep = tf.concat(
                [s1_rep, s2_rep, s1_rep - s2_rep, s1_rep * s2_rep], -1)
            pre_output = tf.nn.elu(
                linear([out_rep],
                       hn,
                       True,
                       0.,
                       scope='pre_output',
                       squeeze=False,
                       wd=cfg.wd,
                       input_keep_prob=cfg.dropout,
                       is_train=self.is_train))
            logits = linear([pre_output],
                            self.output_class,
                            True,
                            0.,
                            scope='logits',
                            squeeze=False,
                            wd=cfg.wd,
                            input_keep_prob=cfg.dropout,
                            is_train=self.is_train)
            self.tensor_dict['logits'] = logits
        return logits
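
The 'output' scope builds the widely used heuristic matching features: the two sentence vectors, their difference, and their elementwise product, concatenated along the last axis. A self-contained sketch of just that step, with illustrative shapes:

import tensorflow as tf

s1 = tf.random_normal([8, 128])  # stand-in for s1_rep
s2 = tf.random_normal([8, 128])  # stand-in for s2_rep
out_rep = tf.concat([s1, s2, s1 - s2, s1 * s2], -1)  # [8, 4 * 128]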
Example #3
    def build_network(self):
        _logger.add()
        _logger.add('building %s neural network structure...' % cfg.network_type)

        tds, cds = self.tds, self.cds
        tl = self.tl
        tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
        hn = self.hn
        bs, sl1, sl2 = self.bs, self.sl1, self.sl2

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(tds, tel, init_mat=self.token_emb_mat,
                                                   extra_mat=self.glove_emb_mat, extra_trainable=self.finetune_emb,
                                                   scope='gene_token_emb_mat')
            s1_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent1_token)  # bs,sl1,tel
            s2_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent2_token)  # bs,sl2,tel
            self.tensor_dict['s1_emb'] = s1_emb
            self.tensor_dict['s2_emb'] = s2_emb

        with tf.variable_scope('hard_network'):
            # s1_act, s1_logpa, s2_act, s2_logpa, choose_percentage
            s1_act = self.sent1_token_mask
            s1_logpa = tf.cast(s1_act, tf.float32)

            s2_act = self.sent2_token_mask
            s2_logpa = tf.cast(s2_act, tf.float32)

            s1_percentage = tf.ones([bs], tf.float32)
            s2_percentage = tf.ones([bs], tf.float32)

        with tf.variable_scope('ct_attn'):
            s1_fw = directional_attention_with_dense(
                s1_emb, self.sent1_token_mask, 'forward', 'dir_attn_fw',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s1_fw_attn')
            s1_bw = directional_attention_with_dense(
                s1_emb, self.sent1_token_mask, 'backward', 'dir_attn_bw',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s1_bw_attn')

            s1_seq_rep = tf.concat([s1_fw, s1_bw], -1)

            tf.get_variable_scope().reuse_variables()

            s2_fw = directional_attention_with_dense(
                s2_emb, self.sent2_token_mask, 'forward', 'dir_attn_fw',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s2_fw_attn')
            s2_bw = directional_attention_with_dense(
                s2_emb, self.sent2_token_mask, 'backward', 'dir_attn_bw',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s2_bw_attn')
            s2_seq_rep = tf.concat([s2_fw, s2_bw], -1)

        with tf.variable_scope('sentence_enc'):
            s1_rep = multi_dimensional_attention(
                s1_seq_rep, self.sent1_token_mask, 'multi_dimensional_attention',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s1_attn')
            tf.get_variable_scope().reuse_variables()
            s2_rep = multi_dimensional_attention(
                s2_seq_rep, self.sent2_token_mask, 'multi_dimensional_attention',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s2_attn')

        with tf.variable_scope('output'):
            out_rep = tf.concat([s1_rep, s2_rep, s1_rep - s2_rep, s1_rep * s2_rep], -1)
            out_rep_map = bn_dense_layer(
                out_rep, hn, True, 0., 'out_rep_map', 'elu', False, cfg.wd, cfg.dropout, self.is_train)
            pre_output1 = highway_network(
                out_rep_map, hn, True, 0., 'pre_output1', 'elu', False, cfg.wd, cfg.dropout, self.is_train)
            logits = linear([pre_output1], self.output_class, True, 0., scope='logits', squeeze=False,
                            wd=cfg.wd, input_keep_prob=cfg.dropout, is_train=self.is_train)
        return logits, (s1_act, s1_logpa), (s2_act, s2_logpa), (s1_percentage, s2_percentage)
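
The tf.get_variable_scope().reuse_variables() calls are what tie the two sentence branches together: every variable created while encoding sentence 1 is reused, not re-created, for sentence 2. A self-contained sketch of this TF1 mechanic:

import tensorflow as tf

with tf.variable_scope('shared'):
    w1 = tf.get_variable('w', [3])            # created on first call
    tf.get_variable_scope().reuse_variables()
    w2 = tf.get_variable('w', [3])            # returns the same variable
assert w1 is w2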
Example #4
    def build_network(self):
        _logger.add()
        _logger.add('building %s neural network structure...' %
                    cfg.network_type)

        tds, cds = self.tds, self.cds
        tl = self.tl
        tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
        hn = self.hn
        bs, sl1, sl2 = self.bs, self.sl1, self.sl2

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                tds,
                tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                extra_trainable=self.finetune_emb,
                scope='gene_token_emb_mat')
            s1_emb = tf.nn.embedding_lookup(token_emb_mat,
                                            self.sent1_token)  # bs,sl1,tel
            s2_emb = tf.nn.embedding_lookup(token_emb_mat,
                                            self.sent2_token)  # bs,sl2,tel
            self.tensor_dict['s1_emb'] = s1_emb
            self.tensor_dict['s2_emb'] = s2_emb

        with tf.variable_scope('hard_network'):
            # for sentence 1
            s1_emb_new = sequence_conditional_feature(s1_emb,
                                                      self.sent1_token_mask)
            s1_logpa_dep, s1_act_dep, s1_percentage_dep = generate_mask_with_rl(
                s1_emb_new, self.sent1_token_mask, False,
                'generate_mask_with_rl_dep', cfg.dropout, self.is_train,
                cfg.wd, 'relu', self.disable_rl, self.global_step, cfg.mode,
                cfg.start_only_rl, hn)  # [bs, sl] & [bs, sl]
            s1_logpa_head, s1_act_head, s1_percentage_head = generate_mask_with_rl(
                s1_emb_new, self.sent1_token_mask, False,
                'generate_mask_with_rl_head', cfg.dropout, self.is_train,
                cfg.wd, 'relu', self.disable_rl, self.global_step, cfg.mode,
                cfg.start_only_rl, hn)  # [bs, sl] & [bs, sl]
            s1_logpa = tf.concat([s1_logpa_dep, s1_logpa_head], -1)
            s1_act = tf.logical_and(tf.expand_dims(s1_act_dep, 1),
                                    tf.expand_dims(s1_act_head, 2))
            s1_percentage = s1_percentage_dep * s1_percentage_head

            tf.get_variable_scope().reuse_variables()
            # for sentence 2
            s2_emb_new = sequence_conditional_feature(s2_emb,
                                                      self.sent2_token_mask)
            s2_logpa_dep, s2_act_dep, s2_percentage_dep = generate_mask_with_rl(
                s2_emb_new, self.sent2_token_mask, False,
                'generate_mask_with_rl_dep', cfg.dropout, self.is_train,
                cfg.wd, 'relu', self.disable_rl, self.global_step, cfg.mode,
                cfg.start_only_rl, hn)  # [bs, sl] & [bs, sl]
            s2_logpa_head, s2_act_head, s2_percentage_head = generate_mask_with_rl(
                s2_emb_new, self.sent2_token_mask, False,
                'generate_mask_with_rl_head', cfg.dropout, self.is_train,
                cfg.wd, 'relu', self.disable_rl, self.global_step, cfg.mode,
                cfg.start_only_rl, hn)  # [bs, sl] & [bs, sl]
            s2_logpa = tf.concat([s2_logpa_dep, s2_logpa_head], -1)
            s2_act = tf.logical_and(tf.expand_dims(s2_act_dep, 1),
                                    tf.expand_dims(s2_act_head, 2))
            s2_percentage = s2_percentage_dep * s2_percentage_head

        keep_unselected = True
        with tf.variable_scope('ct_attn'):
            s1_fw, s1_token_mask_new = directional_attention_with_selections(
                s1_emb, self.sent1_token_mask, s1_act_dep, s1_act_head,
                'forward', hn, keep_unselected, 'dir_attn_fw', cfg.dropout,
                self.is_train, cfg.wd, 'relu')
            s1_bw, _ = directional_attention_with_selections(
                s1_emb, self.sent1_token_mask, s1_act_dep, s1_act_head,
                'backward', hn, keep_unselected, 'dir_attn_bw', cfg.dropout,
                self.is_train, cfg.wd, 'relu')

            s1_seq_rep = tf.concat([s1_fw, s1_bw], -1)

            tf.get_variable_scope().reuse_variables()

            s2_fw, s2_token_mask_new = directional_attention_with_selections(
                s2_emb, self.sent2_token_mask, s2_act_dep, s2_act_head,
                'forward', hn, keep_unselected, 'dir_attn_fw', cfg.dropout,
                self.is_train, cfg.wd, 'relu')
            s2_bw, _ = directional_attention_with_selections(
                s2_emb, self.sent2_token_mask, s2_act_dep, s2_act_head,
                'backward', hn, keep_unselected, 'dir_attn_bw', cfg.dropout,
                self.is_train, cfg.wd, 'relu')
            s2_seq_rep = tf.concat([s2_fw, s2_bw], -1)

        with tf.variable_scope('sentence_enc'):
            s1_rep = multi_dimensional_attention(s1_seq_rep,
                                                 s1_token_mask_new,
                                                 'multi_dimensional_attention',
                                                 cfg.dropout,
                                                 self.is_train,
                                                 cfg.wd,
                                                 'relu',
                                                 tensor_dict=self.tensor_dict,
                                                 name='s1_attn')
            tf.get_variable_scope().reuse_variables()
            s2_rep = multi_dimensional_attention(s2_seq_rep,
                                                 s2_token_mask_new,
                                                 'multi_dimensional_attention',
                                                 cfg.dropout,
                                                 self.is_train,
                                                 cfg.wd,
                                                 'relu',
                                                 tensor_dict=self.tensor_dict,
                                                 name='s2_attn')

        with tf.variable_scope('output'):
            out_rep = tf.concat([s1_rep * s2_rep, tf.abs(s1_rep - s2_rep)], -1)
            out_rep_map = bn_dense_layer(out_rep, hn, True, 0., 'out_rep_map',
                                         'relu', False, cfg.wd, cfg.dropout,
                                         self.is_train)
            if cfg.use_mse and cfg.mse_logits:
                # squash to (0, 1), then affinely map into the (3, 5) range
                logits = tf.nn.sigmoid(
                    linear(out_rep_map,
                           1,
                           True,
                           0.,
                           scope='logits',
                           squeeze=True,
                           wd=cfg.wd,
                           input_keep_prob=cfg.dropout,
                           is_train=self.is_train)) * 2. + 3.
            else:
                logits = linear([out_rep_map],
                                self.output_class,
                                True,
                                0.,
                                scope='logits',
                                squeeze=False,
                                wd=cfg.wd,
                                input_keep_prob=cfg.dropout,
                                is_train=self.is_train)
        return logits, (s1_act, s1_logpa), (s2_act, s2_logpa), \
            (s1_percentage, s2_percentage)
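
The s1_act/s2_act construction relies on broadcasting: expanding the dependent mask on axis 1 and the head mask on axis 2 turns two [bs, sl] token masks into a [bs, sl, sl] token-pair mask. A self-contained sketch:

import tensorflow as tf

dep = tf.constant([[True, False, True]])   # [bs=1, sl=3]
head = tf.constant([[True, True, False]])  # [bs=1, sl=3]
pair = tf.logical_and(tf.expand_dims(dep, 1),   # [1, 1, 3]
                      tf.expand_dims(head, 2))  # [1, 3, 1]
# pair has shape [1, 3, 3]; pair[b, i, j] == head[b, i] and dep[b, j]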
Example #5
    def build_network(self):
        _logger.add()
        _logger.add('building %s neural network structure...' %
                    cfg.network_type)
        tds, cds = self.tds, self.cds
        tl = self.tl
        tel, cel, cos, ocd, fh = self.tel, self.cel, self.cos, self.ocd, self.fh
        hn = self.hn
        bs, sl, ol, mc = self.bs, self.sl, self.ol, self.mc

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                tds,
                tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                extra_trainable=self.finetune_emb,
                scope='gene_token_emb_mat')
            emb = tf.nn.embedding_lookup(token_emb_mat,
                                         self.token_seq)  # bs,sl,tel
            self.tensor_dict['emb'] = emb

        with tf.variable_scope('ct_attn'):
            rep_fw = directional_attention_with_dense(
                emb,
                self.token_mask,
                'forward',
                'dir_attn_fw',
                cfg.dropout,
                self.is_train,
                cfg.wd,
                'relu',
                tensor_dict=self.tensor_dict,
                name='fw_attn')
            rep_bw = directional_attention_with_dense(
                emb,
                self.token_mask,
                'backward',
                'dir_attn_bw',
                cfg.dropout,
                self.is_train,
                cfg.wd,
                'relu',
                tensor_dict=self.tensor_dict,
                name='bw_attn')

            seq_rep = tf.concat([rep_fw, rep_bw], -1)

        with tf.variable_scope('sent_enc_attn'):
            rep = multi_dimensional_attention(seq_rep,
                                              self.token_mask,
                                              'multi_dimensional_attention',
                                              cfg.dropout,
                                              self.is_train,
                                              cfg.wd,
                                              'relu',
                                              tensor_dict=self.tensor_dict,
                                              name='attn')

        with tf.variable_scope('output'):
            pre_logits = tf.nn.relu(
                linear([rep],
                       hn,
                       True,
                       scope='pre_logits_linear',
                       wd=cfg.wd,
                       input_keep_prob=cfg.dropout,
                       is_train=self.is_train))  # bs, hn
            logits = linear([pre_logits],
                            self.output_class,
                            False,
                            scope='get_output',
                            wd=cfg.wd,
                            input_keep_prob=cfg.dropout,
                            is_train=self.is_train)  # bs, 5
        _logger.done()
        return logits
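
The 'output' scope is a plain two-layer classification head: a ReLU projection to hn units, then a bias-free linear map to the class logits. A sketch of the same pattern using stock TF1 layers in place of the project's linear helper (sizes are illustrative):

import tensorflow as tf

rep = tf.random_normal([8, 600])                          # sentence vector
pre_logits = tf.nn.relu(tf.layers.dense(rep, 300))        # bs, hn
logits = tf.layers.dense(pre_logits, 5, use_bias=False)   # bs, n_classes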
Example #6
    def build_network(self):
        tds, tel, hn = self.tds, self.tel, self.hn
        bs, sn, sl, ql = self.bs, self.sn, self.sl, self.ql

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                tds,
                tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                scope='gene_token_emb_mat')
            c_emb = tf.nn.embedding_lookup(token_emb_mat,
                                           self.context_token)  # bs,sn,sl,tel
            q_emb = tf.nn.embedding_lookup(token_emb_mat,
                                           self.question_token)  # bs,ql,tel

        with tf.variable_scope('prepro'):
            q_rep = multi_dimensional_attention(q_emb,
                                                self.question_token_mask,
                                                'q2coding', cfg.dropout,
                                                self.is_train, cfg.wd,
                                                'relu')  # bs, hn
            q_rep_map = bn_dense_layer(q_rep, hn, True, 0., 'q_rep_map',
                                       'relu', False, cfg.wd, cfg.dropout,
                                       self.is_train)  # bs, hn

        with tf.variable_scope('sent_emb'):
            c_emb_rshp = tf.reshape(c_emb, [bs * sn, sl, tel],
                                    'c_emb_rshp')  # bs*sn,sl,tel
            c_mask_rshp = tf.reshape(self.context_token_mask, [bs * sn, sl],
                                     'c_mask_rshp')  # bs*sn,sl
            sent_enc_rshp = sentence_encoding_models(
                c_emb_rshp,
                c_mask_rshp,
                cfg.context_fusion_method,
                'relu',
                'sent2enc',
                cfg.wd,
                self.is_train,
                cfg.dropout,
                hn,
                block_len=cfg.block_len)  # bs*sn, 2*hn
            sent_enc = tf.reshape(sent_enc_rshp,
                                  [bs, sn, 2 * hn])  # bs,sn, 2*hn
            sent_enc_map = bn_dense_layer(sent_enc, hn, True, 0.,
                                          'sent_enc_map', 'relu', False,
                                          cfg.wd, cfg.dropout, self.is_train)

        with tf.variable_scope('fusion'):
            q_rep_map_ex = tf.tile(tf.expand_dims(q_rep_map, 1),
                                   [1, sn, 1])  # bs, sn, hn
            fusion_rep = tf.concat([
                sent_enc_map, q_rep_map_ex, sent_enc_map - q_rep_map_ex,
                sent_enc_map * q_rep_map_ex
            ], -1)  # bs,sn,4hn

        with tf.variable_scope('output'):
            out_cf = context_fusion_layers(fusion_rep,
                                           self.context_sent_mask,
                                           cfg.context_fusion_method,
                                           'relu',
                                           'out_cf',
                                           cfg.wd,
                                           self.is_train,
                                           cfg.dropout,
                                           hn,
                                           block_len=4)
            pre_output = bn_dense_layer(out_cf, hn, True, 0., 'pre_output',
                                        'relu', False, cfg.wd, cfg.dropout,
                                        self.is_train)

        logits = get_logits(  # exp masked
            pre_output, None, True, 0., 'logits', self.context_sent_mask,
            cfg.wd, cfg.dropout, self.is_train, 'linear')
        return logits
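
The 'sent_emb' scope uses the standard reshape trick: fold the sentence axis into the batch axis so a token-level encoder can process every sentence of every example in one pass, then restore the axis afterwards. A self-contained sketch with illustrative sizes:

import tensorflow as tf

bs, sn, sl, tel = 4, 10, 30, 300
c_emb = tf.random_normal([bs, sn, sl, tel])
c_emb_rshp = tf.reshape(c_emb, [bs * sn, sl, tel])  # token-encoder input
# encode to [bs*sn, d], then bring back the sentence axis:
# sent_enc = tf.reshape(sent_enc_rshp, [bs, sn, d])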