Ejemplo n.º 1
0
    def build_network(self):
        """Build the single-sequence classification graph and return its logits.

        Stages, each under its own variable scope:

        1. ``emb``: build the token embedding matrix and look up
           ``self.token_seq``.
        2. ``sent_encoding``: encode the embedded sequence with
           ``sentence_encoding_models``, using the fusion method named by
           ``cfg.context_fusion_method``.
        3. ``output``: a ReLU-activated ``linear`` layer of width ``self.hn``
           followed by a ``linear`` layer of width ``self.output_class``.

        Returns:
            The tensor produced by the final ``linear`` call.
        """
        _logger.add()
        _logger.add('building %s neural network structure...' % cfg.network_type)

        hn = self.hn

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                self.tds, self.tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                extra_trainable=self.finetune_emb,
                scope='gene_token_emb_mat')
            # Author's shape annotation: bs, sl, tel
            emb = tf.nn.embedding_lookup(token_emb_mat, self.token_seq)

        with tf.variable_scope('sent_encoding'):
            rep = sentence_encoding_models(
                emb, self.token_mask, cfg.context_fusion_method, 'relu',
                'ct_based_sent2vec', cfg.wd, self.is_train, cfg.dropout,
                block_len=cfg.block_len)

        with tf.variable_scope('output'):
            hidden = linear(
                [rep], hn, True, scope='pre_logits_linear',
                wd=cfg.wd, input_keep_prob=cfg.dropout, is_train=self.is_train)
            pre_logits = tf.nn.relu(hidden)  # author's annotation: bs, hn
            logits = linear(
                [pre_logits], self.output_class, False, scope='get_output',
                wd=cfg.wd, input_keep_prob=cfg.dropout,
                is_train=self.is_train)  # author's annotation: bs, 5

        _logger.done()
        return logits
Ejemplo n.º 2
0
    def build_network(self):
        """Build the DiSAN-based classification graph and return its logits.

        Stages:

        1. ``emb`` scope: build the token embedding matrix, look up
           ``self.token_seq`` and record the result in
           ``self.tensor_dict['emb']``.
        2. Encode the embedded sequence with ``disan`` (called outside any
           extra variable scope, with scope name ``'DiSAN'``).
        3. ``output`` scope: a ReLU-activated ``linear`` layer of width
           ``self.hn`` followed by a ``linear`` layer of width
           ``self.output_class``.

        Returns:
            The tensor produced by the final ``linear`` call.
        """
        _logger.add()
        _logger.add('building %s neural network structure...' % cfg.network_type)

        hn = self.hn

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                self.tds, self.tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                extra_trainable=self.finetune_emb,
                scope='gene_token_emb_mat')
            # Author's shape annotation: bs, sl, tel
            emb = tf.nn.embedding_lookup(token_emb_mat, self.token_seq)
            self.tensor_dict['emb'] = emb

        rep = disan(
            emb, self.token_mask, 'DiSAN', cfg.dropout,
            self.is_train, cfg.wd, 'relu',
            tensor_dict=self.tensor_dict, name='')

        with tf.variable_scope('output'):
            hidden = linear(
                [rep], hn, True, scope='pre_logits_linear',
                wd=cfg.wd, input_keep_prob=cfg.dropout, is_train=self.is_train)
            pre_logits = tf.nn.relu(hidden)  # author's annotation: bs, hn
            logits = linear(
                [pre_logits], self.output_class, False, scope='get_output',
                wd=cfg.wd, input_keep_prob=cfg.dropout,
                is_train=self.is_train)  # author's annotation: bs, 5

        _logger.done()
        return logits
Ejemplo n.º 3
0
    def build_network(self):
        """Build the sentence-pair graph using ``traditional_attention``.

        Stages, each under its own variable scope:

        1. ``emb``: one shared embedding matrix is used to look up both
           ``self.sent1_token`` and ``self.sent2_token``.
        2. ``sent_enc_attn``: each sentence is encoded by
           ``traditional_attention``. ``reuse_variables()`` is called on the
           scope between the two calls, so the second call reuses the
           variables created by the first.
        3. ``output``: the two representations are combined as
           ``[s1, s2, s1 - s2, s1 * s2]``, passed through an ELU-activated
           ``linear`` layer of width ``self.hn`` and then a ``linear`` layer
           of width ``self.output_class``.

        Intermediate tensors are recorded in ``self.tensor_dict`` under the
        keys ``'s1_emb'``, ``'s2_emb'``, ``'s1_rep'``, ``'s2_rep'`` and
        ``'logits'``.

        Returns:
            The tensor produced by the final ``linear`` call.
        """
        _logger.add()
        _logger.add('building %s neural network structure...' % cfg.network_type)

        hn = self.hn

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                self.tds, self.tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                extra_trainable=self.finetune_emb,
                scope='gene_token_emb_mat')
            # Author's shape annotations: bs, sl1, tel / bs, sl2, tel
            s1_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent1_token)
            s2_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent2_token)
            self.tensor_dict['s1_emb'] = s1_emb
            self.tensor_dict['s2_emb'] = s2_emb

        with tf.variable_scope('sent_enc_attn'):
            s1_rep = traditional_attention(
                s1_emb, self.sent1_token_mask, 'traditional_attention',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s1_attn')
            # Share the encoder weights between the two sentences.
            tf.get_variable_scope().reuse_variables()
            s2_rep = traditional_attention(
                s2_emb, self.sent2_token_mask, 'traditional_attention',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s2_attn')

            self.tensor_dict['s1_rep'] = s1_rep
            self.tensor_dict['s2_rep'] = s2_rep

        with tf.variable_scope('output'):
            out_rep = tf.concat(
                [s1_rep, s2_rep, s1_rep - s2_rep, s1_rep * s2_rep], -1)
            pre_output = tf.nn.elu(linear(
                [out_rep], hn, True, 0., scope='pre_output', squeeze=False,
                wd=cfg.wd, input_keep_prob=cfg.dropout,
                is_train=self.is_train))
            logits = linear(
                [pre_output], self.output_class, True, 0., scope='logits',
                squeeze=False, wd=cfg.wd, input_keep_prob=cfg.dropout,
                is_train=self.is_train)
            # Stored under a string key, like every other tensor_dict entry;
            # using the tensor object as the key makes the entry impossible to
            # look up by name and fails outright when tensors are unhashable.
            self.tensor_dict['logits'] = logits
        return logits
Ejemplo n.º 4
0
    def build_network(self):
        """Build the sentence-pair graph with dense directional attention.

        Stages, each under its own variable scope:

        1. ``emb``: one shared embedding matrix is used to look up both
           sentences; the lookups are stored in ``self.tensor_dict``.
        2. ``hard_network``: no selection is learned here. The "actions" are
           simply the input token masks, the "log-probabilities" are those
           masks cast to float32, and both percentages are ``ones([bs])``.
        3. ``ct_attn``: forward and backward
           ``directional_attention_with_dense`` outputs are concatenated on
           the last axis for each sentence. ``reuse_variables()`` is called
           before sentence 2 so it reuses the variables created for
           sentence 1.
        4. ``sentence_enc``: ``multi_dimensional_attention`` per sentence,
           again with variable reuse for sentence 2.
        5. ``output``: ``[s1, s2, s1 - s2, s1 * s2]`` ->
           ``bn_dense_layer`` -> ``highway_network`` -> ``linear`` of width
           ``self.output_class``.

        Returns:
            A 4-tuple ``(logits, (s1_act, s1_logpa), (s2_act, s2_logpa),
            (s1_percentage, s2_percentage))``.
        """
        _logger.add()
        _logger.add('building %s neural network structure...' % cfg.network_type)

        hn = self.hn
        bs = self.bs

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                self.tds, self.tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                extra_trainable=self.finetune_emb,
                scope='gene_token_emb_mat')
            # Author's shape annotations: bs, sl1, tel / bs, sl2, tel
            s1_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent1_token)
            s2_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent2_token)
            self.tensor_dict['s1_emb'] = s1_emb
            self.tensor_dict['s2_emb'] = s2_emb

        with tf.variable_scope('hard_network'):
            # Pass-through placeholders: every unmasked token counts as
            # "selected", so the returned tuple keeps its structure without
            # any learned selection.
            s1_act = self.sent1_token_mask
            s1_logpa = tf.cast(s1_act, tf.float32)

            s2_act = self.sent2_token_mask
            s2_logpa = tf.cast(s2_act, tf.float32)

            s1_percentage = tf.ones([bs], tf.float32)
            s2_percentage = tf.ones([bs], tf.float32)

        with tf.variable_scope('ct_attn'):
            s1_fw = directional_attention_with_dense(
                s1_emb, self.sent1_token_mask, 'forward', 'dir_attn_fw',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s1_fw_attn')
            s1_bw = directional_attention_with_dense(
                s1_emb, self.sent1_token_mask, 'backward', 'dir_attn_bw',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s1_bw_attn')
            s1_seq_rep = tf.concat([s1_fw, s1_bw], -1)

            # Share the attention weights between the two sentences.
            tf.get_variable_scope().reuse_variables()

            s2_fw = directional_attention_with_dense(
                s2_emb, self.sent2_token_mask, 'forward', 'dir_attn_fw',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s2_fw_attn')
            s2_bw = directional_attention_with_dense(
                s2_emb, self.sent2_token_mask, 'backward', 'dir_attn_bw',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s2_bw_attn')
            s2_seq_rep = tf.concat([s2_fw, s2_bw], -1)

        with tf.variable_scope('sentence_enc'):
            s1_rep = multi_dimensional_attention(
                s1_seq_rep, self.sent1_token_mask, 'multi_dimensional_attention',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s1_attn')
            tf.get_variable_scope().reuse_variables()
            s2_rep = multi_dimensional_attention(
                s2_seq_rep, self.sent2_token_mask, 'multi_dimensional_attention',
                cfg.dropout, self.is_train, cfg.wd,
                tensor_dict=self.tensor_dict, name='s2_attn')

        with tf.variable_scope('output'):
            out_rep = tf.concat(
                [s1_rep, s2_rep, s1_rep - s2_rep, s1_rep * s2_rep], -1)
            out_rep_map = bn_dense_layer(
                out_rep, hn, True, 0., 'out_rep_map', 'elu', False,
                cfg.wd, cfg.dropout, self.is_train)
            pre_output1 = highway_network(
                out_rep_map, hn, True, 0., 'pre_output1', 'elu', False,
                cfg.wd, cfg.dropout, self.is_train)
            logits = linear(
                [pre_output1], self.output_class, True, 0., scope='logits',
                squeeze=False, wd=cfg.wd, input_keep_prob=cfg.dropout,
                is_train=self.is_train)

        return (logits,
                (s1_act, s1_logpa),
                (s2_act, s2_logpa),
                (s1_percentage, s2_percentage))
Ejemplo n.º 5
0
    def build_network(self):
        """Build the sentence-pair graph using ``sentence_encoding_models``.

        Stages, each under its own variable scope:

        1. ``emb``: one shared embedding matrix is used to look up both
           ``self.sent1_token`` and ``self.sent2_token``.
        2. ``sent_encoding``: each sentence is encoded with
           ``sentence_encoding_models``. ``reuse_variables()`` is called
           between the two calls, so sentence 2 reuses the variables created
           for sentence 1.
        3. ``output``: ``[s1, s2, s1 - s2, s1 * s2]`` -> activated ``linear``
           layer of width ``self.hn`` -> ``linear`` layer of width
           ``self.output_class``.

        The activation is ELU when ``cfg.context_fusion_method`` is
        ``'block'`` or ``'disa'`` and ReLU otherwise; the same choice is
        applied to the encoder (by name) and to the output layer.

        Intermediate tensors are recorded in ``self.tensor_dict`` under the
        keys ``'s1_emb'``, ``'s2_emb'``, ``'s1_rep'``, ``'s2_rep'`` and
        ``'logits'``.

        Returns:
            The tensor produced by the final ``linear`` call.
        """
        _logger.add()
        _logger.add('building %s neural network structure...' %
                    cfg.network_type)

        hn = self.hn

        # Decide the activation once; the encoder takes it by name and the
        # output layer takes the callable.
        use_elu = cfg.context_fusion_method in ['block', 'disa']
        act_func_str = 'elu' if use_elu else 'relu'
        act_func = tf.nn.elu if use_elu else tf.nn.relu

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                self.tds,
                self.tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                extra_trainable=self.finetune_emb,
                scope='gene_token_emb_mat')
            # Author's shape annotations: bs, sl1, tel / bs, sl2, tel
            s1_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent1_token)
            s2_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent2_token)
            self.tensor_dict['s1_emb'] = s1_emb
            self.tensor_dict['s2_emb'] = s2_emb

        with tf.variable_scope('sent_encoding'):
            s1_rep = sentence_encoding_models(
                s1_emb, self.sent1_token_mask, cfg.context_fusion_method,
                act_func_str, 'ct_based_sent2vec', cfg.wd, self.is_train,
                cfg.dropout, block_len=cfg.block_len)

            # Share the encoder weights between the two sentences.
            tf.get_variable_scope().reuse_variables()

            s2_rep = sentence_encoding_models(
                s2_emb, self.sent2_token_mask, cfg.context_fusion_method,
                act_func_str, 'ct_based_sent2vec', cfg.wd, self.is_train,
                cfg.dropout, block_len=cfg.block_len)

            self.tensor_dict['s1_rep'] = s1_rep
            self.tensor_dict['s2_rep'] = s2_rep

        with tf.variable_scope('output'):
            out_rep = tf.concat(
                [s1_rep, s2_rep, s1_rep - s2_rep, s1_rep * s2_rep], -1)
            pre_output = act_func(linear(
                [out_rep], hn, True, 0., scope='pre_output', squeeze=False,
                wd=cfg.wd, input_keep_prob=cfg.dropout,
                is_train=self.is_train))
            logits = linear(
                [pre_output], self.output_class, True, 0., scope='logits',
                squeeze=False, wd=cfg.wd, input_keep_prob=cfg.dropout,
                is_train=self.is_train)
            # Stored under a string key, like every other tensor_dict entry;
            # using the tensor object as the key makes the entry impossible to
            # look up by name and fails outright when tensors are unhashable.
            self.tensor_dict['logits'] = logits
        return logits
Ejemplo n.º 6
0
    def build_network(self):
        """Build the sentence-pair graph with RL-generated token selections.

        Stages, each under its own variable scope:

        1. ``emb``: one shared embedding matrix is used to look up both
           sentences; the lookups are stored in ``self.tensor_dict``.
        2. ``hard_network``: for each sentence,
           ``sequence_conditional_feature`` feeds two
           ``generate_mask_with_rl`` calls (scopes ``..._dep`` and
           ``..._head``). Their log-probabilities are concatenated on the
           last axis, their actions are combined into a pairwise mask with
           ``logical_and`` over expanded dims, and their percentages are
           multiplied. ``reuse_variables()`` is called before sentence 2.
        3. ``ct_attn``: forward and backward
           ``directional_attention_with_selections`` outputs are
           concatenated per sentence; the forward call also yields the token
           mask used by the next stage. Variables are reused for sentence 2.
        4. ``sentence_enc``: ``multi_dimensional_attention`` per sentence,
           with variable reuse for sentence 2.
        5. ``output``: ``[s1 * s2, |s1 - s2|]`` -> ``bn_dense_layer`` ->
           ``linear``. When both ``cfg.use_mse`` and ``cfg.mse_logits`` are
           set, a single squeezed output is passed through a sigmoid and
           mapped with ``* 2. + 3.`` (i.e. into the open interval (3, 5));
           otherwise the layer has ``self.output_class`` outputs.

        Returns:
            A 4-tuple ``(logits, (s1_act, s1_logpa), (s2_act, s2_logpa),
            (s1_percentage, s2_percentage))``.
        """
        _logger.add()
        _logger.add('building %s neural network structure...' %
                    cfg.network_type)

        hn = self.hn

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                self.tds,
                self.tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                extra_trainable=self.finetune_emb,
                scope='gene_token_emb_mat')
            # Author's shape annotations: bs, sl1, tel / bs, sl2, tel
            s1_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent1_token)
            s2_emb = tf.nn.embedding_lookup(token_emb_mat, self.sent2_token)
            self.tensor_dict['s1_emb'] = s1_emb
            self.tensor_dict['s2_emb'] = s2_emb

        with tf.variable_scope('hard_network'):
            # --- sentence 1 ---
            s1_emb_new = sequence_conditional_feature(
                s1_emb, self.sent1_token_mask)
            s1_logpa_dep, s1_act_dep, s1_percentage_dep = generate_mask_with_rl(
                s1_emb_new, self.sent1_token_mask, False,
                'generate_mask_with_rl_dep', cfg.dropout, self.is_train,
                cfg.wd, 'relu', self.disable_rl, self.global_step, cfg.mode,
                cfg.start_only_rl, hn)  # author's annotation: [bs, sl] & [bs, sl]
            s1_logpa_head, s1_act_head, s1_percentage_head = generate_mask_with_rl(
                s1_emb_new, self.sent1_token_mask, False,
                'generate_mask_with_rl_head', cfg.dropout, self.is_train,
                cfg.wd, 'relu', self.disable_rl, self.global_step, cfg.mode,
                cfg.start_only_rl, hn)  # author's annotation: [bs, sl] & [bs, sl]
            s1_logpa = tf.concat([s1_logpa_dep, s1_logpa_head], -1)
            # Broadcast the two per-token selections against each other to
            # obtain a pairwise (head x dep) boolean mask.
            s1_act = tf.logical_and(tf.expand_dims(s1_act_dep, 1),
                                    tf.expand_dims(s1_act_head, 2))
            s1_percentage = s1_percentage_dep * s1_percentage_head

            # Share the selector weights between the two sentences.
            tf.get_variable_scope().reuse_variables()

            # --- sentence 2 ---
            s2_emb_new = sequence_conditional_feature(
                s2_emb, self.sent2_token_mask)
            s2_logpa_dep, s2_act_dep, s2_percentage_dep = generate_mask_with_rl(
                s2_emb_new, self.sent2_token_mask, False,
                'generate_mask_with_rl_dep', cfg.dropout, self.is_train,
                cfg.wd, 'relu', self.disable_rl, self.global_step, cfg.mode,
                cfg.start_only_rl, hn)  # author's annotation: [bs, sl] & [bs, sl]
            s2_logpa_head, s2_act_head, s2_percentage_head = generate_mask_with_rl(
                s2_emb_new, self.sent2_token_mask, False,
                'generate_mask_with_rl_head', cfg.dropout, self.is_train,
                cfg.wd, 'relu', self.disable_rl, self.global_step, cfg.mode,
                cfg.start_only_rl, hn)  # author's annotation: [bs, sl] & [bs, sl]
            s2_logpa = tf.concat([s2_logpa_dep, s2_logpa_head], -1)
            s2_act = tf.logical_and(tf.expand_dims(s2_act_dep, 1),
                                    tf.expand_dims(s2_act_head, 2))
            s2_percentage = s2_percentage_dep * s2_percentage_head

        keep_unselected = True
        with tf.variable_scope('ct_attn'):
            s1_fw, s1_token_mask_new = directional_attention_with_selections(
                s1_emb, self.sent1_token_mask, s1_act_dep, s1_act_head,
                'forward', hn, keep_unselected, 'dir_attn_fw', cfg.dropout,
                self.is_train, cfg.wd, 'relu')
            # The mask returned by the backward pass is discarded; only the
            # forward pass's mask is carried forward.
            s1_bw, _ = directional_attention_with_selections(
                s1_emb, self.sent1_token_mask, s1_act_dep, s1_act_head,
                'backward', hn, keep_unselected, 'dir_attn_bw', cfg.dropout,
                self.is_train, cfg.wd, 'relu')
            s1_seq_rep = tf.concat([s1_fw, s1_bw], -1)

            tf.get_variable_scope().reuse_variables()

            s2_fw, s2_token_mask_new = directional_attention_with_selections(
                s2_emb, self.sent2_token_mask, s2_act_dep, s2_act_head,
                'forward', hn, keep_unselected, 'dir_attn_fw', cfg.dropout,
                self.is_train, cfg.wd, 'relu')
            s2_bw, _ = directional_attention_with_selections(
                s2_emb, self.sent2_token_mask, s2_act_dep, s2_act_head,
                'backward', hn, keep_unselected, 'dir_attn_bw', cfg.dropout,
                self.is_train, cfg.wd, 'relu')
            s2_seq_rep = tf.concat([s2_fw, s2_bw], -1)

        with tf.variable_scope('sentence_enc'):
            s1_rep = multi_dimensional_attention(
                s1_seq_rep, s1_token_mask_new, 'multi_dimensional_attention',
                cfg.dropout, self.is_train, cfg.wd, 'relu',
                tensor_dict=self.tensor_dict, name='s1_attn')
            tf.get_variable_scope().reuse_variables()
            s2_rep = multi_dimensional_attention(
                s2_seq_rep, s2_token_mask_new, 'multi_dimensional_attention',
                cfg.dropout, self.is_train, cfg.wd, 'relu',
                tensor_dict=self.tensor_dict, name='s2_attn')

        with tf.variable_scope('output'):
            out_rep = tf.concat([s1_rep * s2_rep, tf.abs(s1_rep - s2_rep)], -1)
            out_rep_map = bn_dense_layer(out_rep, hn, True, 0., 'out_rep_map',
                                         'relu', False, cfg.wd, cfg.dropout,
                                         self.is_train)
            if cfg.use_mse and cfg.mse_logits:
                # Regression head: sigmoid output in (0, 1) mapped to (3, 5).
                raw_score = linear(out_rep_map,
                                   1,
                                   True,
                                   0.,
                                   scope='logits',
                                   squeeze=True,
                                   wd=cfg.wd,
                                   input_keep_prob=cfg.dropout,
                                   is_train=self.is_train)
                logits = tf.nn.sigmoid(raw_score) * 2. + 3.
            else:
                logits = linear([out_rep_map],
                                self.output_class,
                                True,
                                0.,
                                scope='logits',
                                squeeze=False,
                                wd=cfg.wd,
                                input_keep_prob=cfg.dropout,
                                is_train=self.is_train)

        return (logits,
                (s1_act, s1_logpa),
                (s2_act, s2_logpa),
                (s1_percentage, s2_percentage))
Ejemplo n.º 7
0
    def build_network(self):
        """Build the single-sequence graph with directional attention.

        Stages, each under its own variable scope:

        1. ``emb``: build the token embedding matrix, look up
           ``self.token_seq`` and record the result in
           ``self.tensor_dict['emb']``.
        2. ``ct_attn``: forward and backward
           ``directional_attention_with_dense`` outputs are concatenated on
           the last axis.
        3. ``sent_enc_attn``: ``multi_dimensional_attention`` over the
           concatenated sequence representation.
        4. ``output``: a ReLU-activated ``linear`` layer of width ``self.hn``
           followed by a ``linear`` layer of width ``self.output_class``.

        Returns:
            The tensor produced by the final ``linear`` call.
        """
        _logger.add()
        _logger.add('building %s neural network structure...' %
                    cfg.network_type)

        hn = self.hn

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                self.tds,
                self.tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                extra_trainable=self.finetune_emb,
                scope='gene_token_emb_mat')
            # Author's shape annotation: bs, sl, tel
            emb = tf.nn.embedding_lookup(token_emb_mat, self.token_seq)
            self.tensor_dict['emb'] = emb

        with tf.variable_scope('ct_attn'):
            rep_fw = directional_attention_with_dense(
                emb, self.token_mask, 'forward', 'dir_attn_fw',
                cfg.dropout, self.is_train, cfg.wd, 'relu',
                tensor_dict=self.tensor_dict, name='fw_attn')
            rep_bw = directional_attention_with_dense(
                emb, self.token_mask, 'backward', 'dir_attn_bw',
                cfg.dropout, self.is_train, cfg.wd, 'relu',
                tensor_dict=self.tensor_dict, name='bw_attn')
            seq_rep = tf.concat([rep_fw, rep_bw], -1)

        with tf.variable_scope('sent_enc_attn'):
            rep = multi_dimensional_attention(
                seq_rep, self.token_mask, 'multi_dimensional_attention',
                cfg.dropout, self.is_train, cfg.wd, 'relu',
                tensor_dict=self.tensor_dict, name='attn')

        with tf.variable_scope('output'):
            hidden = linear(
                [rep], hn, True, scope='pre_logits_linear',
                wd=cfg.wd, input_keep_prob=cfg.dropout, is_train=self.is_train)
            pre_logits = tf.nn.relu(hidden)  # author's annotation: bs, hn
            logits = linear(
                [pre_logits], self.output_class, False, scope='get_output',
                wd=cfg.wd, input_keep_prob=cfg.dropout,
                is_train=self.is_train)  # author's annotation: bs, 5

        _logger.done()
        return logits
Ejemplo n.º 8
0
    def build_network(self):
        """Build the context/question graph and return per-sentence logits.

        Stages, each under its own variable scope:

        1. ``emb``: one shared embedding matrix is used to look up
           ``self.context_token`` and ``self.question_token``.
        2. ``prepro``: the question is reduced with
           ``multi_dimensional_attention`` and mapped to width ``hn`` by
           ``bn_dense_layer``.
        3. ``sent_emb``: the context embeddings and mask are reshaped so the
           batch and sentence axes are merged (``bs * sn``), each sentence is
           encoded with ``sentence_encoding_models``, and the result is
           reshaped back to ``[bs, sn, 2 * hn]`` and mapped to width ``hn``.
        4. ``fusion``: the question vector is tiled ``sn`` times along a new
           axis 1 and combined with the sentence encodings as
           ``[s, q, s - q, s * q]``.
        5. ``output``: ``context_fusion_layers`` over the sentence axis
           (with a fixed ``block_len=4``) followed by ``bn_dense_layer``.

        The final ``get_logits`` call runs outside the ``output`` scope and
        is given ``self.context_sent_mask``.

        Returns:
            The tensor produced by ``get_logits``.
        """
        tds, tel, hn = self.tds, self.tel, self.hn
        bs, sn, sl = self.bs, self.sn, self.sl

        with tf.variable_scope('emb'):
            token_emb_mat = generate_embedding_mat(
                tds,
                tel,
                init_mat=self.token_emb_mat,
                extra_mat=self.glove_emb_mat,
                scope='gene_token_emb_mat')
            # Reshaped below to [bs * sn, sl, tel], so this is bs, sn, sl, tel.
            c_emb = tf.nn.embedding_lookup(token_emb_mat, self.context_token)
            # Author's shape annotation (typo corrected): bs, ql, tel
            q_emb = tf.nn.embedding_lookup(token_emb_mat, self.question_token)

        with tf.variable_scope('prepro'):
            q_rep = multi_dimensional_attention(
                q_emb, self.question_token_mask, 'q2coding', cfg.dropout,
                self.is_train, cfg.wd, 'relu')  # author's annotation: bs, hn
            q_rep_map = bn_dense_layer(
                q_rep, hn, True, 0., 'q_rep_map', 'relu', False,
                cfg.wd, cfg.dropout, self.is_train)  # author's annotation: bs, hn

        with tf.variable_scope('sent_emb'):
            # Merge the batch and sentence axes so every context sentence is
            # encoded as an independent sequence.
            c_emb_rshp = tf.reshape(
                c_emb, [bs * sn, sl, tel], 'c_emb_rshp')  # bs*sn, sl, tel
            c_mask_rshp = tf.reshape(
                self.context_token_mask, [bs * sn, sl],
                'c_mask_rshp')  # bs*sn, sl
            sent_enc_rshp = sentence_encoding_models(
                c_emb_rshp,
                c_mask_rshp,
                cfg.context_fusion_method,
                'relu',
                'sent2enc',
                cfg.wd,
                self.is_train,
                cfg.dropout,
                hn,
                block_len=cfg.block_len)  # bs*sn, 2*hn
            sent_enc = tf.reshape(
                sent_enc_rshp, [bs, sn, 2 * hn])  # bs, sn, 2*hn
            sent_enc_map = bn_dense_layer(
                sent_enc, hn, True, 0., 'sent_enc_map', 'relu', False,
                cfg.wd, cfg.dropout, self.is_train)

        with tf.variable_scope('fusion'):
            # Repeat the question vector once per context sentence.
            q_rep_map_ex = tf.tile(
                tf.expand_dims(q_rep_map, 1), [1, sn, 1])  # bs, sn, hn
            fusion_rep = tf.concat([
                sent_enc_map,
                q_rep_map_ex,
                sent_enc_map - q_rep_map_ex,
                sent_enc_map * q_rep_map_ex,
            ], -1)  # bs, sn, 4*hn

        with tf.variable_scope('output'):
            out_cf = context_fusion_layers(
                fusion_rep,
                self.context_sent_mask,
                cfg.context_fusion_method,
                'relu',
                'out_cf',
                cfg.wd,
                self.is_train,
                cfg.dropout,
                hn,
                block_len=4)
            pre_output = bn_dense_layer(
                out_cf, hn, True, 0., 'pre_output', 'relu', False,
                cfg.wd, cfg.dropout, self.is_train)

        # Author's note: "exp masked".
        logits = get_logits(
            pre_output, None, True, 0., 'logits', self.context_sent_mask,
            cfg.wd, cfg.dropout, self.is_train, 'linear')
        return logits