コード例 #1
0
 def get_batch_generator(self):
     """Yield ``(inputs, Y)`` tuples assembled from ``self.get_batch``.

     When ``self.is_train`` is truthy the generator never terminates and
     calls ``self.get_batch()`` for every batch. Otherwise it calls
     ``self.get_batch(randomly=False)`` for as long as
     ``self.point + self.batch_size <= self.total_rel_num`` and also puts
     the batch's ``ID_pairs`` under the ``'ID'`` key.

     ``'dpool_index'`` is present only when ``self.config['use_dpool']``
     is truthy.
     """

     def pack(batch, with_ids):
         # Turn one 6-tuple from get_batch into the (inputs, Y) pair.
         X1, X1_len, X2, X2_len, Y, ID_pairs = batch
         inputs = {
             'query': X1,
             'query_len': X1_len,
             'doc': X2,
             'doc_len': X2_len,
         }
         if self.config['use_dpool']:
             inputs['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                 X1_len, X2_len, self.config['text1_maxlen'],
                 self.config['text2_maxlen'])
         if with_ids:
             inputs['ID'] = ID_pairs
         return inputs, Y

     if self.is_train:
         while True:
             yield pack(self.get_batch(), with_ids=False)
     else:
         while self.point + self.batch_size <= self.total_rel_num:
             yield pack(self.get_batch(randomly=False), with_ids=True)
コード例 #2
0
 def get_batch_generator(self):
     """Yield one ``(inputs, Y)`` tuple per item produced by ``self.get_batch()``.

     Each item is unpacked into seven values. ``inputs`` always carries
     ``query``, ``query_len``, ``doc``, ``doc_len``, ``ID`` and
     ``list_counts``; ``dpool_index`` is inserted before ``ID`` when
     ``self.config['use_dpool']`` is truthy.
     """
     for batch in self.get_batch():
         X1, X1_len, X2, X2_len, Y, ID_pairs, list_counts = batch
         inputs = {
             'query': X1,
             'query_len': X1_len,
             'doc': X2,
             'doc_len': X2_len,
         }
         if self.config['use_dpool']:
             inputs['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                 X1_len, X2_len, self.config['text1_maxlen'],
                 self.config['text2_maxlen'])
         inputs['ID'] = ID_pairs
         inputs['list_counts'] = list_counts
         yield inputs, Y
コード例 #3
0
ファイル: pair_generator.py プロジェクト: zhichao-li/MatchZoo
 def get_batch_generator(self):
     """Endlessly yield ``(inputs, Y)`` tuples built from ``self.get_batch()``.

     ``self.get_batch()`` must return five values. ``inputs`` holds
     ``query``, ``query_len``, ``doc`` and ``doc_len``, plus
     ``dpool_index`` when ``self.config['use_dpool']`` is truthy.
     The loop has no exit condition.
     """
     while True:
         X1, X1_len, X2, X2_len, Y = self.get_batch()
         inputs = dict(query=X1, query_len=X1_len, doc=X2, doc_len=X2_len)
         if self.config['use_dpool']:
             inputs['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                 X1_len, X2_len, self.config['text1_maxlen'],
                 self.config['text2_maxlen'])
         yield inputs, Y
コード例 #4
0
ファイル: point_generator.py プロジェクト: Joseph94m/MatchZoo
 def get_batch_generator(self):
     """Yield ``(inputs, Y)`` tuples until ``self.get_batch()`` returns a falsy value.

     Every truthy sample is unpacked into six values. ``inputs`` holds
     ``query``, ``query_len``, ``doc``, ``doc_len`` and ``ID``;
     ``dpool_index`` is inserted before ``ID`` when
     ``self.config['use_dpool']`` is truthy.
     """
     sample = self.get_batch()
     while sample:
         X1, X1_len, X2, X2_len, Y, ID_pairs = sample
         inputs = {
             'query': X1,
             'query_len': X1_len,
             'doc': X2,
             'doc_len': X2_len,
         }
         if self.config['use_dpool']:
             inputs['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                 X1_len, X2_len, self.config['text1_maxlen'],
                 self.config['text2_maxlen'])
         inputs['ID'] = ID_pairs
         yield inputs, Y
         # Fetch the next sample only after the consumer resumes us.
         sample = self.get_batch()
コード例 #5
0
ファイル: str_generator.py プロジェクト: JuniorPan/MatchZoo
 def get_dpool_index(self, _len1, _len2):
     """Return the dynamic pooling index for one text pair.

     Each length is wrapped in a one-element list and passed to
     ``DynamicMaxPooling.dynamic_pooling_index`` together with
     ``self.config['text1_maxlen']`` and ``self.config['text2_maxlen']``.

     @param _len1: int length of text1 terms
     @param _len2: int length of text2 terms
     @return: the value returned by ``dynamic_pooling_index``
              (presumably an ``np.array`` index -- confirm against
              ``DynamicMaxPooling``)
     """
     return DynamicMaxPooling.dynamic_pooling_index(
         [_len1],
         [_len2],
         self.config['text1_maxlen'],
         self.config['text2_maxlen'],
     )
コード例 #6
0
 def get_batch_generator(self):
     """Endlessly yield ``(inputs, Y)`` tuples that include ``*_pos`` features.

     ``self.get_batch()`` must return nine values. ``inputs`` holds the
     query/doc entries, their ``*_pos`` counterparts and all four length
     entries. When ``self.config['use_dpool']`` is truthy it also holds
     ``dpool_index`` (from the text lengths and ``text{1,2}_maxlen``) and
     ``dpool_pos_index`` (from the pos lengths and ``pos{1,2}_maxlen``).
     """
     while True:
         (X1, XP1, X1_len, XP1_len,
          X2, XP2, X2_len, XP2_len, Y) = self.get_batch()
         inputs = {
             'query': X1,
             'query_pos': XP1,
             'query_len': X1_len,
             'query_pos_len': XP1_len,
             'doc': X2,
             'doc_pos': XP2,
             'doc_len': X2_len,
             'doc_pos_len': XP2_len,
         }
         if self.config['use_dpool']:
             inputs['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                 X1_len, X2_len, self.config['text1_maxlen'],
                 self.config['text2_maxlen'])
             inputs['dpool_pos_index'] = DynamicMaxPooling.dynamic_pooling_index(
                 XP1_len, XP2_len, self.config['pos1_maxlen'],
                 self.config['pos2_maxlen'])
         yield inputs, Y
コード例 #7
0
ファイル: matchpyramid.py プロジェクト: qiuchili/qnn_text
    def build(self):
        """Build and return the Keras ``Model`` for this matcher.

        The graph takes three named inputs (``query``, ``doc`` and the
        int32 ``dpool_index``). Query and doc go through one shared
        ``Embedding``; their dot product is reshaped to a single-channel
        grid and passed through ``Conv2D``, ``DynamicMaxPooling``,
        ``Flatten`` and ``Dropout`` before the output ``Dense`` layer.

        The output layer depends on ``self.config['target_mode']``:
        ``'classification'`` gives a 2-unit softmax, ``'regression'`` and
        ``'ranking'`` give a single linear unit.

        Raises:
            ValueError: if ``self.config['target_mode']`` is not one of the
                three supported values. The original code left ``out_``
                unbound in that case and failed with ``UnboundLocalError``.
        """
        text1_maxlen = self.config['text1_maxlen']
        text2_maxlen = self.config['text2_maxlen']

        query = Input(name='query', shape=(text1_maxlen, ))
        show_layer_info('Input', query)
        doc = Input(name='doc', shape=(text2_maxlen, ))
        show_layer_info('Input', doc)
        dpool_index = Input(name='dpool_index',
                            shape=[text1_maxlen, text2_maxlen, 3],
                            dtype='int32')
        show_layer_info('Input', dpool_index)

        # One embedding layer instance is applied to both inputs, so the
        # weights are shared between query and doc.
        embedding = Embedding(self.config['vocab_size'],
                              self.config['embed_size'],
                              weights=[self.config['embed']],
                              trainable=self.embed_trainable)
        q_embed = embedding(query)
        show_layer_info('Embedding', q_embed)
        d_embed = embedding(doc)
        show_layer_info('Embedding', d_embed)

        # Dot product over the embedding axis, reshaped with a trailing
        # channel dimension for Conv2D.
        cross = Dot(axes=[2, 2], normalize=False)([q_embed, d_embed])
        show_layer_info('Dot', cross)
        cross_reshape = Reshape((text1_maxlen, text2_maxlen, 1))(cross)
        show_layer_info('Reshape', cross_reshape)

        conv2d = Conv2D(self.config['kernel_count'],
                        self.config['kernel_size'],
                        padding='same',
                        activation='relu')
        dpool = DynamicMaxPooling(self.config['dpool_size'][0],
                                  self.config['dpool_size'][1])

        conv1 = conv2d(cross_reshape)
        show_layer_info('Conv2D', conv1)
        pool1 = dpool([conv1, dpool_index])
        show_layer_info('DynamicMaxPooling', pool1)
        pool1_flat = Flatten()(pool1)
        show_layer_info('Flatten', pool1_flat)
        pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(pool1_flat)
        show_layer_info('Dropout', pool1_flat_drop)

        target_mode = self.config['target_mode']
        if target_mode == 'classification':
            out_ = Dense(2, activation='softmax')(pool1_flat_drop)
        elif target_mode in ('regression', 'ranking'):
            out_ = Dense(1)(pool1_flat_drop)
        else:
            # Fail with a clear message instead of an unbound `out_`.
            raise ValueError(
                "Unsupported target_mode %r; expected 'classification', "
                "'regression' or 'ranking'." % (target_mode, ))
        show_layer_info('Dense', out_)

        model = Model(inputs=[query, doc, dpool_index], outputs=out_)
        return model
コード例 #8
0
    def build(self):
        """Build and return the Keras ``Model`` with optional word attention.

        The graph takes three named inputs (``query``, ``doc`` and the
        int32 ``dpool_index``). Query and doc go through one shared
        ``Embedding``. When ``self.config['text1_attention']`` /
        ``self.config['text2_attention']`` is truthy, the matching
        embedding is re-weighted per word: a bias-free ``Dense(1)`` score,
        softmax over axis 1, repeated along the embedding axis and
        multiplied element-wise with the embedding.

        The (possibly re-weighted) embeddings are combined by dot product,
        reshaped to a single-channel grid and passed through ``Conv2D``,
        ``DynamicMaxPooling``, ``Flatten`` and ``Dropout`` before the
        output ``Dense`` layer, which depends on
        ``self.config['target_mode']``: ``'classification'`` gives a
        2-unit softmax, ``'regression'`` and ``'ranking'`` give a single
        linear unit.

        Raises:
            ValueError: if ``self.config['target_mode']`` is not one of the
                three supported values. The original code left ``out_``
                unbound in that case and failed with ``UnboundLocalError``.
        """
        text1_maxlen = self.config['text1_maxlen']
        text2_maxlen = self.config['text2_maxlen']
        embed_size = self.config['embed_size']

        query = Input(name='query', shape=(text1_maxlen, ))
        show_layer_info('Input', query)
        doc = Input(name='doc', shape=(text2_maxlen, ))
        show_layer_info('Input', doc)
        dpool_index = Input(name='dpool_index',
                            shape=[text1_maxlen, text2_maxlen, 3],
                            dtype='int32')
        show_layer_info('Input', dpool_index)

        # One embedding layer instance is applied to both inputs, so the
        # weights are shared between query and doc.
        embedding = Embedding(self.config['vocab_size'],
                              embed_size,
                              weights=[self.config['embed']],
                              trainable=self.embed_trainable)
        q_embed = embedding(query)
        show_layer_info('Embedding', q_embed)
        d_embed = embedding(doc)
        show_layer_info('Embedding', d_embed)

        # ########## compute attention weights for the query words
        if self.config["text1_attention"]:
            # use_bias=False keeps the score a plain linear combination.
            q_w = Dense(1,
                        kernel_initializer=self.initializer_gate,
                        use_bias=False)(q_embed)
            show_layer_info('Dense', q_w)
            q_w = Lambda(lambda x: softmax(x, axis=1),
                         output_shape=(text1_maxlen, ),
                         name="q_w")(q_w)
            show_layer_info('Lambda-softmax', q_w)
            # ########## add attention weights for Q_words
            # The lambda uses its own argument `x`; the original closed
            # over the outer `q_w` tensor and ignored `x`.
            q_w_layer = Lambda(lambda x: K.repeat_elements(
                x, rep=embed_size, axis=2))(q_w)
            show_layer_info('repeat', q_w_layer)
            q_embed = Multiply()([q_w_layer, q_embed])
            show_layer_info('Dot-qw', q_embed)
        # ####################### attention text1

        # ########## compute attention weights for the document words
        if self.config['text2_attention']:
            d_w = Dense(1,
                        kernel_initializer=self.initializer_gate,
                        use_bias=False)(d_embed)
            show_layer_info('Dense', d_w)
            d_w = Lambda(lambda x: softmax(x, axis=1),
                         output_shape=(text2_maxlen, ))(d_w)
            show_layer_info('Lambda-softmax', d_w)
            # ########## add attention weights for D_words
            d_w_layer = Lambda(lambda x: K.repeat_elements(
                x, rep=embed_size, axis=2))(d_w)
            show_layer_info('repeat', d_w_layer)
            d_embed = Multiply()([d_w_layer, d_embed])
            # Label fixed: this is the doc-side product, not the query one.
            show_layer_info('Dot-dw', d_embed)
        # ####################### attention text2

        cross = Dot(axes=[2, 2], normalize=False)([q_embed, d_embed])
        show_layer_info('Dot', cross)
        cross_reshape = Reshape((text1_maxlen, text2_maxlen, 1))(cross)
        show_layer_info('Reshape', cross_reshape)

        conv2d = Conv2D(self.config['kernel_count'],
                        self.config['kernel_size'],
                        padding='same',
                        activation='relu')
        dpool = DynamicMaxPooling(self.config['dpool_size'][0],
                                  self.config['dpool_size'][1])

        conv1 = conv2d(cross_reshape)
        show_layer_info('Conv2D', conv1)
        pool1 = dpool([conv1, dpool_index])
        show_layer_info('DynamicMaxPooling', pool1)
        pool1_flat = Flatten()(pool1)
        show_layer_info('Flatten', pool1_flat)
        pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(pool1_flat)
        show_layer_info('Dropout', pool1_flat_drop)

        target_mode = self.config['target_mode']
        if target_mode == 'classification':
            out_ = Dense(2, activation='softmax')(pool1_flat_drop)
        elif target_mode in ('regression', 'ranking'):
            out_ = Dense(1)(pool1_flat_drop)
        else:
            # Fail with a clear message instead of an unbound `out_`.
            raise ValueError(
                "Unsupported target_mode %r; expected 'classification', "
                "'regression' or 'ranking'." % (target_mode, ))
        show_layer_info('Dense', out_)

        model = Model(inputs=[query, doc, dpool_index], outputs=out_)
        return model
コード例 #9
0
ファイル: list_generator.py プロジェクト: RuijieRa/MatchZoo
 def get_batch_generator(self):
     """Yield one ``(inputs, Y)`` tuple per item produced by ``self.get_batch()``.

     Each item is unpacked into seven values. ``inputs`` always carries
     ``query``, ``query_len``, ``doc``, ``doc_len``, ``ID`` and
     ``list_counts``; ``dpool_index`` is inserted before ``ID`` when
     ``self.config['use_dpool']`` is truthy.
     """
     for X1, X1_len, X2, X2_len, Y, ID_pairs, list_counts in self.get_batch():
         features = {
             'query': X1,
             'query_len': X1_len,
             'doc': X2,
             'doc_len': X2_len,
         }
         if self.config['use_dpool']:
             features['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                 X1_len,
                 X2_len,
                 self.config['text1_maxlen'],
                 self.config['text2_maxlen'],
             )
         # Appended last so the key order matches the original literals.
         features.update({'ID': ID_pairs, 'list_counts': list_counts})
         yield (features, Y)
コード例 #10
0
ファイル: matchpyramid.py プロジェクト: FrankBlood/MatchZoo
    def build(self):
        """Build the Keras ``Model``, print its summary and return it.

        The graph takes three named inputs (``query``, ``doc`` and the
        int32 ``dpool_index``). Query and doc go through one shared
        ``Embedding``; their dot product is reshaped to a single-channel
        grid and passed through ``Conv2D``, ``DynamicMaxPooling``,
        ``Flatten`` and ``Dropout`` before the output ``Dense`` layer.

        The output layer depends on ``self.config['target_mode']``:
        ``'classification'`` gives a 2-unit softmax, ``'regression'`` and
        ``'ranking'`` give a single linear unit. ``model.summary()`` is
        called before returning.

        Raises:
            ValueError: if ``self.config['target_mode']`` is not one of the
                three supported values. The original code left ``out_``
                unbound in that case and failed with ``UnboundLocalError``.
        """
        text1_maxlen = self.config['text1_maxlen']
        text2_maxlen = self.config['text2_maxlen']

        query = Input(name='query', shape=(text1_maxlen, ))
        show_layer_info('Input', query)
        doc = Input(name='doc', shape=(text2_maxlen, ))
        show_layer_info('Input', doc)
        dpool_index = Input(name='dpool_index',
                            shape=[text1_maxlen, text2_maxlen, 3],
                            dtype='int32')
        show_layer_info('Input', dpool_index)

        # One embedding layer instance is applied to both inputs, so the
        # weights are shared between query and doc.
        embedding = Embedding(self.config['vocab_size'],
                              self.config['embed_size'],
                              weights=[self.config['embed']],
                              trainable=self.embed_trainable)
        q_embed = embedding(query)
        show_layer_info('Embedding', q_embed)
        d_embed = embedding(doc)
        show_layer_info('Embedding', d_embed)

        cross = Dot(axes=[2, 2], normalize=False)([q_embed, d_embed])

        # Disabled experiment (bin-sum features over `cross`), kept as-is:
        # def cal_binsum(cross, bin_num=20):
        #     shape = cross.get_shape()
        #     qnum = shape[1]
        #     mbinsum = np.zeros((qnum, bin_num), dtype=np.float32)
        #     for (i, j), v in np.ndenumerate(cross):
        #         if i >= qnum:
        #             break
        #         vid = int((v + 1.) / 2. * (bin_num - 1.))
        #         mbinsum[i][vid] += v
        #         # mhist += 1. # smooth is not needed for computing bin sum
        #         # mhist = np.log10(mhist) # not needed for computing  bin sum
        #
        #     return mbinsum.flatten()
        #
        # bins = Lambda(lambda x: cal_binsum(x))(cross)
        # out1 = Dense(50)(bins)

        show_layer_info('Dot', cross)
        cross_reshape = Reshape((text1_maxlen, text2_maxlen, 1))(cross)
        show_layer_info('Reshape', cross_reshape)

        conv2d = Conv2D(self.config['kernel_count'],
                        self.config['kernel_size'],
                        padding='same',
                        activation='relu')
        dpool = DynamicMaxPooling(self.config['dpool_size'][0],
                                  self.config['dpool_size'][1])

        conv1 = conv2d(cross_reshape)
        show_layer_info('Conv2D', conv1)
        pool1 = dpool([conv1, dpool_index])
        show_layer_info('DynamicMaxPooling', pool1)
        pool1_flat = Flatten()(pool1)
        show_layer_info('Flatten', pool1_flat)
        pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(pool1_flat)
        show_layer_info('Dropout', pool1_flat_drop)

        target_mode = self.config['target_mode']
        if target_mode == 'classification':
            out_ = Dense(2, activation='softmax')(pool1_flat_drop)
        elif target_mode in ('regression', 'ranking'):
            out_ = Dense(1)(pool1_flat_drop)
        else:
            # Fail with a clear message instead of an unbound `out_`.
            raise ValueError(
                "Unsupported target_mode %r; expected 'classification', "
                "'regression' or 'ranking'." % (target_mode, ))
        show_layer_info('Dense', out_)

        model = Model(inputs=[query, doc, dpool_index], outputs=out_)
        model.summary()
        return model
コード例 #11
0
ファイル: point_generator.py プロジェクト: hhh920406/MatchZoo
 def get_batch_generator(self):
     """Yield ``(inputs, Y)`` tuples until ``self.get_batch()`` returns a falsy value.

     Every truthy sample is unpacked into six values. ``inputs`` holds
     ``query``, ``query_len``, ``doc``, ``doc_len`` and ``ID``;
     ``dpool_index`` is inserted before ``ID`` when
     ``self.config['use_dpool']`` is truthy.
     """
     while True:
         sample = self.get_batch()
         if not sample:
             # A falsy sample ends the generator.
             return
         X1, X1_len, X2, X2_len, Y, ID_pairs = sample
         features = {
             'query': X1,
             'query_len': X1_len,
             'doc': X2,
             'doc_len': X2_len,
         }
         if self.config['use_dpool']:
             features['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                 X1_len,
                 X2_len,
                 self.config['text1_maxlen'],
                 self.config['text2_maxlen'],
             )
         features['ID'] = ID_pairs
         yield (features, Y)
コード例 #12
0
ファイル: pair_generator.py プロジェクト: RuijieRa/MatchZoo
 def get_batch_generator(self):
     """Endlessly yield ``(inputs, Y)`` tuples built from ``self.get_batch()``.

     ``self.get_batch()`` must return five values. ``inputs`` holds
     ``query``, ``query_len``, ``doc`` and ``doc_len``, plus
     ``dpool_index`` when ``self.config['use_dpool']`` is truthy.
     The loop has no exit condition.
     """
     while True:
         batch = self.get_batch()
         X1, X1_len, X2, X2_len, Y = batch
         features = {
             'query': X1,
             'query_len': X1_len,
             'doc': X2,
             'doc_len': X2_len,
         }
         if self.config['use_dpool']:
             features['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                 X1_len,
                 X2_len,
                 self.config['text1_maxlen'],
                 self.config['text2_maxlen'],
             )
         yield (features, Y)
コード例 #13
0
ファイル: point_generator.py プロジェクト: RuijieRa/MatchZoo
 def get_batch_generator(self):
     """Yield ``(inputs, Y)`` tuples assembled from ``self.get_batch``.

     When ``self.is_train`` is falsy, batches are fetched with
     ``self.get_batch(randomly=False)`` for as long as
     ``self.point + self.batch_size <= self.total_rel_num``, and each
     ``inputs`` dict carries the batch's ``ID_pairs`` under ``'ID'``.
     Otherwise the generator never terminates, calls ``self.get_batch()``
     and omits ``'ID'``.

     ``'dpool_index'`` is present only when ``self.config['use_dpool']``
     is truthy.
     """
     if not self.is_train:
         # Bounded pass: stop once a full batch no longer fits.
         while self.point + self.batch_size <= self.total_rel_num:
             X1, X1_len, X2, X2_len, Y, ID_pairs = self.get_batch(
                 randomly=False)
             features = {
                 'query': X1,
                 'query_len': X1_len,
                 'doc': X2,
                 'doc_len': X2_len,
             }
             if self.config['use_dpool']:
                 features['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                     X1_len, X2_len, self.config['text1_maxlen'],
                     self.config['text2_maxlen'])
             features['ID'] = ID_pairs
             yield (features, Y)
         return

     # Training: unbounded stream; ID_pairs is not exposed.
     while True:
         X1, X1_len, X2, X2_len, Y, _ = self.get_batch()
         features = {
             'query': X1,
             'query_len': X1_len,
             'doc': X2,
             'doc_len': X2_len,
         }
         if self.config['use_dpool']:
             features['dpool_index'] = DynamicMaxPooling.dynamic_pooling_index(
                 X1_len, X2_len, self.config['text1_maxlen'],
                 self.config['text2_maxlen'])
         yield (features, Y)