コード例 #1
0
    def test_predict_and_callback(self):
        """Fit a BiGRU model with an eval callback, then check prediction lengths.

        The model is built with ``sequence_length=10``. Predictions made with
        ``truncating=True`` must all be at most 10 items long, while predictions
        made without it must contain at least one longer sequence.
        """
        from coco_nlp.corpus import ChineseDailyNerCorpus
        from coco_nlp.callbacks import EvalCallBack

        sequence_length = 10

        train_x, train_y = ChineseDailyNerCorpus.load_data('train')
        valid_x, valid_y = ChineseDailyNerCorpus.load_data('valid')

        model = BiGRU_Model(sequence_length=sequence_length)

        # The callback is given the same validation subset that fit() receives.
        callback_kwargs = {
            'coco_model': model,
            'x_data': valid_x[:200],
            'y_data': valid_y[:200],
            'truncating': True,
            'step': 1,
        }
        eval_callback = EvalCallBack(**callback_kwargs)

        model.fit(train_x[:300], train_y[:300],
                  valid_x[:200], valid_y[:200],
                  epochs=1,
                  callbacks=[eval_callback])

        # With truncation, no predicted sequence may exceed the model length.
        truncated = model.predict(train_x[:200], truncating=True)
        assert all(len(labels) <= sequence_length for labels in truncated)

        # Without truncation, at least one prediction must be longer.
        untruncated = model.predict(train_x[:200])
        assert not all(len(labels) <= sequence_length for labels in untruncated)
コード例 #2
0
    def test_load_data(self):
        """Check that each corpus split loads as non-empty, equally sized x/y data."""
        # Default split: additionally make sure samples and labels are not the same data.
        train_x, train_y = ChineseDailyNerCorpus.load_data()
        assert len(train_x) == len(train_y)
        assert len(train_x) > 0
        assert train_x[:5] != train_y[:5]

        # The explicitly named splits share the same size checks.
        for subset in ('test', 'valid'):
            subset_x, subset_y = ChineseDailyNerCorpus.load_data(subset)
            assert len(subset_x) == len(subset_y)
            assert len(subset_x) > 0
コード例 #3
0
ファイル: test_macros.py プロジェクト: johncolezhang/coco_nlp
class TestMacros:
    """Shared test fixtures: downloaded sample files and preloaded corpora.

    Every attribute below is evaluated once, when the class body executes.
    """

    # Sample BERT model archive, requested with untar=True.
    bert_path = get_file('bert_sample_model',
                         "http://s3.bmio.net/coco_nlp/bert_sample_model.tar.bz2",
                         cache_dir=DATA_PATH,
                         untar=True)

    # Sample word2vec text file.
    w2v_path = get_file(
        'sample_w2v.txt',
        "http://s3.bmio.net/coco_nlp/sample_w2v.txt",
        cache_dir=DATA_PATH)

    # Mini Jigsaw toxic-comment CSV and the corpus object built from it.
    jigsaw_mini_corpus_path = get_file(
        'jigsaw-toxic-comment-corpus-mini.csv',
        "http://s3.bmio.net/coco_nlp/jigsaw-toxic-comment-corpus-mini.csv",
        cache_dir=DATA_PATH)
    jigsaw_mini_corpus = JigsawToxicCommentCorpus(jigsaw_mini_corpus_path)

    # 'valid' splits of the labeling and classification corpora.
    chinese_daily = ChineseDailyNerCorpus.load_data('valid')
    smp_corpus = SMP2018ECDTCorpus.load_data('valid')

    # Test data for issue https://github.com/cole.zhang/coco_nlp/issues/187
    custom_1 = (text_x, ner_y)

    @classmethod
    def load_labeling_corpus(cls, name=None):
        """Return a labeling corpus by name.

        Args:
            name: Key of the corpus to return. When ``None``, one of the
                registered keys is picked with ``random.choice``.

        Raises:
            KeyError: If ``name`` is not a registered key.
        """
        corpora = {
            'chinese_daily': cls.chinese_daily,
            # NOTE: 'custom_1' is currently not registered here.
        }
        key = random.choice(list(corpora)) if name is None else name
        return corpora[key]

    @classmethod
    def load_classification_corpus(cls, name=None):
        """Return a classification corpus by name.

        Args:
            name: Key of the corpus to return. When ``None``, one of the
                registered keys is picked with ``random.choice``.

        Raises:
            KeyError: If ``name`` is not a registered key.
        """
        corpora = {'smp_corpus': cls.smp_corpus}
        key = random.choice(list(corpora)) if name is None else name
        return corpora[key]

    @classmethod
    def load_multi_label_classification_corpus(cls, name=None):
        """Return the ``(text_x, multi_label_y)`` pair; ``name`` is not used."""
        corpus = (text_x, multi_label_y)
        return corpus
コード例 #4
0
    def test_base_use_case(self):
        """Train a small Seq2Seq model, save and reload it, and compare predictions.

        The reloaded model must produce the same results and the same attention
        values as the model that was saved.
        """
        corpus_x, corpus_y = ChineseDailyNerCorpus.load_data('test')
        samples, targets = corpus_x[:200], corpus_y[:200]

        original = Seq2Seq(hidden_size=64,
                           encoder_seq_length=64,
                           decoder_seq_length=64)
        original.fit(samples, targets, epochs=1)
        expected_res, expected_att = original.predict(samples)

        # Save under a timestamp-named path inside the system temp directory.
        save_dir = os.path.join(tempfile.gettempdir(), str(time.time()))
        original.save(save_dir)

        restored = Seq2Seq.load_model(save_dir)
        restored_res, restored_att = restored.predict(samples)

        assert restored_res == expected_res
        assert (restored_att == expected_att).all()
コード例 #5
0
            }
        }

    def build_model_arc(self) -> None:
        """Build the model graph on top of the embedding and store it in ``self.tf_model``.

        The embedding model's output is passed through a bidirectional LSTM,
        a dropout layer, a dense layer with ``label_processor.vocab_size``
        units, and an activation layer. Layer options are read from
        ``self.hyper_parameters``.
        """
        output_dim = self.label_processor.vocab_size

        config = self.hyper_parameters
        embed_model = self.embedding.embed_model

        # Create every layer before wiring any of them together.
        blstm = L.Bidirectional(L.LSTM(**config['layer_blstm']),
                                name='layer_blstm')
        dropout = L.Dropout(**config['layer_dropout'], name='layer_dropout')
        dense = L.Dense(output_dim, **config['layer_time_distributed'])
        activation = L.Activation(**config['layer_activation'])

        # Chain the layers: embedding -> BLSTM -> dropout -> dense -> activation.
        outputs = activation(dense(dropout(blstm(embed_model.output))))

        self.tf_model = keras.Model(embed_model.inputs, outputs)


if __name__ == "__main__":
    # Demo run: fit a BiLSTM model on the default split with the 'valid' split
    # as validation data, then evaluate it on the 'test' split.
    from coco_nlp.corpus import ChineseDailyNerCorpus

    train_x, train_y = ChineseDailyNerCorpus.load_data()
    valid_x, valid_y = ChineseDailyNerCorpus.load_data('valid')

    model = BiLSTM_Model()
    model.fit(train_x, train_y, valid_x, valid_y, epochs=2)

    test_x, test_y = ChineseDailyNerCorpus.load_data('test')
    model.evaluate(test_x, test_y)
コード例 #6
0
                              name='layer_time_distributed'),
            L.Activation(**config['layer_activation'])
        ]

        tensor = embed_model.output
        for layer in layer_stack:
            tensor = layer(tensor)

        self.tf_model = keras.Model(embed_model.inputs, tensor)


if __name__ == "__main__":
    from coco_nlp.corpus import ChineseDailyNerCorpus
    from coco_nlp.callbacks import EvalCallBack

    train_x, train_y = ChineseDailyNerCorpus.load_data('train')
    valid_x, valid_y = ChineseDailyNerCorpus.load_data('valid')
    test_x, test_y = ChineseDailyNerCorpus.load_data('test')

    model = BiGRU_Model(sequence_length=10)

    eval_callback = EvalCallBack(coco_model=model,
                                 x_data=valid_x,
                                 y_data=valid_y,
                                 truncating=True,
                                 step=1)

    model.fit(train_x[:300],
              train_y[:300],
              valid_x,
              valid_y,
コード例 #7
0
ファイル: model.py プロジェクト: johncolezhang/coco_nlp
                token_out.append(next_tokens)
                if next_tokens == eos_token_id:
                    break
                dec_input = tf.expand_dims([next_tokens], 0)
            r = self.decoder_processor.inverse_transform([token_out])[0]
            results.append(r)
            attentions.append(attention_plot)
        return results, np.array(attentions)


if __name__ == "__main__":
    # Demo run: build a Seq2Seq model, save it, reload it, and print whether
    # both instances give the same predictions and attention values.
    from coco_nlp.corpus import ChineseDailyNerCorpus
    import logging

    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level='INFO', format=log_format)

    corpus_x, corpus_y = ChineseDailyNerCorpus.load_data('test')
    corpus_x, corpus_y = corpus_x[:100], corpus_y[:100]

    model_dir = './seq2seq_model'

    built = Seq2Seq(hidden_size=256)
    built.build_model(corpus_x, corpus_y)
    built.save(model_dir)

    reloaded = Seq2Seq.load_model(model_dir)

    # Predict on the same first ten samples with both instances.
    res, att = built.predict(corpus_x[:10])
    res2, att2 = reloaded.predict(corpus_x[:10])

    print(res == res2)
    print((att == att2).all())