def test_predict_and_callback(self):
    """End-to-end check that truncated prediction caps output length.

    Trains a tiny BiGRU tagger for one epoch with an evaluation callback
    attached, then verifies that ``predict(..., truncating=True)`` never
    returns a sequence longer than the model's ``sequence_length`` (10),
    while a plain ``predict`` call does exceed it for at least one sample.
    """
    from coco_nlp.corpus import ChineseDailyNerCorpus
    from coco_nlp.callbacks import EvalCallBack

    train_x, train_y = ChineseDailyNerCorpus.load_data('train')
    valid_x, valid_y = ChineseDailyNerCorpus.load_data('valid')

    model = BiGRU_Model(sequence_length=10)
    eval_callback = EvalCallBack(coco_model=model,
                                 x_data=valid_x[:200],
                                 y_data=valid_y[:200],
                                 truncating=True,
                                 step=1)
    model.fit(train_x[:300],
              train_y[:300],
              valid_x[:200],
              valid_y[:200],
              epochs=1,
              callbacks=[eval_callback])

    # Truncated prediction: every output sequence is capped at 10 tokens.
    truncated = model.predict(train_x[:200], truncating=True)
    assert all(len(seq) <= 10 for seq in truncated)

    # Untruncated prediction: at least one sequence exceeds the cap.
    untruncated = model.predict(train_x[:200])
    assert not all(len(seq) <= 10 for seq in untruncated)
def test_load_data(self):
    """Verify every ChineseDailyNerCorpus split loads as aligned, non-empty pairs."""
    train_x, train_y = ChineseDailyNerCorpus.load_data()
    assert len(train_x) == len(train_y)
    assert len(train_x) > 0
    # Tokens and labels must be distinct sequences, not aliases of each other.
    assert train_x[:5] != train_y[:5]

    # The named splits get the same sanity checks as the default one.
    for subset in ('test', 'valid'):
        xs, ys = ChineseDailyNerCorpus.load_data(subset)
        assert len(xs) == len(ys)
        assert len(xs) > 0
class TestMacros:
    """Shared, download-once fixtures and corpus loaders for the test suite.

    Class attributes are evaluated at import time: sample models and corpora
    are fetched into the local cache directory and exposed to every test.
    """

    # Pre-trained sample artifacts fetched into DATA_PATH.
    bert_path = get_file('bert_sample_model',
                         "http://s3.bmio.net/coco_nlp/bert_sample_model.tar.bz2",
                         cache_dir=DATA_PATH,
                         untar=True)
    w2v_path = get_file('sample_w2v.txt',
                        "http://s3.bmio.net/coco_nlp/sample_w2v.txt",
                        cache_dir=DATA_PATH)
    jigsaw_mini_corpus_path = get_file(
        'jigsaw-toxic-comment-corpus-mini.csv',
        "http://s3.bmio.net/coco_nlp/jigsaw-toxic-comment-corpus-mini.csv",
        cache_dir=DATA_PATH)

    jigsaw_mini_corpus = JigsawToxicCommentCorpus(jigsaw_mini_corpus_path)
    chinese_daily = ChineseDailyNerCorpus.load_data('valid')
    smp_corpus = SMP2018ECDTCorpus.load_data('valid')

    # Test data for issue https://github.com/cole.zhang/coco_nlp/issues/187
    custom_1 = (text_x, ner_y)

    @classmethod
    def load_labeling_corpus(cls, name=None):
        """Return a sequence-labeling corpus by name; pick one at random when name is None."""
        corpora = {
            'chinese_daily': cls.chinese_daily,
            # 'custom_1': cls.custom_1,
        }
        if name is None:
            name = random.choice(list(corpora.keys()))
        return corpora[name]

    @classmethod
    def load_classification_corpus(cls, name=None):
        """Return a text-classification corpus by name; pick one at random when name is None."""
        corpora = {'smp_corpus': cls.smp_corpus}
        if name is None:
            name = random.choice(list(corpora.keys()))
        return corpora[name]

    @classmethod
    def load_multi_label_classification_corpus(cls, name=None):
        """Return the fixed multi-label classification sample pair."""
        return text_x, multi_label_y
def test_base_use_case(self):
    """Train, save, and reload a Seq2Seq model; predictions must survive the round trip."""
    x, y = ChineseDailyNerCorpus.load_data('test')
    x, y = x[:200], y[:200]

    seq2seq = Seq2Seq(hidden_size=64,
                      encoder_seq_length=64,
                      decoder_seq_length=64)
    seq2seq.fit(x, y, epochs=1)
    decoded, attention = seq2seq.predict(x)

    # Save to a unique temp directory and reload into a fresh instance.
    model_path = os.path.join(tempfile.gettempdir(), str(time.time()))
    seq2seq.save(model_path)
    restored = Seq2Seq.load_model(model_path)

    decoded2, attention2 = restored.predict(x)
    assert decoded2 == decoded
    assert (attention2 == attention).all()
            # NOTE(review): these braces close a hyper-parameter dict whose
            # opening lines are outside this chunk — indentation of closing
            # brackets inside the expression is semantically free. Confirm
            # against the full file.
            }
        }

    def build_model_arc(self) -> None:
        """Assemble the BiLSTM labeling network on top of the embedding model.

        Stacks Bidirectional(LSTM) -> Dropout -> Dense -> Activation onto the
        embedding output and stores the wired network as ``self.tf_model``.
        """
        # Number of output classes comes from the fitted label processor.
        output_dim = self.label_processor.vocab_size
        config = self.hyper_parameters
        embed_model = self.embedding.embed_model

        # Layer kwargs are driven entirely by the hyper-parameter dict above.
        layer_stack = [
            L.Bidirectional(L.LSTM(**config['layer_blstm']), name='layer_blstm'),
            L.Dropout(**config['layer_dropout'], name='layer_dropout'),
            L.Dense(output_dim, **config['layer_time_distributed']),
            L.Activation(**config['layer_activation'])
        ]

        # Thread the embedding output through each layer in order.
        tensor = embed_model.output
        for layer in layer_stack:
            tensor = layer(tensor)

        self.tf_model = keras.Model(embed_model.inputs, tensor)


if __name__ == "__main__":
    # Manual smoke-run: train for two epochs and evaluate on the test split.
    from coco_nlp.corpus import ChineseDailyNerCorpus

    x, y = ChineseDailyNerCorpus.load_data()
    x_valid, y_valid = ChineseDailyNerCorpus.load_data('valid')

    model = BiLSTM_Model()
    model.fit(x, y, x_valid, y_valid, epochs=2)
    model.evaluate(*ChineseDailyNerCorpus.load_data('test'))
name='layer_time_distributed'), L.Activation(**config['layer_activation']) ] tensor = embed_model.output for layer in layer_stack: tensor = layer(tensor) self.tf_model = keras.Model(embed_model.inputs, tensor) if __name__ == "__main__": from coco_nlp.corpus import ChineseDailyNerCorpus from coco_nlp.callbacks import EvalCallBack train_x, train_y = ChineseDailyNerCorpus.load_data('train') valid_x, valid_y = ChineseDailyNerCorpus.load_data('valid') test_x, test_y = ChineseDailyNerCorpus.load_data('test') model = BiGRU_Model(sequence_length=10) eval_callback = EvalCallBack(coco_model=model, x_data=valid_x, y_data=valid_y, truncating=True, step=1) model.fit(train_x[:300], train_y[:300], valid_x, valid_y,
token_out.append(next_tokens) if next_tokens == eos_token_id: break dec_input = tf.expand_dims([next_tokens], 0) r = self.decoder_processor.inverse_transform([token_out])[0] results.append(r) attentions.append(attention_plot) return results, np.array(attentions) if __name__ == "__main__": from coco_nlp.corpus import ChineseDailyNerCorpus import logging logging.basicConfig( level='INFO', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') x, y = ChineseDailyNerCorpus.load_data('test') x, y = x[:100], y[:100] seq2seq = Seq2Seq(hidden_size=256) seq2seq.build_model(x, y) seq2seq.save('./seq2seq_model') s = Seq2Seq.load_model('./seq2seq_model') res, att = seq2seq.predict(x[:10]) res2, att2 = s.predict(x[:10]) print(res == res2) print((att == att2).all())