Example #1
def test(self):
    predictor = Predictor()
    model = Linear(2, 1)
    data = prepare_fake_dataset()
    data.set_input("x")
    ans = predictor.predict(model, data)
    self.assertEqual(len(ans), 2000)
    self.assertTrue(isinstance(ans[0], torch.Tensor))
Example #2
def test_simple(self):
    model = LinearModel()
    predictor = Predictor(model)
    data = prepare_fake_dataset()
    data.set_input("x")
    ans = predictor.predict(data)
    self.assertTrue(isinstance(ans, defaultdict))
    self.assertTrue("predict" in ans)
    self.assertTrue(isinstance(ans["predict"], list))
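The two tests above show Predictor's two calling conventions: an older one where the network is passed to predict() together with the data, and a newer one where the model is bound at construction and predict() returns a defaultdict keyed by the model's output names. A minimal sketch of the newer style, reusing LinearModel and prepare_fake_dataset from above (both are test fixtures, not library functions):

model = LinearModel()             # test fixture
predictor = Predictor(model)      # the model is bound at construction time
data = prepare_fake_dataset()     # test fixture returning a fastNLP DataSet
data.set_input("x")               # mark "x" as an input field
ans = predictor.predict(data)     # defaultdict: output name -> list of results
print(ans["predict"][:2])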
Example #3
    def test_seq_label(self):
        model_args = {
            "vocab_size": 10,
            "word_emb_dim": 100,
            "rnn_hidden_units": 100,
            "num_classes": 5
        }

        infer_data = [['a', 'b', 'c', 'd', 'e'], ['a', '@', 'c', 'd', 'e'],
                      ['a', 'b', '#', 'd', 'e'], ['a', 'b', 'c', '?', 'e'],
                      ['a', 'b', 'c', 'd', '$'], ['!', 'b', 'c', 'd', 'e']]

        vocab = Vocabulary()
        vocab.word2idx = {
            'a': 0,
            'b': 1,
            'c': 2,
            'd': 3,
            'e': 4,
            '!': 5,
            '@': 6,
            '#': 7,
            '$': 8,
            '?': 9
        }
        class_vocab = Vocabulary()
        class_vocab.word2idx = {"0": 0, "1": 1, "2": 2, "3": 3, "4": 4}

        os.system("mkdir save")
        save_pickle(class_vocab, "./save/", "class2id.pkl")
        save_pickle(vocab, "./save/", "word2id.pkl")

        model = SeqLabeling(model_args)
        predictor = Predictor("./save/", task="seq_label")

        results = predictor.predict(network=model, data=infer_data)

        self.assertTrue(isinstance(results, list))
        self.assertGreater(len(results), 0)
        for res in results:
            self.assertTrue(isinstance(res, list))
            self.assertEqual(len(res), 5)
            self.assertTrue(isinstance(res[0], str))

        os.system("rm -rf save")
        print("pickle path deleted")
Example #4
def predict(model, subset_for_prediction, targets, filename):
    predictor = Predictor(model)
    predictions = predictor.predict(subset_for_prediction)['pred']
    words = list(subset_for_prediction.get_field('raw_words'))
    lines = []

    words_sequence_index = 1
    labels_sequence_index = 0
    for sentence in zip(predictions, words):
        # guard: skip predictions that arrive as a flat list of ints
        if isinstance(sentence[labels_sequence_index][0], int):
            continue
        sentence_words = sentence[words_sequence_index]
        # keep only the entity type, dropping the BIO prefix (e.g. "B-PER" -> "PER")
        labels = (targets.to_word(label).split('-')[-1]
                  for label in sentence[labels_sequence_index][0])
        for pair in zip(sentence_words, labels):
            lines.append('\t'.join(pair))
        lines.append('')
    write_lines(filename, lines)
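write_lines() is not defined in the snippet; a plausible minimal implementation, assuming one string per list entry:

def write_lines(filename, lines):
    # hypothetical helper: write each entry on its own line, UTF-8 encoded
    with open(filename, "w", encoding="utf8") as f:
        f.write("\n".join(lines) + "\n")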
Example #5
    def _predict(self, subset_for_prediction, targets, filename):
        predictor = Predictor(self)
        predictions = predictor.predict(subset_for_prediction)['pred']
        words = list(subset_for_prediction.get_field('raw_words'))
        lines = []

        words_sequence_index = 1
        labels_sequence_index = 0
        for sentence in zip(predictions, words):
            # guard: skip predictions that arrive as a flat list of ints
            if isinstance(sentence[labels_sequence_index][0], int):
                continue
            sentence_words = sentence[words_sequence_index]
            # unlike Example #4, keep the full label including the BIO prefix
            labels = (targets.to_word(label)
                      for label in sentence[labels_sequence_index][0])
            for pair in zip(sentence_words, labels):
                lines.append(' '.join(pair))
            lines.append('')
        if filename is not None:
            write_lines(filename, lines)
        return lines
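A usage sketch under assumed names (dev_set as the DataSet to label, label_vocab as the target Vocabulary): each sentence becomes space-separated "token label" lines followed by a blank line, i.e. a CoNLL-style two-column layout.

lines = model._predict(dev_set, label_vocab, filename='dev.conll')
# e.g. lines == ['The B-ORG', 'Fed I-ORG', '', ...]   (illustrative only)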
Example #6
def infer():
    # Load infer configuration, the same as test
    test_args = ConfigSection()
    ConfigLoader("config.cfg").load_config("./data_for_tests/config",
                                           {"POS_test": test_args})

    # fetch dictionary size and number of labels from pickle files
    word2index = load_pickle(pickle_path, "word2id.pkl")
    test_args["vocab_size"] = len(word2index)
    index2label = load_pickle(pickle_path, "id2class.pkl")
    test_args["num_classes"] = len(index2label)

    # Define the same model
    model = SeqLabeling(test_args)

    # Dump trained parameters into the model
    ModelLoader.load_pytorch(model, "./data_for_tests/saved_model.pkl")
    print("model loaded!")

    # Data Loader
    raw_data_loader = BaseLoader(data_infer_path)
    infer_data = raw_data_loader.load_lines()
    """
        Transform strings into list of list of strings. 
        [
            [word_11, word_12, ...],
            [word_21, word_22, ...],
            ...
        ]
        In this case, each line in "people_infer.txt" is already a sentence. So load_lines() just splits them.
    """

    # Inference interface
    predictor = Predictor(pickle_path)
    results = predictor.predict(model, infer_data)

    print(results)
    print("Inference finished!")
Example #7
def __init__(self, modelFile, vocabFile, addTarget2Vocab=False):
    # CHAR_INPUT="chars", and it will be converted to word_index
    self._vocabFile = vocabFile
    self._addTarget2Vocab = addTarget2Vocab
    self._CONST_CHAR = Const.CHAR_INPUT
    self._CONST_WORDS = Const.INPUT
    self._CONST_TARGET = Const.TARGET
    self._input_fields = [self._CONST_WORDS, Const.INPUT_LEN]
    (self._word_counter, self._word_vocab, self._target_counter,
     self._target_vocab, self._target) = self._get_vocabs()
    self._vocab4word = Vocabulary()
    self._update_word()
    if self._addTarget2Vocab:
        self._vocab4target = Vocabulary(unknown=None, padding=None)
        self._input_fields.append(self._CONST_TARGET)
        self._update_target()
    self._model = Predictor(ModelLoader().load_pytorch_model(modelFile))
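Examples #6 and #7 use two different ModelLoader entry points. Assuming fastNLP's io.ModelLoader, the first fills an existing module's parameters in place, while the second unpickles and returns a complete model:

from fastNLP.io import ModelLoader

ModelLoader.load_pytorch(model, './data_for_tests/saved_model.pkl')  # loads weights into `model`
loaded = ModelLoader().load_pytorch_model('saved_model.pkl')         # returns the whole model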
Example #8
label_link_dict = dict()
for row_json in json_file_iter:
    label_link_dict[row_json['label_desc']] = row_json['label']
logger.info(label_link_dict)
logger.warning('Start loading the model')
model = torch.load(model_name)
model.eval()
logger.info('Model loaded:\n{}'.format(model))
logger.warning('Fetching the vocabularies')
char_vocab = load_serialize_obj(char_vocab_pkl_file)
logger.info('char_vocab:{}'.format(char_vocab))
target_vocab = load_serialize_obj(target_vocab_pkl_file)
logger.info('target_vocab:{}'.format(target_vocab))
logger.warning('Loading the test data')
json_file_iter = read_json_file_iter(test_data_json_file_name)
predictor = Predictor(model)
with codecs.open(predict_output_json_file_name, mode='w',
                 encoding='utf8') as fw_json, \
     codecs.open(predict_output_file_name, mode='w',
                 encoding='utf8') as fw:
    for i, row_json in enumerate(json_file_iter):
        if i % 100 == 0:
            logger.info('predict row:{}'.format(i))
        sentence = row_json.get('sentence', '')
        keywords = row_json.get('keywords', '')
        text = remove_blank('{}{}'.format(sentence, keywords))
        input_data = []
        test_data = [list(text)]
        # logger.info('test_data len:{}'.format(len(test_data)))
        # logger.warning('preprocess the input data')
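The snippet is cut off at this point. A hedged sketch of how such a loop typically continues with fastNLP; the field name "chars" and the output key "pred" are assumptions based on the other examples:

from fastNLP import DataSet

dataset = DataSet({'chars': test_data})                 # wrap the single sample
char_vocab.index_dataset(dataset, field_name='chars')   # characters -> vocabulary indices
dataset.set_input('chars')
pred = predictor.predict(dataset)['pred']               # assumed output key
# indices would then be mapped back to labels via target_vocab.to_word(...)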
Example #9
                      optimizer,
                      loss,
                      args.batch,
                      n_epochs=args.epoch,
                      dev_data=datasets['dev'],
                      metrics=metrics,
                      device=device,
                      callbacks=create_cb(),
                      dev_batch_size=args.test_batch,
                      test_use_tqdm=False,
                      check_code_level=-1,
                      update_every=args.update_every)
    trainer.train()
    print('Evaluating...')
    with torch.no_grad():
        predictor = Predictor(model)   # wrap the trained model for inference
        pred = predictor.predict(
            datasets['dev'],
            seq_len_field_name='seq_len',
        )['pred']
    pred = [[vocabs['label'].to_word(ele) for ele in arr] for arr in pred]
    target = list(datasets['dev']['target'])
    target = [[vocabs['label'].to_word(ele) for ele in arr] for arr in target]
    cls_res = classification_report(target, pred)

    print(cls_res)
    print('=============================')
    visualize_error(datasets['dev'], target, pred)
    # Prediction to aicup data
    if args.do_pred:
        print('predicting...')
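classification_report is applied here to lists of label sequences, which matches seqeval's entity-level report; the import is not shown, so this mapping is an assumption:

from seqeval.metrics import classification_report

target = [['B-PER', 'O', 'O'], ['B-LOC', 'I-LOC', 'O']]
pred = [['B-PER', 'O', 'O'], ['B-LOC', 'O', 'O']]
print(classification_report(target, pred))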
Example #10
    def test_seq_label(self):
        model_args = {
            "vocab_size": 10,
            "word_emb_dim": 100,
            "rnn_hidden_units": 100,
            "num_classes": 5
        }

        infer_data = [['a', 'b', 'c', 'd', 'e'], ['a', '@', 'c', 'd', 'e'],
                      ['a', 'b', '#', 'd', 'e'], ['a', 'b', 'c', '?', 'e'],
                      ['a', 'b', 'c', 'd', '$'], ['!', 'b', 'c', 'd', 'e']]

        vocab = Vocabulary()
        vocab.word2idx = {
            'a': 0,
            'b': 1,
            'c': 2,
            'd': 3,
            'e': 4,
            '!': 5,
            '@': 6,
            '#': 7,
            '$': 8,
            '?': 9
        }
        class_vocab = Vocabulary()
        class_vocab.word2idx = {"0": 0, "1": 1, "2": 2, "3": 3, "4": 4}

        os.system("mkdir save")
        save_pickle(class_vocab, "./save/", "label2id.pkl")
        save_pickle(vocab, "./save/", "word2id.pkl")

        model = CNNText(model_args)
        import fastNLP.core.predictor as pre
        predictor = Predictor("./save/", pre.text_classify_post_processor)

        # Load infer data
        infer_data_set = convert_seq_dataset(infer_data)
        infer_data_set.index_field("word_seq", vocab)

        results = predictor.predict(network=model, data=infer_data_set)

        self.assertTrue(isinstance(results, list))
        self.assertGreater(len(results), 0)
        self.assertEqual(len(results), len(infer_data))
        for res in results:
            self.assertTrue(isinstance(res, str))
            self.assertTrue(res in class_vocab.word2idx)

        del model, predictor
        infer_data_set.set_origin_len("word_seq")

        model = SeqLabeling(model_args)
        predictor = Predictor("./save/", pre.seq_label_post_processor)

        results = predictor.predict(network=model, data=infer_data_set)
        self.assertTrue(isinstance(results, list))
        self.assertEqual(len(results), len(infer_data))
        for i in range(len(infer_data)):
            res = results[i]
            self.assertTrue(isinstance(res, list))
            self.assertEqual(len(res), len(infer_data[i]))

        os.system("rm -rf save")
        print("pickle path deleted")