def train():
    """Evaluate a previously saved text-classification model on the test set.

    Loads the data splits and vocabulary, restores the model from the
    module-level ``load_path``, and reports accuracy with fastNLP's Tester.
    The Trainer call is intentionally left commented out; ``loss`` and
    ``metrics`` are kept so it can be re-enabled unchanged.
    """
    train_data, dev_data, test_data, vocab = get_train_dev_test_vocab()

    # BUG FIX: the original constructed a fresh CNNText here and immediately
    # overwrote it with torch.load(), so the construction was dead code.
    # model = CNNText(vocab_size=len(vocab), embedding_dim=50, output_size=20)
    # SECURITY: torch.load unpickles arbitrary objects -- only load trusted files.
    model = torch.load(load_path)

    loss = CrossEntropyLoss(pred=Const.OUTPUT, target=Const.TARGET)
    metrics = AccuracyMetric(pred=Const.OUTPUT, target=Const.TARGET)
    '''
    trainer = Trainer(model=model,
                      train_data=train_data,
                      dev_data=dev_data,
                      loss=loss,
                      metrics=metrics,
                      n_epochs=100,
                      save_path=checkpoint_path)

    trainer.train()
    '''

    tester = Tester(test_data, model, metrics=AccuracyMetric())
    tester.test()
Example #2
0
def test_control_C():
    # Manual test for ControlC: press Control+C once during each of the two
    # training runs below. The test passes if "Test failed!" is never printed.
    from fastNLP import ControlC, Callback
    import time

    line1 = "\n\n\n\n\n*************************"
    line2 = "*************************\n\n\n\n\n"

    class Wait(Callback):
        def on_epoch_end(self):
            time.sleep(5)

    data_set, model = prepare_env()

    print(line1 + "Test starts!" + line2)
    # First run uses ControlC(False), second uses ControlC(True).
    for quit_all in (False, True):
        trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                          loss=BCELoss(pred="predict", target="y"),
                          batch_size=32, n_epochs=20, dev_data=data_set,
                          metrics=AccuracyMetric(pred="predict", target="y"),
                          use_tqdm=True,
                          callbacks=[Wait(), ControlC(quit_all)],
                          check_code_level=2)
        trainer.train()
        if not quit_all:
            print(line1 + "Program goes on ..." + line2)

    print(line1 + "Test failed!" + line2)
Example #3
0
def bilstm_text():
    """Train a BiLSTM text classifier with pre-trained embeddings, then test it.

    Loads embedding weights from ``weight.bin``, trains on the data returned
    by ``read_data()``, checkpoints to ``./lstm_model``, and finally reports
    test-set accuracy.
    """
    # BUG FIX: pickle.load(open(...)) leaked the file handle; use a context
    # manager so it is closed deterministically.
    # SECURITY: pickle.load executes arbitrary code -- only load trusted files.
    with open("weight.bin", "rb") as f:
        w = pickle.load(f)

    (vocab, train_data, dev_data, test_data) = read_data()

    model_lstm = MyBLSTMText(class_num=4,
                             vocab_size=len(vocab),
                             dropout=0.5,
                             embed_weights=w)
    loss = CrossEntropyLoss()
    metrics = AccuracyMetric()
    trainer = Trainer(model=model_lstm,
                      train_data=train_data,
                      dev_data=dev_data,
                      optimizer=Adam(lr=0.0015),
                      print_every=10,
                      use_tqdm=False,
                      device='cuda:0',
                      save_path="./lstm_model",
                      loss=loss,
                      metrics=metrics)
    # callbacks=[EarlyStopCallback(10)])

    trainer.train()

    tester = Tester(test_data, model_lstm, metrics=AccuracyMetric())
    tester.test()
Example #4
0
    def test_AccuracyMetric1(self):
        # (1) plain call with only pred/target dicts; just print the result.
        metric = AccuracyMetric()
        predictions = {"pred": torch.zeros(4, 3)}
        labels = {'target': torch.zeros(4)}
        metric(pred_dict=predictions, target_dict=labels)
        print(metric.get_metric())
Example #5
0
 def test_AccuaryMetric7(self):
     # (7) mapped pred/target keys are matched and accuracy agrees with a
     # hand-computed argmax comparison.
     metric = AccuracyMetric(pred='predictions', target='targets')
     pred_dict = {"predictions": torch.randn(4, 3, 2)}
     target_dict = {'targets': torch.zeros(4, 3)}
     metric(pred_dict=pred_dict, target_dict=target_dict)
     result = metric.get_metric()
     hits = torch.argmax(pred_dict["predictions"], dim=2).float() == target_dict["targets"]
     self.assertAlmostEqual(result["acc"], float(hits.float().mean()), places=4)
Example #6
0
 def test_AccuaryMetric5(self):
     # (5) get_metric(reset=False) reports accuracy without clearing state.
     metric = AccuracyMetric()
     pred_dict = {"pred": torch.randn(4, 3, 2)}
     target_dict = {'target': torch.zeros(4, 3)}
     metric(pred_dict=pred_dict, target_dict=target_dict)
     result = metric.get_metric(reset=False)
     hits = torch.argmax(pred_dict["pred"], dim=2).float() == target_dict["target"]
     self.assertAlmostEqual(result["acc"], float(hits.float().mean()), places=4)
Example #7
0
 def test_AccuaryMetric8(self):
     """(8) a pred_dict missing the mapped key must raise inside the call."""
     try:
         metric = AccuracyMetric(pred='predictions', target='targets')
         # Key is "prediction" (singular) on purpose: it does not match the
         # mapping above, so the metric call is expected to fail.
         pred_dict = {"prediction": torch.zeros(4, 3, 2)}
         target_dict = {'targets': torch.zeros(4, 3)}
         metric(pred_dict=pred_dict, target_dict=target_dict, )
         self.assertDictEqual(metric.get_metric(), {'acc': 1})
     except Exception as e:
         print(e)
         return
     # BUG FIX: the original `self.assertTrue(True, False)` always passed and
     # the trailing message string was dead; reaching here means the expected
     # exception was not raised, which must fail the test.
     self.fail("No exception catches.")
Example #8
0
 def test_AccuaryMetric4(self):
     # (4) accuracy against all-ones targets; also checks the result dict shape.
     metric = AccuracyMetric()
     pred_dict = {"pred": torch.randn(4, 3, 2)}
     target_dict = {'target': torch.ones(4, 3)}
     metric(pred_dict=pred_dict, target_dict=target_dict)
     hits = torch.argmax(pred_dict["pred"], dim=2).to(target_dict["target"]) == target_dict["target"]
     result = metric.get_metric()
     self.assertTrue(isinstance(result, dict))
     self.assertTrue("acc" in result)
     self.assertAlmostEqual(result["acc"], float(hits.float().mean()), places=3)
Example #9
0
 def test_evaluate_callback(self):
     # EvaluateCallback wrapping an extra Tester should run during training.
     data_set, model = prepare_env()
     from fastNLP import Tester
     extra_tester = Tester(data=data_set, model=model,
                           metrics=AccuracyMetric(pred="predict", target="y"))
     callback = EvaluateCallback(data_set, extra_tester)

     trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                       loss=BCELoss(pred="predict", target="y"),
                       batch_size=32, n_epochs=5, print_every=50,
                       dev_data=data_set,
                       metrics=AccuracyMetric(pred="predict", target="y"),
                       use_tqdm=False, callbacks=callback, check_code_level=2)
     trainer.train()
Example #10
0
 def test_AccuaryMetric10(self):
     """(10) _fast_metric path with mismatched keys plus seq_len must raise."""
     try:
         metric = AccuracyMetric()
         pred_dict = {"predictions": torch.zeros(4, 3, 2), "seq_len": torch.ones(3) * 3}
         target_dict = {'targets': torch.zeros(4, 3)}
         metric(pred_dict=pred_dict, target_dict=target_dict)
         self.assertDictEqual(metric.get_metric(), {'acc': 1})
     except Exception as e:
         print(e)
         return
     # BUG FIX: `self.assertTrue(True, False)` never failed; reaching this
     # line means the expected exception was not raised.
     self.fail("No exception catches.")
Example #11
0
 def test_seq_len(self):
     # Positions past each sequence's seq_len should be excluded from accuracy.
     N = 256
     seq_len = torch.zeros(N).long()
     seq_len[0] = 2
     pred = {'pred': torch.ones(N, 2)}
     target = {'target': torch.ones(N, 2), 'seq_len': seq_len}
     metric = AccuracyMetric()
     metric(pred_dict=pred, target_dict=target)
     self.assertDictEqual(metric.get_metric(), {'acc': 1.})
     # Give every sequence at least length 1 and evaluate again.
     seq_len[1:] = 1
     metric(pred_dict=pred, target_dict=target)
     self.assertDictEqual(metric.get_metric(), {'acc': 1.})
Example #12
0
 def test_AccuaryMetric9(self):
     """(9) mapped keys with an extra unused entry in pred_dict must raise."""
     try:
         metric = AccuracyMetric(pred='prediction', target='targets')
         pred_dict = {"prediction": torch.zeros(4, 3, 2), 'unused': 1}
         target_dict = {'targets': torch.zeros(4, 3)}
         metric(pred_dict=pred_dict, target_dict=target_dict)
         self.assertDictEqual(metric.get_metric(), {'acc': 1})
     except Exception as e:
         print(e)
         return
     # BUG FIX: `self.assertTrue(True, False)` never failed; reaching this
     # line means the expected exception was not raised.
     self.fail("No exception catches.")
Example #13
0
 def test_fitlog_callback(self):
     # FitlogCallback with an extra Tester should log during training.
     import fitlog
     fitlog.set_log_dir(self.tempdir, new_log=True)
     data_set, model = prepare_env()
     from fastNLP import Tester
     extra_tester = Tester(data=data_set, model=model,
                           metrics=AccuracyMetric(pred="predict", target="y"))
     callback = FitlogCallback(data_set, extra_tester)

     trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                       loss=BCELoss(pred="predict", target="y"),
                       batch_size=32, n_epochs=5, print_every=50,
                       dev_data=data_set,
                       metrics=AccuracyMetric(pred="predict", target="y"),
                       use_tqdm=True, callbacks=callback, check_code_level=2)
     trainer.train()
Example #14
0
 def test_AccuracyMetric2(self):
     # (2) pred and target with incompatible sizes should raise in the call;
     # if it does not, only a message is printed (soft failure).
     try:
         metric = AccuracyMetric()
         pred_dict = {"pred": torch.zeros(4, 3, 2)}
         target_dict = {'target': torch.zeros(4)}
         metric(pred_dict=pred_dict, target_dict=target_dict)
         print(metric.get_metric())
     except Exception as e:
         print(e)
         return
     print("No exception catches.")
Example #15
0
    def test_case_1(self):
        # The Tester should raise a helpful NameError when the model returns
        # 'preds' while the default AccuracyMetric expects 'pred'.
        dataset = prepare_fake_dataset2('x1', 'x_unused')
        dataset.rename_field('x_unused', 'x2')
        dataset.set_input('x1', 'x2')
        dataset.set_target('y', 'x1')

        class Model(nn.Module):
            def __init__(self):
                super().__init__()
                self.fc = nn.Linear(5, 4)

            def forward(self, x1, x2):
                combined = self.fc(x1) + self.fc(x2)
                time.sleep(0.1)
                # Deliberately mis-named output key to trigger the error.
                return {'preds': combined}

        model = Model()
        with self.assertRaises(NameError):
            tester = Tester(data=dataset, model=model, metrics=AccuracyMetric())
            tester.test()
Example #16
0
    def test_collect_fn3(self):
        """The collate fn's batch_x entries should override the dataset fields.

        :return:
        """
        dataset = prepare_fake_dataset2('x1', 'x2')
        dataset.set_input('x1', 'x2')
        dataset.set_target('y')
        import torch

        def fn(ins_list):
            summed = torch.FloatTensor([ins['x1'] + ins['x2'] for _, ins in ins_list])
            batch_x = {'x1': torch.zeros_like(summed)}
            batch_y = {'target': torch.zeros(summed.size(0)).long(), 'y': summed}
            return batch_x, batch_y
        dataset.add_collect_fn(fn)

        class Model(nn.Module):
            def __init__(self):
                super().__init__()
                self.fc = nn.Linear(5, 1, bias=False)

            def forward(self, x1):
                out = self.fc(x1)
                # The collate fn zeroed x1, so the linear output must be zero.
                assert out.sum() == 0, "Should be replaced to one"
                return {'pred': out}

        model = Model()
        trainer = Trainer(train_data=dataset, model=model, loss=CrossEntropyLoss(),
                          print_every=2, dev_data=dataset, metrics=AccuracyMetric(),
                          use_tqdm=False, n_epochs=1)
        best_metric = trainer.train()['best_eval']['AccuracyMetric']['acc']
        self.assertTrue(best_metric == 1)
Example #17
0
    def test_collect_fn2(self):
        """The collate fn can add brand-new batch_x and batch_y entries."""
        dataset = prepare_fake_dataset2('x1', 'x2')
        dataset.set_input('x1', 'x2')
        dataset.set_target('y', 'x1')
        import torch

        def fn(ins_list):
            summed = torch.FloatTensor([ins['x1'] + ins['x2'] for _, ins in ins_list])
            return {'x': summed}, {'target': summed[:, :4].argmax(dim=-1)}
        dataset.add_collect_fn(fn)

        class Model(nn.Module):
            def __init__(self):
                super().__init__()
                self.fc = nn.Linear(5, 4)

            def forward(self, x1, x2, x):
                # 'x' only exists because the collate fn injected it.
                sum_x = self.fc(x1) + self.fc(x2) + self.fc(x)
                time.sleep(0.1)
                return {'pred': sum_x}

        model = Model()
        trainer = Trainer(train_data=dataset, model=model, loss=CrossEntropyLoss(),
                          print_every=2, dev_data=dataset, metrics=AccuracyMetric(),
                          use_tqdm=False)
        trainer.train()
Example #18
0
    def test_trainer_suggestion6(self):
        # The Trainer should raise a helpful NameError: the model's output key
        # ('preds') does not match what the metric expects, and 'x1' is both
        # an input and a target to exercise the duplicate-field path.
        dataset = prepare_fake_dataset2('x1', 'x_unused')
        dataset.rename_field('x_unused', 'x2')
        dataset.set_input('x1', 'x2')
        dataset.set_target('y', 'x1')

        class Model(nn.Module):
            def __init__(self):
                super().__init__()
                self.fc = nn.Linear(5, 4)

            def forward(self, x1, x2):
                combined = self.fc(x1) + self.fc(x2)
                time.sleep(0.1)
                return {'preds': combined}

        model = Model()
        with self.assertRaises(NameError):
            trainer = Trainer(train_data=dataset, model=model,
                              loss=CrossEntropyLoss(), print_every=2,
                              dev_data=dataset, metrics=AccuracyMetric(),
                              use_tqdm=False)
Example #19
0
 def test_gradient_clip(self):
     # Training with a GradientClipCallback should run end to end.
     data_set, model = prepare_env()
     clip_callback = GradientClipCallback(model.parameters(), clip_value=2)
     trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                       loss=BCELoss(pred="predict", target="y"),
                       batch_size=32, n_epochs=20, print_every=50,
                       dev_data=data_set,
                       metrics=AccuracyMetric(pred="predict", target="y"),
                       use_tqdm=False, callbacks=[clip_callback],
                       check_code_level=2)
     trainer.train()
Example #20
0
    def test_save_path(self):
        # Training with save_path set should run and write checkpoints there.
        data_set = prepare_fake_dataset()
        data_set.set_input("x", flag=True)
        data_set.set_target("y", flag=True)
        train_set, dev_set = data_set.split(0.3)

        model = NaiveClassifier(2, 1)
        save_path = 'test_save_models'

        trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1),
                          loss=BCELoss(pred="predict", target="y"),
                          batch_size=32, n_epochs=10, print_every=50,
                          dev_data=dev_set,
                          metrics=AccuracyMetric(pred="predict", target="y"),
                          validate_every=-1, save_path=save_path,
                          use_tqdm=True, check_code_level=2)
        trainer.train()
Example #21
0
    def test_readonly_property(self):
        # A callback must be able to read trainer properties (epoch, n_epochs,
        # n_steps, batch_size, model, optimizer) while training runs, and the
        # epoch counter must advance 1..total_epochs.
        from fastNLP.core.callback import Callback
        passed_epochs = []
        total_epochs = 5

        class MyCallback(Callback):
            def __init__(self):
                super(MyCallback, self).__init__()

            def on_epoch_begin(self):
                passed_epochs.append(self.epoch)
                print(self.n_epochs, self.n_steps, self.batch_size)
                print(self.model)
                print(self.optimizer)

        data_set, model = prepare_env()
        trainer = Trainer(data_set, model,
                          loss=BCELoss(pred="predict", target="y"),
                          n_epochs=total_epochs, batch_size=32, print_every=50,
                          optimizer=SGD(lr=0.1), check_code_level=2,
                          use_tqdm=False, dev_data=data_set,
                          metrics=AccuracyMetric(pred="predict", target="y"),
                          callbacks=[MyCallback()])
        trainer.train()
        assert passed_epochs == list(range(1, total_epochs + 1))
    def run4(self):
        # Distributed training with a save_path; expected to run cleanly.
        # Afterwards the master process removes the checkpoint directory.
        set_rng_seed(100)
        data_set, model = prepare_env()
        train_set, dev_set = data_set.split(0.3)

        model = NaiveClassifier(2, 1)
        trainer = DistTrainer(train_set, model, optimizer=SGD(lr=0.1),
                              loss=BCELoss(pred="predict", target="y"),
                              batch_size_per_gpu=32, n_epochs=3, print_every=50,
                              dev_data=dev_set,
                              metrics=AccuracyMetric(pred="predict", target="y"),
                              validate_every=-1, save_path=self.save_path)
        trainer.train()
        if trainer.is_master and os.path.exists(self.save_path):
            shutil.rmtree(self.save_path)
Example #23
0
def main():
    """Train an RNN/LSTM/CNN text classifier selected on the command line.

    Parses CLI options, builds (or, for cnn, reloads) the requested model,
    trains it with fastNLP's Trainer, and saves the result to a .pth file.

    :raises ValueError: if ``--category`` is outside [1, 20]
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--methods",
                        "-m",
                        default="lstm",
                        choices=["rnn", "lstm", "cnn"])
    parser.add_argument("--n_epochs", "-n", default=5, type=int)
    parser.add_argument("--embedding", "-e", default=100, type=int)
    parser.add_argument("--category", "-c", default=4, type=int)
    parser.add_argument("--batch", "-b", default=4, type=int)
    parser.add_argument("--learning_rate", "-l", default=0.005, type=float)
    args = parser.parse_args()
    if args.category > 20 or args.category < 1:
        # FIX: raise a specific exception type instead of bare Exception
        # (ValueError is a subclass, so existing `except Exception` still works).
        raise ValueError("the number of category must be between 1 and 20")
    train_data, test_data, dic_size = handle_data(args.category)
    if args.methods == "rnn":
        model = rnn(dic_size, args.category)
        output = "rnn_model.pth"
    elif args.methods == "lstm":
        model = myLSTM(dic_size, args.category)
        output = "lstm_model.pth"
    else:
        # NOTE(review): the cnn branch resumes from an existing checkpoint
        # rather than building a fresh model. SECURITY: torch.load unpickles
        # arbitrary objects -- only load trusted files.
        #model = cnn(dic_size, args.category)
        model = torch.load("cnn_model.pth")
        output = "cnn_model.pth"
    trainer = Trainer(train_data,
                      model,
                      loss=CrossEntropyLoss(pred="pred", target='target'),
                      optimizer=SGD(model.parameters(), lr=args.learning_rate),
                      n_epochs=args.n_epochs,
                      dev_data=test_data,
                      metrics=AccuracyMetric(pred="pred", target='target'),
                      batch_size=args.batch)
    trainer.train()
    torch.save(model, output)
Example #24
0
    def test_trainer_data_parallel(self):
        # Only meaningful with at least two GPUs; otherwise this is a no-op.
        if torch.cuda.device_count() > 1:
            from fastNLP import AccuracyMetric
            dataset = prepare_fake_dataset2('x1', 'x2')
            dataset.set_input('x1', 'x2', 'y', flag=True)

            class Model(nn.Module):
                def __init__(self):
                    super().__init__()
                    self.fc = nn.Linear(5, 4)

                def forward(self, x1, x2, y=None):
                    x = self.fc(x1) + self.fc(x2)
                    # Training mode returns a loss; eval mode returns pred/target.
                    if self.training:
                        return {'loss': F.cross_entropy(x, y)}
                    return {'pred': x, 'target': y}

            model = Model()
            trainer = Trainer(train_data=dataset, model=model, print_every=2,
                              use_tqdm=False, dev_data=dataset,
                              metrics=AccuracyMetric(), device=[0, 1])
            trainer.train(load_best_model=False)
Example #25
0
 def test_early_stop(self):
     # Training with EarlyStopCallback(5) should run (and presumably stop
     # early once dev performance stalls -- behavior owned by the callback).
     data_set, model = prepare_env()
     trainer = Trainer(data_set, model, optimizer=SGD(lr=0.01),
                       loss=BCELoss(pred="predict", target="y"),
                       batch_size=32, n_epochs=20, print_every=50,
                       dev_data=data_set,
                       metrics=AccuracyMetric(pred="predict", target="y"),
                       use_tqdm=False, callbacks=[EarlyStopCallback(5)],
                       check_code_level=2)
     trainer.train()
Example #26
0
    def test_AccuracyMetric3(self):
        """(3) a second batch with a corrupted size must raise."""
        try:
            metric = AccuracyMetric()
            pred_dict = {"pred": torch.zeros(4, 3, 2)}
            target_dict = {'target': torch.zeros(4, 3)}
            metric(pred_dict=pred_dict, target_dict=target_dict)

            # Second batch: the target shape no longer matches the first batch.
            pred_dict = {"pred": torch.zeros(4, 3, 2)}
            target_dict = {'target': torch.zeros(4)}
            metric(pred_dict=pred_dict, target_dict=target_dict)

            print(metric.get_metric())
        except Exception as e:
            print(e)
            return
        # BUG FIX: `self.assertTrue(True, False)` always passed; reaching this
        # line means the expected exception was never raised.
        self.fail("No exception catches.")
Example #27
0
 def test_warmup_callback(self):
     # Training with a WarmupCallback should run end to end.
     data_set, model = prepare_env()
     warmup = WarmupCallback()
     trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1),
                       loss=BCELoss(pred="predict", target="y"),
                       batch_size=32, n_epochs=5, print_every=50,
                       dev_data=data_set,
                       metrics=AccuracyMetric(pred="predict", target="y"),
                       use_tqdm=True, callbacks=warmup, check_code_level=2)
     trainer.train()
Example #28
0
    def test_CheckPointCallback(self):
        """CheckPointCallback should let a crashed run resume from its file.

        First run: train with a checkpoint file and a callback that raises
        RuntimeError after 100 steps. Second run: rebuild everything with the
        same checkpoint path and verify training completes.
        """
        from fastNLP import CheckPointCallback, Callback
        from fastNLP import Tester

        class RaiseCallback(Callback):
            def __init__(self, stop_step=10):
                super().__init__()
                self.stop_step = stop_step

            def on_backward_begin(self, loss):
                # Simulate a crash part-way through training.
                if self.step > self.stop_step:
                    raise RuntimeError()

        data_set, model = prepare_env()
        tester = Tester(data=data_set, model=model, metrics=AccuracyMetric(pred="predict", target="y"))
        import fitlog

        fitlog.set_log_dir(self.tempdir, new_log=True)
        tempfile_path = os.path.join(self.tempdir, 'chkt.pt')
        callbacks = [CheckPointCallback(tempfile_path)]

        fitlog_callback = FitlogCallback(data_set, tester)
        callbacks.append(fitlog_callback)

        callbacks.append(RaiseCallback(100))
        try:
            trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
                              batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                              metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                              callbacks=callbacks, check_code_level=2)
            trainer.train()
        # FIX: narrowed the bare `except:` (which also swallowed SystemExit /
        # KeyboardInterrupt) -- the simulated crash is an ordinary Exception.
        except Exception:
            pass
        # Simulate a re-run with the code below.
        data_set, model = prepare_env()
        callbacks = [CheckPointCallback(tempfile_path)]
        tester = Tester(data=data_set, model=model, metrics=AccuracyMetric(pred="predict", target="y"))
        fitlog_callback = FitlogCallback(data_set, tester)
        callbacks.append(fitlog_callback)

        trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
                          batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
                          metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
                          callbacks=callbacks, check_code_level=2)
        trainer.train()
Example #29
0
    def __init__(self, masker, task_lst, vocabs, optimizer, args):
        """Set up state for multi-task training.

        NOTE(review): the original docstring described parameters (model,
        description, log_path, ...) that do not exist in this signature; it
        has been rewritten to match the actual parameters.

        :param masker: masking component stored for use during training
        :param task_lst: list of tasks; each entry exposes ``train_set`` and
            ``task_name`` (used below for step estimation and metric choice)
        :param vocabs: mapping from task name to vocabulary, consumed by the
            span-based metrics
        :param optimizer: optimizer driving parameter updates
        :param args: configuration object; fields read here include
            ``save_path``, ``exp_name``, ``batch_size``, ``accumulation_steps``,
            ``print_every``, ``save_ep``, ``tasks``, ``evaluate`` and ``tb_path``
        """
        self.logger = fastNLP.logger

        self.masker = masker
        self.task_lst = task_lst
        self.save_path = args.save_path
        self.description = args.exp_name
        self.optim = optimizer
        self.vocabs = vocabs
        # Estimated total steps: 100 epochs over all tasks, sized by the first
        # task's train set -- assumes tasks are similarly sized; TODO confirm.
        n_steps = (int(
            len(task_lst) * len(task_lst[0].train_set) * 100 / args.batch_size)
                   + 1)
        args.n_steps = n_steps
        self.epoch_scheduler = get_scheduler(args, self.optim)
        # Per-step scheduler starts unset; only the epoch scheduler is built here.
        self.scheduler = None
        self.logger.info('Using scheduler {}'.format(self.scheduler))
        self.accumulation_steps = args.accumulation_steps
        self.print_every = args.print_every
        self.batch_size = args.batch_size
        self.save_ep = args.save_ep

        # Tasks not listed in args.tasks are recorded as "empty" (to be skipped);
        # when args.tasks is None, every task is active.
        include_tasks = args.tasks
        if include_tasks is None:
            self.empty_tasks = set()
        else:
            self.empty_tasks = set(range(len(
                self.task_lst))) - set(include_tasks)

        # Running counters for training progress and best dev result.
        self.steps = 0
        self.best_acc = 0
        self.best_epoch = 0

        # One metric per task: plain accuracy where has_acc() says so,
        # otherwise a span F1 metric ("bioes" encoding for NER, "bio" else).
        self.metrics = []
        for t in task_lst:
            if has_acc(t.task_name):
                self.metrics.append(AccuracyMetric())
            else:
                self.metrics.append(
                    SpanFPreRecMetric(
                        self.vocabs[t.task_name],
                        encoding_type="bioes"
                        if t.task_name == "ner" else "bio",
                    ))
        # self.logger.info(self.metrics)

        # TensorBoard writer, split into eval/ and train/ subdirectories.
        tb_path = "eval" if args.evaluate else "train"
        self.summary_writer = SummaryWriter(os.path.join(
            args.tb_path, tb_path))
Example #30
0
 def test_lr_scheduler(self):
     # Training with an LRScheduler callback wrapping StepLR should run.
     data_set, model = prepare_env()
     optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
     step_lr = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
     trainer = Trainer(data_set, model, optimizer=optimizer,
                       loss=BCELoss(pred="predict", target="y"),
                       batch_size=32, n_epochs=5, print_every=50,
                       dev_data=data_set,
                       metrics=AccuracyMetric(pred="predict", target="y"),
                       use_tqdm=False, callbacks=[LRScheduler(step_lr)],
                       check_code_level=2)
     trainer.train()