def test_case(self):
        def prepare_fake_dataset():
            mean = np.array([-3, -3])
            cov = np.array([[1, 0], [0, 1]])
            class_A = np.random.multivariate_normal(mean, cov, size=(1000,))

            mean = np.array([3, 3])
            cov = np.array([[1, 0], [0, 1]])
            class_B = np.random.multivariate_normal(mean, cov, size=(1000,))

            data_set = DataSet([Instance(x=[float(item[0]), float(item[1])], y=[0.0]) for item in class_A] +
                               [Instance(x=[float(item[0]), float(item[1])], y=[1.0]) for item in class_B])
            return data_set

        data_set = prepare_fake_dataset()
        data_set.set_input("x")
        data_set.set_target("y")

        model = NaiveClassifier(2, 1)

        trainer = Trainer(data_set, model,
                          loss=BCELoss(pred="predict", target="y"),
                          n_epochs=1,
                          batch_size=32,
                          print_every=50,
                          optimizer=SGD(lr=0.1),
                          check_code_level=2,
                          use_tqdm=False,
                          callbacks=[EchoCallback()])
        trainer.train()
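
The example above relies on a small binary classifier whose definition is not shown. Below is a minimal sketch of what a compatible model could look like, assuming only what the snippet itself requires: forward returns a dict keyed "predict" so that BCELoss(pred="predict", target="y") can find the prediction. The two-layer architecture and hidden size are illustrative guesses, not fastNLP's actual NaiveClassifier; DataSet, Instance, Trainer, BCELoss, SGD and EchoCallback are expected to come from fastNLP (exact import paths vary between versions).

# Hypothetical stand-in for the NaiveClassifier used above (not fastNLP's own class).
# It only honors the contract the test relies on: forward(x) -> {"predict": probabilities}.
import numpy as np
import torch
import torch.nn as nn

class NaiveClassifier(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(in_dim, 8), nn.ReLU(), nn.Linear(8, out_dim))

    def forward(self, x):
        # fastNLP losses and metrics read model outputs from a dict by key.
        return {"predict": torch.sigmoid(self.mlp(x))}

    def predict(self, x):
        return self.forward(x)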
Example #2
def workflow():

    train_data, valid_data, test_data, vocab, speech_vocab = prepare_data()

    ## Mark the input and target fields of each dataset; the Trainer and Tester rely on these flags
    train_data.set_input("token_index_list", "origin_len", "speech_index_list")
    test_data.set_input("token_index_list", "origin_len", "speech_index_list")
    valid_data.set_input("token_index_list", "origin_len", "speech_index_list")

    train_data.set_target("speech_index_list")
    test_data.set_target("speech_index_list")
    valid_data.set_target("speech_index_list")

    ## Build the model
    config = {
        "vocab_size": len(vocab),
        "word_emb_dim": args.word_emb,
        "rnn_hidden_units": args.rnn_hidden,
        "num_classes": len(speech_vocab),
        "bi_direction": args.bilstm
    }

    ## Build the model from scratch or load it from a saved checkpoint
    if args.cont:
        model = torch.load(args.cont)
    else:
        model = BiLSTMCRF(config)

    if args.mode == "train":
        ## Choose the optimizer
        optimizer = Adam(lr=args.lr) if args.op else SGD(lr=args.lr)

        ## Train the model
        trainer = Trainer(model=model,
                          train_data=train_data,
                          dev_data=valid_data,
                          use_cuda=args.cuda,
                          metrics=PosMetric(pred='pred',
                                            target='speech_index_list'),
                          optimizer=optimizer,
                          n_epochs=args.epoch,
                          batch_size=args.batch_size,
                          save_path="./save")
        trainer.train()

    ## Test the model
    tester = Tester(
        data=test_data,
        model=model,
        metrics=PosMetric(pred='pred', target='speech_index_list'),
        use_cuda=args.cuda,
    )
    tester.test()
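
workflow() reads a module-level args object that is defined elsewhere. A plausible argparse block covering exactly the attributes used above would look like the sketch below; the flag names mirror the args.* accesses in the code, while the defaults and help strings are assumptions.

# Hypothetical command-line setup for workflow(); defaults are illustrative only.
import argparse

parser = argparse.ArgumentParser(description="POS tagging with a BiLSTM-CRF")
parser.add_argument("--mode", choices=["train", "test"], default="train")
parser.add_argument("--cont", default=None, help="path of a saved model to resume from")
parser.add_argument("--op", action="store_true", help="use Adam instead of SGD")
parser.add_argument("--lr", type=float, default=0.01)
parser.add_argument("--epoch", type=int, default=10)
parser.add_argument("--batch_size", type=int, default=32)
parser.add_argument("--word_emb", type=int, default=100, help="word embedding dimension")
parser.add_argument("--rnn_hidden", type=int, default=100, help="RNN hidden units")
parser.add_argument("--bilstm", action="store_true", help="use a bidirectional LSTM")
parser.add_argument("--cuda", action="store_true")
args = parser.parse_args()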
Example #3
 def test_echo_callback(self):
     data_set, model = prepare_env()
     trainer = Trainer(data_set,
                       model,
                       loss=BCELoss(pred="predict", target="y"),
                       n_epochs=2,
                       batch_size=32,
                       print_every=50,
                       optimizer=SGD(lr=0.1),
                       check_code_level=2,
                       use_tqdm=False,
                       callbacks=[EchoCallback()])
     trainer.train()
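
This and the following callback tests share a prepare_env() helper that is not shown. Judging from the first example, it plausibly builds the same two-Gaussian toy DataSet and a NaiveClassifier(2, 1); the reconstruction below is a guess along those lines, not the test suite's actual helper.

# Hypothetical prepare_env(), reconstructed from the first example above.
def prepare_env():
    mean = np.array([-3, -3])
    cov = np.eye(2)
    class_A = np.random.multivariate_normal(mean, cov, size=(1000,))
    class_B = np.random.multivariate_normal(-mean, cov, size=(1000,))

    data_set = DataSet([Instance(x=[float(a), float(b)], y=[0.0]) for a, b in class_A] +
                       [Instance(x=[float(a), float(b)], y=[1.0]) for a, b in class_B])
    data_set.set_input("x")
    data_set.set_target("y")
    return data_set, NaiveClassifier(2, 1)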
Example #4
 def test_KeyBoardInterrupt(self):
     data_set, model = prepare_env()
     trainer = Trainer(data_set,
                       model,
                       loss=BCELoss(pred="predict", target="y"),
                       n_epochs=5,
                       batch_size=32,
                       print_every=50,
                       optimizer=SGD(lr=0.1),
                       check_code_level=2,
                       use_tqdm=False,
                       callbacks=[ControlC(False)])
     trainer.train()
Example #5
 def test_early_stop(self):
     data_set, model = prepare_env()
     trainer = Trainer(data_set,
                       model,
                       loss=BCELoss(pred="predict", target="y"),
                       n_epochs=20,
                       batch_size=32,
                       print_every=50,
                       optimizer=SGD(lr=0.01),
                       check_code_level=2,
                       use_tqdm=False,
                       dev_data=data_set,
                       metrics=AccuracyMetric(pred="predict", target="y"),
                       callbacks=[EarlyStopCallback(5)])
     trainer.train()
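
EarlyStopCallback(5) aborts training once the dev metric has gone roughly five evaluations without improving. The standalone snippet below only illustrates that patience rule; it is not fastNLP's implementation.

# Patience-based early stopping in isolation (illustrative, independent of fastNLP).
def should_stop(dev_scores, patience=5):
    # Stop once the best score is more than `patience` evaluations old.
    if len(dev_scores) <= patience:
        return False
    best_index = max(range(len(dev_scores)), key=lambda i: dev_scores[i])
    return len(dev_scores) - 1 - best_index >= patience

assert should_stop([0.5, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6])        # no gain for 5 checks
assert not should_stop([0.5, 0.6, 0.7, 0.6, 0.6, 0.6, 0.6])    # best is only 4 checks old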
Example #6
 def test_TensorboardCallback(self):
     data_set, model = prepare_env()
     trainer = Trainer(data_set,
                       model,
                       loss=BCELoss(pred="predict", target="y"),
                       n_epochs=5,
                       batch_size=32,
                       print_every=50,
                       optimizer=SGD(lr=0.1),
                       check_code_level=2,
                       use_tqdm=False,
                       dev_data=data_set,
                       metrics=AccuracyMetric(pred="predict", target="y"),
                       callbacks=[TensorboardCallback("loss", "metric")])
     trainer.train()
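
TensorboardCallback("loss", "metric") makes the trainer write the training loss and the dev metric to TensorBoard. The sketch below shows equivalent logging done by hand with PyTorch's own SummaryWriter; the log directory and values are placeholders.

# Hand-rolled version of the logging the callback automates.
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir="./tensorboard_logs")      # placeholder directory
for step, loss_value in enumerate([0.70, 0.55, 0.40]):    # stand-in loss values
    writer.add_scalar("loss", loss_value, global_step=step)
writer.add_scalar("metric/acc", 0.90, global_step=3)      # e.g. dev accuracy after an eval
writer.close()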
Example #7
 def test_gradient_clip(self):
     data_set, model = prepare_env()
     trainer = Trainer(
         data_set,
         model,
         loss=BCELoss(pred="predict", target="y"),
         n_epochs=20,
         batch_size=32,
         print_every=50,
         optimizer=SGD(lr=0.1),
         check_code_level=2,
         use_tqdm=False,
         dev_data=data_set,
         metrics=AccuracyMetric(pred="predict", target="y"),
         callbacks=[GradientClipCallback(model.parameters(), clip_value=2)])
     trainer.train()
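
GradientClipCallback(model.parameters(), clip_value=2) clamps every gradient to [-2, 2] between the backward pass and the optimizer step. In a plain PyTorch loop the same thing is a single call to torch.nn.utils.clip_grad_value_, as in this minimal sketch (the model, data, and learning rate are arbitrary):

# One training step with value-based gradient clipping, in plain PyTorch.
import torch

model = torch.nn.Linear(2, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

x, y = torch.randn(32, 2), torch.rand(32, 1)
loss = torch.nn.functional.binary_cross_entropy(torch.sigmoid(model(x)), y)
loss.backward()
torch.nn.utils.clip_grad_value_(model.parameters(), clip_value=2)  # clamp grads to [-2, 2]
optimizer.step()
optimizer.zero_grad()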
Example #8
    def test_case(self):
        data_set = prepare_fake_dataset()
        data_set.set_input("x", flag=True)
        data_set.set_target("y", flag=True)

        train_set, dev_set = data_set.split(0.3)

        model = NaiveClassifier(2, 1)

        trainer = Trainer(train_set,
                          model,
                          loss=BCELoss(pred="predict", target="y"),
                          metrics=AccuracyMetric(pred="predict", target="y"),
                          n_epochs=10,
                          batch_size=32,
                          print_every=50,
                          validate_every=-1,
                          dev_data=dev_set,
                          optimizer=SGD(lr=0.1),
                          check_code_level=2,
                          use_tqdm=True,
                          save_path=None)
        trainer.train()
        """
import data_process
import model

if __name__ == '__main__':
    tpds = data_process.TangPoemDataset(maxLength=20,
                                        useBigData=True,
                                        useSmallData=False)
    tpds.loadCharEmbedding()
    m = model.HCLSTM(numEmbd=tpds.totalWords,
                     hidden_size=300,
                     weight=tpds.weight,
                     embedding=tpds.embedding,
                     usePreEmbedding=True)
    m.cuda()
    ADAMOP = Adam()
    SGDMmOp = SGD(lr=0.001, momentum=0.9)
    trainner = Trainer(tpds.trainSet,
                       model=m,
                       check_code_level=0,
                       n_epochs=150,
                       batch_size=128,
                       metric_key="PPL",
                       dev_data=tpds.testSet,
                       metrics=core.metrics.AccuracyMetric(target="output_s"),
                       optimizer=ADAMOP)
    trainner.train()
    torch.save(m.state_dict(), 'model.pkl')
    #m.load_state_dict(torch.load('model.pkl'))
    m = m.cpu()
    pred = m.runStartWith('日', tpds.vocab, 19)
    print(m.convertOutput(pred, tpds.vocab))
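
The script stores only the state_dict ('model.pkl'), so reloading it later requires rebuilding the model first. A plausible CPU-only reload, reusing the constructor arguments from above, might look like this sketch (the checkpoint path and generation call simply repeat what the script already uses):

# Hypothetical reload of the checkpoint saved above, for CPU-only inference.
m = model.HCLSTM(numEmbd=tpds.totalWords,
                 hidden_size=300,
                 weight=tpds.weight,
                 embedding=tpds.embedding,
                 usePreEmbedding=True)
m.load_state_dict(torch.load('model.pkl', map_location='cpu'))
m.eval()
print(m.convertOutput(m.runStartWith('日', tpds.vocab, 19), tpds.vocab))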
Example #10
    def test_SGD(self):
        optim = SGD(model_params=torch.nn.Linear(10, 3).parameters())
        self.assertTrue("lr" in optim.__dict__["settings"])
        self.assertTrue("momentum" in optim.__dict__["settings"])
        res = optim.construct_from_pytorch(torch.nn.Linear(10, 3).parameters())
        self.assertTrue(isinstance(res, torch.optim.SGD))

        optim = SGD(lr=0.001)
        self.assertEqual(optim.__dict__["settings"]["lr"], 0.001)
        res = optim.construct_from_pytorch(torch.nn.Linear(10, 3).parameters())
        self.assertTrue(isinstance(res, torch.optim.SGD))

        optim = SGD(lr=0.002, momentum=0.989)
        self.assertEqual(optim.__dict__["settings"]["lr"], 0.002)
        self.assertEqual(optim.__dict__["settings"]["momentum"], 0.989)

        optim = SGD(0.001)
        self.assertEqual(optim.__dict__["settings"]["lr"], 0.001)
        res = optim.construct_from_pytorch(torch.nn.Linear(10, 3).parameters())
        self.assertTrue(isinstance(res, torch.optim.SGD))

        with self.assertRaises(TypeError):
            _ = SGD("???")
        with self.assertRaises(TypeError):
            _ = SGD(0.001, lr=0.002)
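
The assertions above show that the fastNLP SGD wrapper just stores keyword settings until construct_from_pytorch binds them to concrete parameters and returns a torch.optim.SGD. Building that optimizer directly in PyTorch, with the same settings, looks like this:

# The torch optimizer that SGD(lr=0.002, momentum=0.989).construct_from_pytorch(...)
# ultimately returns, constructed directly with torch.optim.
import torch

params = torch.nn.Linear(10, 3).parameters()
optimizer = torch.optim.SGD(params, lr=0.002, momentum=0.989)
print(isinstance(optimizer, torch.optim.SGD))  # True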