Example #1
import torch

from allennlp.common import Params
from allennlp.data import Vocabulary
from allennlp.data.data_loaders import SimpleDataLoader
from allennlp.training import GradientDescentTrainer
from allennlp.training.callbacks import ConsoleLoggerCallback

# FakeDatasetReader and FakeModel are fixtures defined in the test module;
# self.TEST_DIR is provided by the AllenNlpTestCase base class.
    def test_console_log_callback(self):
        total_instances = 1000
        batch_size = 25

        # Build a loader over the fake data and index it with a vocabulary
        # computed from the instances.
        reader = FakeDatasetReader(total_instances, batch_size)
        data_loader = SimpleDataLoader.from_dataset_reader(
            reader, "fake_path", batch_size=batch_size)
        instances = list(data_loader.iter_instances())
        vocab = Vocabulary.from_instances(instances)
        data_loader.index_with(vocab)
        model = FakeModel(vocab)
        optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum=0.9)

        # Train for three epochs; should_log_inputs=True makes the callback
        # log batch inputs to the console in addition to metrics.
        trainer = GradientDescentTrainer(
            model,
            optimizer,
            data_loader,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=[
                ConsoleLoggerCallback.from_params(
                    Params({"should_log_inputs": True}),
                    serialization_dir=self.TEST_DIR,
                )
            ],
        )
        trainer.train()
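Since `from_params` just forwards the parsed parameters to the matching constructor arguments, the callback above could also be built directly. A minimal sketch (here `serialization_dir` stands for whatever directory is passed to the trainer):

from allennlp.training.callbacks import ConsoleLoggerCallback

# Direct construction, equivalent to the from_params call above.
callback = ConsoleLoggerCallback(
    serialization_dir=serialization_dir,
    should_log_inputs=True,
)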
Example #2
# Same imports and fixtures as Example #1, plus:
# from allennlp.training.callbacks import TensorBoardCallback
    def test_trainer_can_log_batch_inputs(self):
        total_instances = 1000
        batch_size = 25

        reader = FakeDatasetReader(total_instances, batch_size)
        data_loader = SimpleDataLoader.from_dataset_reader(
            reader, "fake_path", batch_size=batch_size)
        instances = list(data_loader.iter_instances())
        vocab = Vocabulary.from_instances(instances)
        data_loader.index_with(vocab)
        model = FakeModel(vocab)
        optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum=0.9)

        # Train for two epochs, writing TensorBoard event files under the
        # serialization directory; distribution_interval=2 logs parameter
        # distributions every two batches.
        trainer = GradientDescentTrainer(
            model,
            optimizer,
            data_loader,
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
            callbacks=[
                TensorBoardCallback(
                    serialization_dir=self.TEST_DIR,
                    distribution_interval=2,
                )
            ],
        )
        trainer.train()
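Because `callbacks` is a plain list, the console and TensorBoard loggers from these two examples can be attached to the same trainer run. A minimal sketch reusing the names defined above (`serialization_dir` again stands for the trainer's output directory):

# Console logging for quick inspection, TensorBoard for distributions and
# metrics over time; the event files land under the serialization directory
# and can be viewed with `tensorboard --logdir`.
trainer = GradientDescentTrainer(
    model,
    optimizer,
    data_loader,
    num_epochs=2,
    serialization_dir=serialization_dir,
    callbacks=[
        ConsoleLoggerCallback(serialization_dir=serialization_dir),
        TensorBoardCallback(serialization_dir=serialization_dir,
                            distribution_interval=2),
    ],
)
trainer.train()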
Example #3
    # ... (snippet truncated above: the word embeddings and the source
    # character encoder are constructed first; the cut-off statement ends
    # with `vocab=vocab)`)
    tgt_char_encoder = TokenCharactersEncoder(
        embedding=tgt_char_embedding,
        encoder=GruSeq2VecEncoder(input_size=args.emb_dim,
                                  hidden_size=args.hid_dim))

    # Combine word-level and character-level representations on each side.
    src_embedders = BasicTextFieldEmbedder({
        "tokens": src_embedding,
        "character_tokens": src_char_encoder,
    })
    tgt_embedders = BasicTextFieldEmbedder({
        "tokens": tgt_embedding,
        "character_tokens": tgt_char_encoder,
    })

    # Data loaders for training and validation, both indexed with the
    # shared vocabulary.
    train_loader = SimpleDataLoader.from_dataset_reader(
        reader=dataset_reader,
        data_path=args.train_file,
        batch_size=args.bs,
        shuffle=True)
    train_loader.index_with(vocab)
    val_loader = SimpleDataLoader.from_dataset_reader(
        reader=dataset_reader,
        data_path=args.valid_file,
        batch_size=args.bs)
    val_loader.index_with(vocab)

    model = create_seq2seqmodel(vocab, src_embedders=src_embedders,
                                tgt_embedders=tgt_embedders,
                                hidden_dim=args.hid_dim,
                                max_decoding_steps=args.maxlen, device=device)

    def count_parameters(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"The model has {count_parameters(model)} trainable parameters.")

    save_dir = None
    if args.save:
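The script is cut off here. Wiring the loaders and model above into a trainer would typically follow the same pattern as Examples #1 and #2; a sketch under assumed flags (`args.lr` and `args.epochs` are hypothetical, and `save_dir` may stay `None` to skip checkpointing):

import torch
from allennlp.training import GradientDescentTrainer

# Hypothetical continuation: train the seq2seq model built above.
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
trainer = GradientDescentTrainer(
    model=model,
    optimizer=optimizer,
    data_loader=train_loader,
    validation_data_loader=val_loader,
    num_epochs=args.epochs,
    serialization_dir=save_dir,
)
trainer.train()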