Example #1
def build_trainer(
    config,
    lr: float,
    serialization_dir: str,
    num_epochs: int,
    model: Model,
    train_loader: DataLoader,
    dev_loader: DataLoader) -> Trainer:

    parameters = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=lr)
    if torch.cuda.is_available():
        model.cuda()

    # Clear out a previously finished experiment's serialization dir if the config says so
    if os.path.exists(serialization_dir) and config.shutil_pre_finished_experiment:
        shutil.rmtree(serialization_dir)

    if not os.path.exists(serialization_dir):
        os.makedirs(serialization_dir)

    trainer = GradientDescentTrainer(
        model=model,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=num_epochs,
        optimizer=optimizer,
        serialization_dir=serialization_dir,
        cuda_device=0 if torch.cuda.is_available() else -1
    )

    return trainer
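These snippets are excerpts from larger modules, so their imports are not shown. A minimal import block they roughly assume, under the assumption of AllenNLP 1.x (exact module paths can differ between versions; task-specific classes such as Seq2SeqPredictor or IntentEstimator come from allennlp-models or the authors' own code):

import os
import shutil
import torch
from allennlp.data import DataLoader, PyTorchDataLoader
from allennlp.models import Model
from allennlp.training import Checkpointer, GradientDescentTrainer, Trainer
from allennlp.training.optimizers import AdamOptimizer, HuggingfaceAdamWOptimizer
from allennlp.training.learning_rate_schedulers import NoamLR, ReduceOnPlateauLearningRateScheduler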
Example #2
def build_trainer(
    model: Model,
    serialization_dir: str,
    train_loader: DataLoader,
    dev_loader: DataLoader = None,
    num_epochs: int = 1,
    cuda_device: int = -1,
    patience: int = None
    ) -> Trainer:
    parameters = [
        [n, p]
        for n, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = AdamOptimizer(parameters)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=num_epochs,
        optimizer=optimizer,
        cuda_device=cuda_device,
        patience=patience
    )
    return trainer
Example #3
def build_trainer(
    model: Model,
    serialization_dir: str,
    train_loader: DataLoader,
    dev_loader: DataLoader
) -> Trainer:
    parameters = [
        [n, p]
        for n, p in model.named_parameters() if p.requires_grad
    ]

    checkpointer = Checkpointer(serialization_dir, num_serialized_models_to_keep=0)
    optimizer = AdamOptimizer(parameters)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        checkpointer=checkpointer,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=50,
        optimizer=optimizer,
        cuda_device=0,
        validation_metric="-loss",
        patience=5,
    )
    return trainer
Example #4
def train(args,
          model,
          dataset_reader,
          train_loader,
          device=0,
          val_loader=None,
          test_data=None,
          num_epochs=10,
          patience=None,
          serialization_dir=None):
    optimizer = AdamOptimizer(model.named_parameters(),
                              lr=args.lr,
                              weight_decay=args.l2)
    trainer = GradientDescentTrainer(
        model=model,
        optimizer=optimizer,
        data_loader=train_loader,
        validation_data_loader=val_loader,
        cuda_device=device,
        num_epochs=num_epochs,
        serialization_dir=serialization_dir,
        patience=patience,
        grad_clipping=args.clip,
    )
    trainer.train()

    if test_data is not None:
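        # Show a few qualitative predictions: the first 10 test instances,
        # with source, gold target, and the model's predicted tokens.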
        predictor = Seq2SeqPredictor(model, dataset_reader)
        for instance in itertools.islice(test_data, 10):
            print('SOURCE:', instance.fields['source_tokens'].tokens)
            print('GOLD:', instance.fields['target_tokens'].tokens)
            print('PRED:',
                  predictor.predict_instance(instance)['predicted_tokens'])
            print('-' * 50)
Example #5
def build_trainer(model: Model, serialization_dir: str,
                  train_loader: DataLoader, dev_loader: DataLoader) -> Trainer:

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("cpu/gpu? ", device)

    model = model.to(device)

    parameters = [[n, p] for n, p in model.named_parameters()
                  if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=0.001)
    scheduler = ReduceOnPlateauLearningRateScheduler(optimizer=optimizer,
                                                     patience=5,
                                                     verbose=True)

    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        cuda_device=device,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        learning_rate_scheduler=scheduler,
        patience=20,
        num_epochs=200,
        optimizer=optimizer,
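        # "+f1" means higher validation f1 is better; it drives early stopping and best-model selection.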
        validation_metric="+f1",
    )
    return trainer
Example #6
def build_trainer(model: Model, serialization_dir: str,
                  train_loader: PyTorchDataLoader,
                  dev_loader: PyTorchDataLoader) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters()
                  if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=lr)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=num_epoch,
        optimizer=optimizer,
        num_gradient_accumulation_steps=grad_accum,
    )
    return trainer
Example #7
    def init_trainer(self) -> Trainer:
        parameters = [(n, p) for n, p in self.model.named_parameters() if p.requires_grad]
        optimizer = AdamOptimizer(parameters, lr=self.config.lr)  # type: ignore
        trainer = GradientDescentTrainer(
            model=self.model,
            serialization_dir='./output',
            data_loader=self.train_data_loader,
            validation_data_loader=self.dev_data_loader,
            num_epochs=self.config.epoch,
            optimizer=optimizer,
            cuda_device=self.config.device,
        )
        return trainer
Example #8
def build_trainer(model: Model, serialization_dir: str,
                  train_loader: DataLoader, dev_loader: DataLoader) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters()
                  if p.requires_grad]
    optimizer = AdamOptimizer(parameters)
    trainer = GradientDescentTrainer(
        model=model,
        #serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=5,
        optimizer=optimizer,
    )
    return trainer
Example #9
def build_trainer(model: Model, serialization_dir: str, train_loader: DataLoader,
                  dev_loader: DataLoader) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=0.001)
    scheduler = ReduceOnPlateauLearningRateScheduler(optimizer=optimizer,
                                                     patience=5,
                                                     verbose=True)

    trainer = GradientDescentTrainer(model=model, serialization_dir=serialization_dir,
                                     data_loader=train_loader, validation_data_loader=dev_loader,
                                     learning_rate_scheduler=scheduler,
                                     patience=20, num_epochs=200,
                                     optimizer=optimizer,
                                     validation_metric="+accuracy",
                                     )
    return trainer
Example #10
def build_trainer(model: Model, serialization_dir:str, train_loader: PyTorchDataLoader, dev_loader: PyTorchDataLoader) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=lr, weight_decay=weight_decay, betas=(0.9, 0.98), eps=1e-09)
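    # Noam schedule: linear warmup for `warmup` steps, then decay with the inverse square root of the step, scaled by the model size.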
    lr_scheduler = NoamLR(optimizer, model_size=embedding_dim, warmup_steps=warmup)
    # lr_scheduler = InverseSquareRootLR(optimizer, warmup_steps = warmup, end_lr = lr)
    # lr_scheduler = ReduceOnPlateauLearningRateScheduler(optimizer, factor = 0.8, patience = 3, min_lr = 0.000001, eps=1e-08)
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=num_epoch,
        optimizer=optimizer,
        num_gradient_accumulation_steps=grad_accum,
        grad_norm=grad_norm,
        patience=patience,
        learning_rate_scheduler=lr_scheduler)
    return trainer
Example #11
def build_trainer(model: Model, ser_dir: str, train_loader: DataLoader, valid_loader: DataLoader,
                  hugging_optim: bool, cuda_device: int) -> Trainer:
    params = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    logging.info(f"{len(params)} parameters requiring grad updates")
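    # AdamW (the Huggingface variant) is the usual choice when fine-tuning transformer encoders; plain Adam otherwise.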
    if hugging_optim:
        optim = HuggingfaceAdamWOptimizer(params, lr=1.0e-5)
    else:
        optim = AdamOptimizer(params)
    return GradientDescentTrainer(
        model=model,
        serialization_dir=ser_dir,
        data_loader=train_loader,
        validation_data_loader=valid_loader,
        num_epochs=5,
        patience=None,  # early stopping is disabled
        optimizer=optim,
        cuda_device=cuda_device
    )
Example #12
def build_trainer(model: Model,
                  serialization_dir: str,
                  train_loader: DataLoader,
                  dev_loader: DataLoader,
                  num_epochs: int,
                  learning_rate: float = 0.001,
                  cuda_device=None) -> Trainer:
    """
    Builds instance of Trainer class with specified training hyperparameters
    Adapted from https://guide.allennlp.org/training-and-prediction

    Parameters
        model : Model
            The model to train
        serialization_dir : str
            Directory to save checkpoints and results
        train_loader : DataLoader
            Previously built dataset loader for training data
        dev_loader : DataLoader
            Previously built loader for dev data
        num_epochs : int
            Number of epochs to train for
        learning_rate : float (default: 0.001)
        cuda_device : int (default: None)
            >=0 if using GPU

    Returns
        trainer : Trainer
    """
    parameters = [(n, p) for n, p in model.named_parameters()
                  if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=learning_rate)  # type: ignore
    trainer = GradientDescentTrainer(model=model,
                                     checkpointer=Checkpointer(
                                         serialization_dir,
                                         num_serialized_models_to_keep=-1),
                                     serialization_dir=serialization_dir,
                                     data_loader=train_loader,
                                     validation_data_loader=dev_loader,
                                     num_epochs=num_epochs,
                                     optimizer=optimizer,
                                     cuda_device=cuda_device)
    print("Will train for", num_epochs, "epochs")
    return trainer
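As a usage illustration only (the loader and model construction are assumed to happen elsewhere, and the serialization path here is hypothetical), the function above would typically be wired up like this:

trainer = build_trainer(model=model,
                        serialization_dir="results/run_01",
                        train_loader=train_loader,
                        dev_loader=dev_loader,
                        num_epochs=20,
                        learning_rate=0.001,
                        cuda_device=0 if torch.cuda.is_available() else None)
metrics = trainer.train()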
Example #13
def build_trainer(model: Model, serialization_dir: str,
                  train_loader: DataLoader, dev_loader: DataLoader) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters()
                  if p.requires_grad]
    optimizer = AdamOptimizer(parameters)
    # There are a *lot* of other things you could configure with the trainer.  See
    # http://docs.allennlp.org/master/api/training/trainer/#gradientdescenttrainer-objects for more
    # information.

    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=5,
        optimizer=optimizer,
        validation_metric="+accuracy",
    )
    return trainer
Example #14
def build_trainer(
    config,
    model: Model,
    train_loader: DataLoader,
    dev_loader: DataLoader,
) -> Trainer:
    parameters = [(n, p) for n, p in model.named_parameters()
                  if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=config.lr)  # type: ignore
    model.cuda()
    trainer = GradientDescentTrainer(
        model=model,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=config.num_epochs,
        optimizer=optimizer,
        cuda_device=0,
        serialization_dir=config.serialization_dir)
    return trainer
Example #15
def build_classifier_trainer(model: Model,
                             serialization_dir: str,
                             train_loader: DataLoader,
                             dev_loader: DataLoader,
                             num_epochs: int = 1,
                             cuda_device: int = -1,
                             learning_rate: float = 0.000025,
                             world_size: int = 1,
                             distributed: bool = False) -> Trainer:
    parameters = [[n, p] for n, p in model.named_parameters()
                  if p.requires_grad]
    optimizer = AdamOptimizer(parameters, lr=learning_rate)
    trainer = GradientDescentTrainer(model=model,
                                     serialization_dir=serialization_dir,
                                     data_loader=train_loader,
                                     validation_data_loader=dev_loader,
                                     num_epochs=num_epochs,
                                     optimizer=optimizer,
                                     cuda_device=cuda_device,
                                     world_size=world_size,
                                     distributed=distributed,
                                     validation_metric='+accuracy')
    return trainer
Example #16
    def init_trainer(self) -> Trainer:
        parameters = [(n, p) for n, p in self.model.named_parameters()
                      if p.requires_grad]

        group_parameter_group = [(['_text_field_embedder.*'], {
            'lr': self.config.lr
        }), (['_classification_layer.*'], {
            'lr': self.config.classifier_lr
        })]
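        # The regex groups above give parameters matching '_text_field_embedder.*' the base lr
        # and the classification layer its own classifier_lr when passed to the optimizer below.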

        optimizer = AdamOptimizer(parameters,
                                  parameter_groups=group_parameter_group,
                                  lr=self.config.lr)  # type: ignore

        trainer = GradientDescentTrainer(
            model=self.model,
            serialization_dir='./output',
            data_loader=self.train_data_loader,
            validation_data_loader=self.dev_data_loader,
            num_epochs=self.config.epoch,
            optimizer=optimizer,
            cuda_device=self.config.device,
        )
        return trainer
Example #17
    embedder = BasicTextFieldEmbedder(
        {'tokens': Embedding(
            embedding_dim=10,
            num_embeddings=vocab.get_vocab_size('tokens'))})

    encoder = LstmSeq2VecEncoder(10, 32, bidirectional=True)
    # encoder = BagOfEmbeddingsEncoder(embedding_dim=10)

    model = IntentEstimator(vocab, embedder, encoder)
    model.cuda()

    # Train the model

    with tempfile.TemporaryDirectory() as serialization_dir:
        parameters = [
            [n, p] for n, p in model.named_parameters() if p.requires_grad]
        optimizer = AdamOptimizer(parameters)
        trainer = GradientDescentTrainer(
            model=model,
            serialization_dir=serialization_dir,
            data_loader=train_loader,
            validation_data_loader=valid_loader,
            num_epochs=20,
            optimizer=optimizer,
            cuda_device=0)

        trainer.train()

    # Run the model

    predictor = IntentEstimatorPredictor(model, dataset_reader)
# Create the text feature vectors
text_embedder = BasicTextFieldEmbedder({"tokens": embedding})
encoder = BagOfEmbeddingsEncoder(embedding_dim=100)

# Create the document classifier
model = BasicClassifier(vocab=vocab,
                        text_field_embedder=text_embedder,
                        seq2vec_encoder=encoder)

# Data loaders
train_loader = PyTorchDataLoader(train_dataset, batch_size=32, shuffle=True)
validation_loader = PyTorchDataLoader(validation_dataset,
                                      batch_size=32,
                                      shuffle=False)

# Copy the model to the GPU
# model = model.cuda()

# Create the optimizer
optimizer = AdamOptimizer(model.named_parameters())

# Create the trainer
trainer = GradientDescentTrainer(model=model,
                                 optimizer=optimizer,
                                 data_loader=train_loader,
                                 validation_data_loader=validation_loader,
                                 num_epochs=10,
                                 patience=3)

metrics = trainer.train()
pprint.pprint(metrics)
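None of the examples above shows evaluation after training. A minimal follow-up sketch, assuming AllenNLP 1.x and a test_loader built (and indexed with the vocabulary) the same way as the loaders above:

from allennlp.training.util import evaluate

# Evaluate the trained model on held-out data; returns a metrics dict like trainer.train().
test_metrics = evaluate(model, test_loader, cuda_device=-1)
pprint.pprint(test_metrics)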