Example #1
import os

import allennlp.data
import allennlp.training
import torch
from optuna.integration import AllenNLPPruningCallback


def objective(trial):
    # `prepare_data` and `create_model`, together with the DEVICE, MODEL_DIR,
    # and TARGET_METRIC constants, are defined elsewhere in the script.
    train_dataset, valid_dataset, vocab = prepare_data()
    model = create_model(vocab, trial)

    if DEVICE > -1:  # a non-negative value selects a CUDA device index
        model.to(torch.device("cuda:{}".format(DEVICE)))

    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=64, collate_fn=allennlp.data.allennlp_collate
    )
    validation_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=64, collate_fn=allennlp.data.allennlp_collate
    )

    serialization_dir = os.path.join(MODEL_DIR, "trial_{}".format(trial.number))
    trainer = allennlp.training.GradientDescentTrainer(
        model=model,
        optimizer=optimizer,
        data_loader=data_loader,
        validation_data_loader=validation_data_loader,
        validation_metric="+" + TARGET_METRIC,
        patience=None,  # `patience=None` since it could conflict with AllenNLPPruningCallback
        num_epochs=50,
        cuda_device=DEVICE,
        serialization_dir=serialization_dir,
        epoch_callbacks=[AllenNLPPruningCallback(trial, "validation_" + TARGET_METRIC)],
    )
    metrics = trainer.train()
    return metrics["best_validation_" + TARGET_METRIC]
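For context, a minimal sketch of how such an objective is typically handed to Optuna; the pruner choice and trial count are illustrative assumptions, not part of the example above.

import optuna

# Maximize the validation metric returned by `objective`; a pruner such as
# Hyperband works together with AllenNLPPruningCallback to stop weak trials early.
study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
)
study.optimize(objective, n_trials=30)
print("Best params:", study.best_trial.params)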
Example #2
import os

import torch
from allennlp.data import allennlp_collate
from allennlp.training import GradientDescentTrainer
from optuna import Trial
from optuna.integration import AllenNLPPruningCallback
from torch.optim import SGD
from torch.utils.data import DataLoader


def objective_fn(
        trial: Trial,
        device: int,
        direction: str,
        target_metric: str,
        base_serialization_dir: str,
):
    # Sample the architecture and optimizer hyperparameters from the trial.
    embedding_dim = trial.suggest_int("embedding_dim", 128, 256)
    max_filter_size = trial.suggest_int("max_filter_size", 3, 6)
    num_filters = trial.suggest_int("num_filters", 128, 256)
    output_dim = trial.suggest_int("output_dim", 128, 512)
    dropout = trial.suggest_float("dropout", 0, 1.0, log=False)
    lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)

    # `prepare_data` and `create_model` are helper functions defined elsewhere.
    train_dataset, valid_dataset, vocab = prepare_data()
    model = create_model(vocab, embedding_dim, max_filter_size, num_filters, output_dim, dropout)

    if device > -1:
        model.to(torch.device("cuda:{}".format(device)))

    optimizer = SGD(model.parameters(), lr=lr)
    data_loader = DataLoader(train_dataset, batch_size=10, collate_fn=allennlp_collate)
    validation_data_loader = DataLoader(valid_dataset, batch_size=64, collate_fn=allennlp_collate)
    serialization_dir = os.path.join(base_serialization_dir, "trial_{}".format(trial.number))
    trainer = GradientDescentTrainer(
        model=model,
        optimizer=optimizer,
        data_loader=data_loader,
        validation_data_loader=validation_data_loader,
        validation_metric=("+" if direction == "MAXIMIZE" else "-") + target_metric,
        patience=None,  # `patience=None` since it could conflict with AllenNLPPruningCallback
        num_epochs=50,
        cuda_device=device,
        serialization_dir=serialization_dir,
        epoch_callbacks=[AllenNLPPruningCallback(trial, f"validation_{target_metric}")],
    )
    # Persist the vocabulary next to the trial's weights so the model can be reloaded later.
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))
    return trainer.train()[f"best_validation_{target_metric}"]
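Because `objective_fn` takes extra arguments beyond the trial, it has to be bound into Optuna's single-argument objective interface before a study can run it. A minimal sketch follows; the device, direction, metric name, and serialization directory values are illustrative assumptions.

import functools

import optuna

# Bind the fixed arguments so the callable matches Optuna's `objective(trial)` shape.
objective = functools.partial(
    objective_fn,
    device=-1,  # CPU; pass a CUDA device index such as 0 to train on GPU
    direction="MAXIMIZE",
    target_metric="accuracy",
    base_serialization_dir="./result",
)

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)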