Example #1
# Assumes module-level imports from the surrounding file: torch, pathlib.Path,
# dpu_utils.utils.RichPath, ptgnn's ModelTrainer and LazyDataIterable, and
# local helpers (configure_logging, load_from_folder, Graph2Class,
# create_graph2class_gnn_model, log_run).
def run(arguments):
    if arguments["--aml"]:
        from azureml.core.run import Run

        # Running inside Azure ML: grab the run context for metric logging
        # and require a GPU before starting training.
        aml_ctx = Run.get_context()
        assert torch.cuda.is_available(), "No CUDA available. Aborting training."
    else:
        aml_ctx = None

    log_path = configure_logging(aml_ctx)
    azure_info_path = arguments.get("--azure-info", None)
    # Wrap the datasets lazily so iteration re-reads (and, for training,
    # re-shuffles) the samples from disk each time.
    training_data_path = RichPath.create(arguments["TRAIN_DATA_PATH"], azure_info_path)
    training_data = LazyDataIterable(lambda: load_from_folder(training_data_path, shuffle=True))

    validation_data_path = RichPath.create(arguments["VALID_DATA_PATH"], azure_info_path)
    validation_data = LazyDataIterable(
        lambda: load_from_folder(validation_data_path, shuffle=False)
    )

    model_path = Path(arguments["MODEL_FILENAME"])
    assert model_path.name.endswith(".pkl.gz"), "MODEL_FILENAME must have a `.pkl.gz` suffix."

    # Resume from an explicit checkpoint, reuse a previous AML run's saved
    # model, or otherwise build a fresh one (which still needs its metadata
    # initialized from the training data).
    initialize_metadata = True
    restore_path = arguments.get("--restore-path", None)
    if restore_path:
        initialize_metadata = False
        model, nn = Graph2Class.restore_model(Path(restore_path))
    elif arguments["--aml"] and model_path.exists():
        initialize_metadata = False
        model, nn = Graph2Class.restore_model(model_path)
    else:
        nn = None
        model = create_graph2class_gnn_model()

    def create_optimizer(parameters):
        return torch.optim.Adam(parameters, lr=0.00025)

    trainer = ModelTrainer(
        model,
        model_path,
        max_num_epochs=int(arguments["--max-num-epochs"]),
        minibatch_size=int(arguments["--minibatch-size"]),
        optimizer_creator=create_optimizer,
        clip_gradient_norm=1,
        target_validation_metric="Accuracy",
        target_validation_metric_higher_is_better=True,
        enable_amp=arguments["--amp"],
    )
    # If a checkpoint was restored, reattach its neural module to the trainer.
    if nn is not None:
        trainer.neural_module = nn

    trainer.register_train_epoch_end_hook(
        lambda model, nn, epoch, metrics: log_run(aml_ctx, "train", model, epoch, metrics)
    )
    trainer.register_validation_epoch_end_hook(
        lambda model, nn, epoch, metrics: log_run(aml_ctx, "valid", model, epoch, metrics)
    )

    trainer.train(
        training_data,
        validation_data,
        show_progress_bar=not arguments["--quiet"],
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
        patience=10,
        store_tensorized_data_in_memory=True,
    )

    test_data_path = RichPath.create(arguments["TEST_DATA_PATH"], azure_info_path)
    test_data = LazyDataIterable(lambda: load_from_folder(test_data_path, shuffle=False))
    acc = model.report_accuracy(
        test_data,
        trainer.neural_module,
        device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    )
    print(f"Test accuracy: {acc:%}")

    if aml_ctx is not None:
        aml_ctx.log("Test Accuracy", acc)
        aml_ctx.upload_file(name="model.pkl.gz", path_or_stream=str(model_path))
        aml_ctx.upload_file(name="full.log", path_or_stream=log_path)
Example #2
# Assumes module-level imports from the surrounding file: logging, wandb,
# torch, os.path.join, pathlib.Path, ptgnn's ModelTrainer and
# LazyDataIterable, and local helpers (load_from_folder, VarNamingModel,
# create_var_naming_gnn_model, log_run).
def train(cfg):
    if cfg.show_in_wandb:
        # Silence wandb's own INFO-level logging, then initialize the run.
        logger = logging.getLogger("wandb")
        logger.setLevel(logging.WARNING)
        name_of_run = cfg.model.name_of_run
        wandb.init(project="IdTransformer",
                   config=cfg,
                   group="GNN",
                   name=name_of_run)

    training_data = LazyDataIterable(lambda: load_from_folder(
        join(cfg.dataset.path, "train"), shuffle=True))
    validation_data = LazyDataIterable(lambda: load_from_folder(
        join(cfg.dataset.path, "validation"), shuffle=False))
    test_data = LazyDataIterable(lambda: load_from_folder(
        join(cfg.dataset.path, "test"), shuffle=False))

    model_path = Path(cfg.model.filename)
    assert model_path.name.endswith(
        ".pkl.gz"), "model filename must have a `.pkl.gz` suffix."

    # Restore a checkpoint when configured to; a freshly built model still
    # needs its metadata initialized from the training data.
    initialize_metadata = True
    if cfg.model.use_checkpoint and cfg.model.restore_path:
        initialize_metadata = False
        model, nn = VarNamingModel.restore_model(Path(cfg.model.restore_path))
    else:
        nn = None
        model = create_var_naming_gnn_model(cfg.model)

    def create_optimizer(parameters):
        return torch.optim.Adam(parameters, lr=cfg.model.max_lr)

    trainer = ModelTrainer(
        model,
        model_path,
        max_num_epochs=int(cfg.model.max_epochs),
        minibatch_size=int(cfg.model.minibatch_size),
        optimizer_creator=create_optimizer,
        clip_gradient_norm=1,
        target_validation_metric="accuracy",
        target_validation_metric_higher_is_better=True,
    )
    if nn is not None:
        trainer.neural_module = nn

    # Report per-epoch metrics to wandb only when it is enabled.
    if cfg.show_in_wandb:
        trainer.register_train_epoch_end_hook(
            lambda model, nn, epoch, metrics: log_run("train", model, epoch,
                                                      metrics))
        trainer.register_validation_epoch_end_hook(
            lambda model, nn, epoch, metrics: log_run("val", model, epoch,
                                                      metrics))

    trainer.train(training_data,
                  validation_data,
                  validate_on_start=cfg.model.validate_on_start,
                  show_progress_bar=True,
                  initialize_metadata=initialize_metadata,
                  parallelize=cfg.model.parallelize,
                  use_multiprocessing=cfg.model.use_multiprocessing)
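
The attribute-style access (`cfg.model.max_lr`, `cfg.dataset.path`) suggests a Hydra/OmegaConf-style config object, though the excerpt does not show how it is built. A minimal sketch, assuming OmegaConf; every key mirrors an access made by `train`, and all values are placeholders rather than the original project's settings:

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "show_in_wandb": False,
    "dataset": {"path": "data/varnaming"},
    "model": {
        "name_of_run": "gnn-baseline",
        "filename": "model.pkl.gz",
        "use_checkpoint": False,
        "restore_path": None,
        "max_lr": 2.5e-4,
        "max_epochs": 100,
        "minibatch_size": 300,
        "validate_on_start": False,
        "parallelize": True,
        "use_multiprocessing": False,
    },
})
train(cfg)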
Example #3
# Assumes module-level imports from the surrounding file: pathlib.Path,
# dpu_utils.utils.RichPath, ptgnn's ModelTrainer, LazyDataIterable,
# AbstractNeuralModel and the GNN/decoder components used below, plus local
# helpers (configure_logging, log_run).
def run(arguments):
    if arguments["--aml"]:
        import torch
        from azureml.core.run import Run

        # Running inside Azure ML: get the run context for logging and
        # require a GPU before starting training.
        aml_ctx = Run.get_context()
        assert torch.cuda.is_available(), "No CUDA available. Aborting training."
    else:
        aml_ctx = None

    log_path = configure_logging(aml_ctx)
    azure_info_path = arguments.get("--azure-info", None)

    # Each dataset is streamed lazily as JSON lines; the lambda is re-invoked
    # whenever iteration restarts, so nothing is cached between epochs.
    training_data_path = RichPath.create(arguments["TRAIN_DATA_PATH"],
                                         azure_info_path)
    training_data = LazyDataIterable(
        lambda: training_data_path.read_as_jsonl())

    validation_data_path = RichPath.create(arguments["VALID_DATA_PATH"],
                                           azure_info_path)
    validation_data = LazyDataIterable(
        lambda: validation_data_path.read_as_jsonl())

    model_path = Path(arguments["MODEL_FILENAME"])
    assert model_path.name.endswith(
        ".pkl.gz"), "MODEL_FILENAME must have a `.pkl.gz` suffix."

    # Restore from a checkpoint if given; otherwise assemble a fresh
    # Graph2Seq model whose metadata must be initialized from the data.
    initialize_metadata = True
    restore_path = arguments.get("--restore-path", None)
    if restore_path:
        initialize_metadata = False
        model, nn = AbstractNeuralModel.restore_model(Path(restore_path))
    else:
        embedding_size = 128
        dropout_rate = 0.1
        nn = None

        def create_mp_layers(num_edges: int):
            # A single GGNN layer instance is reused for seven consecutive
            # message-passing steps (so those steps share parameters),
            # wrapped in a mean residual connection and followed by one
            # final, separately parameterized layer.
            ggnn_mp = GatedMessagePassingLayer(
                state_dimension=embedding_size,
                message_dimension=embedding_size,
                num_edge_types=num_edges,
                message_aggregation_function="sum",
                dropout_rate=dropout_rate,
            )
            r1 = MeanResidualLayer(embedding_size)
            return [
                r1.pass_through_dummy_layer(),  # records the input state for the residual
                ggnn_mp,
                ggnn_mp,
                ggnn_mp,
                ggnn_mp,
                ggnn_mp,
                ggnn_mp,
                ggnn_mp,
                r1,
                GatedMessagePassingLayer(
                    state_dimension=embedding_size,
                    message_dimension=embedding_size,
                    num_edge_types=num_edges,
                    message_aggregation_function="sum",
                    dropout_rate=dropout_rate,
                ),
            ]

        # Compose the model: token-level node embeddings, a GNN encoder,
        # and a GRU decoder with a copying mechanism.
        model = Graph2Seq(
            gnn_model=GraphNeuralNetworkModel(
                node_representation_model=StrElementRepresentationModel(
                    token_splitting="token",
                    embedding_size=embedding_size,
                ),
                message_passing_layer_creator=create_mp_layers,
            ),
            decoder=GruCopyingDecoderModel(hidden_size=128,
                                           embedding_size=256,
                                           memories_hidden_dim=embedding_size),
        )

    trainer = ModelTrainer(
        model,
        model_path,
        max_num_epochs=int(arguments["--max-num-epochs"]),
        minibatch_size=int(arguments["--minibatch-size"]),
        enable_amp=arguments["--amp"],
    )
    if nn is not None:
        trainer.neural_module = nn

    trainer.register_train_epoch_end_hook(
        lambda model, nn, epoch, metrics: log_run(aml_ctx, "train", model,
                                                  epoch, metrics))
    trainer.register_validation_epoch_end_hook(
        lambda model, nn, epoch, metrics: log_run(aml_ctx, "valid", model,
                                                  epoch, metrics))

    trainer.train(
        training_data,
        validation_data,
        show_progress_bar=not arguments["--quiet"],
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
    )

    if aml_ctx is not None:
        aml_ctx.upload_file(name="model.pkl.gz",
                            path_or_stream=str(model_path))
        aml_ctx.upload_file(name="full.log", path_or_stream=log_path)