Example #1
0
def log_run_lambda(aml_ctx, fold, model, nn, epoch, metrics):
    """Picklable adapter around `log_run` for use with functools.partial().

    Accepts the full trainer-hook signature (including `nn`, which is
    intentionally unused) so the partial can be serialized across
    multiprocessing boundaries, unlike a lambda.
    """
    del nn  # unused; kept only to match the hook signature
    log_run(aml_ctx, fold, model, epoch, metrics)
Example #2
0
def run(arguments):
    """Train a PPI GNN model from docopt-style CLI `arguments`.

    Loads train/valid folds, optionally restores a saved model, trains with
    early stopping on validation F1, prints test metrics, and uploads the
    model and log as artifacts when running under Azure ML.
    """
    if arguments["--aml"]:
        from azureml.core.run import Run

        aml_ctx = Run.get_context()
        # Azure ML training runs require a GPU.
        assert torch.cuda.is_available(), "No CUDA available. Aborting training."
    else:
        aml_ctx = None

    log_path = configure_logging(aml_ctx)
    azure_info_path = arguments.get("--azure-info", None)

    data_path = RichPath.create(arguments["DATA_PATH"], azure_info_path)
    training_data = PPIDatasetLoader.load_data(data_path, "train")
    validation_data = PPIDatasetLoader.load_data(data_path, "valid")

    model_path = Path(arguments["MODEL_FILENAME"])
    assert model_path.name.endswith(".pkl.gz"), "MODEL_FILENAME must have a `.pkl.gz` suffix."

    restore_path = arguments.get("--restore-path", None)
    if not restore_path:
        # Fresh model: metadata must be computed from the training data.
        model = create_ppi_gnn_model()
        nn = None
        initialize_metadata = True
    else:
        model, nn = AbstractNeuralModel.restore_model(Path(restore_path))
        initialize_metadata = False

    def make_optimizer(parameters):
        # Optimizer factory handed to the trainer.
        return torch.optim.Adam(parameters, lr=1e-3)

    trainer = ModelTrainer(
        model,
        model_path,
        max_num_epochs=int(arguments["--max-num-epochs"]),
        minibatch_size=int(arguments["--minibatch-size"]),
        optimizer_creator=make_optimizer,
        clip_gradient_norm=1,
        target_validation_metric="f1_score",
        target_validation_metric_higher_is_better=True,
    )
    if nn is not None:
        # Attach the restored neural module instead of a freshly built one.
        trainer.neural_module = nn

    def on_train_epoch_end(model, nn, epoch, metrics):
        log_run(aml_ctx, "train", model, epoch, metrics)

    def on_valid_epoch_end(model, nn, epoch, metrics):
        log_run(aml_ctx, "valid", model, epoch, metrics)

    trainer.register_train_epoch_end_hook(on_train_epoch_end)
    trainer.register_validation_epoch_end_hook(on_valid_epoch_end)

    trainer.train(
        training_data,
        validation_data,
        show_progress_bar=not arguments["--quiet"],
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
        patience=20,
    )

    test_data = PPIDatasetLoader.load_data(data_path, "test")
    eval_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    metrics = model.report_metrics(test_data, trainer.neural_module, device=eval_device)
    print(f"Test metrics: {json.dumps(metrics, indent=3)}")

    if aml_ctx is not None:
        aml_ctx.upload_file(name="model.pkl.gz", path_or_stream=str(model_path))
        aml_ctx.upload_file(name="full.log", path_or_stream=log_path)
Example #3
0
def run(arguments):
    """Train a Graph2Class GNN from docopt-style CLI `arguments`.

    Restores from `--restore-path` if given, or from an existing model file
    when running under Azure ML (resuming a preempted run); otherwise builds
    a fresh model. After training, reports test-set accuracy and uploads
    artifacts to Azure ML when applicable.
    """
    if arguments["--aml"]:
        from azureml.core.run import Run

        aml_ctx = Run.get_context()
        # Azure ML training runs require a GPU.
        assert torch.cuda.is_available(), "No CUDA available. Aborting training."
    else:
        aml_ctx = None

    log_path = configure_logging(aml_ctx)
    azure_info_path = arguments.get("--azure-info", None)
    training_data_path = RichPath.create(arguments["TRAIN_DATA_PATH"], azure_info_path)
    training_data = LazyDataIterable(lambda: load_from_folder(training_data_path, shuffle=True))

    validation_data_path = RichPath.create(arguments["VALID_DATA_PATH"], azure_info_path)
    validation_data = LazyDataIterable(lambda: load_from_folder(validation_data_path, shuffle=False))

    model_path = Path(arguments["MODEL_FILENAME"])
    assert model_path.name.endswith(".pkl.gz"), "MODEL_FILENAME must have a `.pkl.gz` suffix."

    initialize_metadata = True
    restore_path = arguments.get("--restore-path", None)
    if restore_path:
        initialize_metadata = False
        model, nn = Graph2Class.restore_model(Path(restore_path))
    elif arguments["--aml"] and model_path.exists():
        # A model file already present under AML means a previous (likely
        # preempted) run left a checkpoint: resume from it.
        initialize_metadata = False
        model, nn = Graph2Class.restore_model(model_path)
    else:
        nn = None
        model = create_graph2class_gnn_model()

    def make_optimizer(parameters):
        # Optimizer factory handed to the trainer.
        return torch.optim.Adam(parameters, lr=0.00025)

    trainer = ModelTrainer(
        model,
        model_path,
        max_num_epochs=int(arguments["--max-num-epochs"]),
        minibatch_size=int(arguments["--minibatch-size"]),
        optimizer_creator=make_optimizer,
        clip_gradient_norm=1,
        target_validation_metric="Accuracy",
        target_validation_metric_higher_is_better=True,
        enable_amp=arguments["--amp"],
    )
    if nn is not None:
        # Attach the restored neural module instead of a freshly built one.
        trainer.neural_module = nn

    def on_train_epoch_end(model, nn, epoch, metrics):
        log_run(aml_ctx, "train", model, epoch, metrics)

    def on_valid_epoch_end(model, nn, epoch, metrics):
        log_run(aml_ctx, "valid", model, epoch, metrics)

    trainer.register_train_epoch_end_hook(on_train_epoch_end)
    trainer.register_validation_epoch_end_hook(on_valid_epoch_end)

    trainer.train(
        training_data,
        validation_data,
        show_progress_bar=not arguments["--quiet"],
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
        patience=10,
        store_tensorized_data_in_memory=True,
    )

    test_data_path = RichPath.create(arguments["TEST_DATA_PATH"], azure_info_path)
    test_data = LazyDataIterable(lambda: load_from_folder(test_data_path, shuffle=False))
    eval_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    acc = model.report_accuracy(test_data, trainer.neural_module, device=eval_device)
    print(f"Test accuracy: {acc:%}")

    if aml_ctx is not None:
        aml_ctx.log("Test Accuracy", acc)
        aml_ctx.upload_file(name="model.pkl.gz", path_or_stream=str(model_path))
        aml_ctx.upload_file(name="full.log", path_or_stream=log_path)
Example #4
0
def run(arguments):
    """Train a Graph2Seq model on JSONL graph data from docopt-style `arguments`.

    Optionally restores a saved model; otherwise builds a GGNN encoder with a
    GRU copying decoder. Uploads the model and log to Azure ML when running
    under AML.
    """
    if arguments["--aml"]:
        import torch
        from azureml.core.run import Run

        aml_ctx = Run.get_context()
        # Azure ML training runs require a GPU.
        assert torch.cuda.is_available(), "No CUDA available. Aborting training."
    else:
        aml_ctx = None

    log_path = configure_logging(aml_ctx)
    azure_info_path = arguments.get("--azure-info", None)

    training_data_path = RichPath.create(arguments["TRAIN_DATA_PATH"], azure_info_path)
    training_data = LazyDataIterable(lambda: training_data_path.read_as_jsonl())

    validation_data_path = RichPath.create(arguments["VALID_DATA_PATH"], azure_info_path)
    validation_data = LazyDataIterable(lambda: validation_data_path.read_as_jsonl())

    model_path = Path(arguments["MODEL_FILENAME"])
    assert model_path.name.endswith(".pkl.gz"), "MODEL_FILENAME must have a `.pkl.gz` suffix."

    restore_path = arguments.get("--restore-path", None)
    if restore_path:
        initialize_metadata = False
        model, nn = AbstractNeuralModel.restore_model(Path(restore_path))
    else:
        initialize_metadata = True
        nn = None
        embedding_size = 128
        dropout_rate = 0.1

        def make_mp_layers(num_edges: int):
            # Seven message-passing steps sharing one set of GGNN weights,
            # wrapped in a mean-residual connection, followed by one final
            # layer with its own (unshared) weights.
            shared_mp = GatedMessagePassingLayer(
                state_dimension=embedding_size,
                message_dimension=embedding_size,
                num_edge_types=num_edges,
                message_aggregation_function="sum",
                dropout_rate=dropout_rate,
            )
            residual = MeanResidualLayer(embedding_size)
            return [
                residual.pass_through_dummy_layer(),
                *([shared_mp] * 7),  # same object repeated -> shared weights
                residual,
                GatedMessagePassingLayer(
                    state_dimension=embedding_size,
                    message_dimension=embedding_size,
                    num_edge_types=num_edges,
                    message_aggregation_function="sum",
                    dropout_rate=dropout_rate,
                ),
            ]

        model = Graph2Seq(
            gnn_model=GraphNeuralNetworkModel(
                node_representation_model=StrElementRepresentationModel(
                    token_splitting="token",
                    embedding_size=embedding_size,
                ),
                message_passing_layer_creator=make_mp_layers,
            ),
            decoder=GruCopyingDecoderModel(
                hidden_size=128,
                embedding_size=256,
                memories_hidden_dim=embedding_size,
            ),
        )

    trainer = ModelTrainer(
        model,
        model_path,
        max_num_epochs=int(arguments["--max-num-epochs"]),
        minibatch_size=int(arguments["--minibatch-size"]),
        enable_amp=arguments["--amp"],
    )
    if nn is not None:
        # Attach the restored neural module instead of a freshly built one.
        trainer.neural_module = nn

    def on_train_epoch_end(model, nn, epoch, metrics):
        log_run(aml_ctx, "train", model, epoch, metrics)

    def on_valid_epoch_end(model, nn, epoch, metrics):
        log_run(aml_ctx, "valid", model, epoch, metrics)

    trainer.register_train_epoch_end_hook(on_train_epoch_end)
    trainer.register_validation_epoch_end_hook(on_valid_epoch_end)

    trainer.train(
        training_data,
        validation_data,
        show_progress_bar=not arguments["--quiet"],
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
    )

    if aml_ctx is not None:
        aml_ctx.upload_file(name="model.pkl.gz", path_or_stream=str(model_path))
        aml_ctx.upload_file(name="full.log", path_or_stream=log_path)