def log_run_lambda(aml_ctx, fold, model, nn, epoch, metrics):
    """A utility function that can be used with partial(), and can be serialized through multiprocessing.

    Intended as a trainer epoch-end hook: pre-bind `aml_ctx` and `fold`
    with functools.partial(); the trainer then supplies the remaining
    (model, nn, epoch, metrics) arguments when invoking the hook.

    Note: `nn` is accepted to match the hook signature but is NOT
    forwarded to log_run.
    """
    log_run(aml_ctx, fold, model, epoch, metrics)
def run(arguments):
    """Train and evaluate the PPI GNN model from docopt-style `arguments`.

    Loads the train/valid folds from DATA_PATH, optionally restores a
    model from --restore-path, trains with early stopping on f1_score,
    prints the test-fold metrics, and — when --aml is set — uploads the
    model and log artifacts to the AzureML run context.

    Raises:
        RuntimeError: when --aml is set but CUDA is unavailable.
        ValueError: when MODEL_FILENAME lacks the `.pkl.gz` suffix.
    """
    # Local import: keeps the module-level import surface unchanged.
    from functools import partial

    if arguments["--aml"]:
        from azureml.core.run import Run

        aml_ctx = Run.get_context()
        # Explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would let an AML job silently continue without CUDA.
        if not torch.cuda.is_available():
            raise RuntimeError("No CUDA available. Aborting training.")
    else:
        aml_ctx = None

    log_path = configure_logging(aml_ctx)
    azure_info_path = arguments.get("--azure-info", None)
    data_path = RichPath.create(arguments["DATA_PATH"], azure_info_path)
    training_data = PPIDatasetLoader.load_data(data_path, "train")
    validation_data = PPIDatasetLoader.load_data(data_path, "valid")

    model_path = Path(arguments["MODEL_FILENAME"])
    if not model_path.name.endswith(".pkl.gz"):
        raise ValueError("MODEL_FILENAME must have a `.pkl.gz` suffix.")

    initialize_metadata = True
    restore_path = arguments.get("--restore-path", None)
    if restore_path:
        # Resuming: metadata was already computed when the restored model was built.
        initialize_metadata = False
        model, nn = AbstractNeuralModel.restore_model(Path(restore_path))
    else:
        model = create_ppi_gnn_model()
        nn = None

    def create_optimizer(parameters):
        # Adam with the fixed learning rate used for this task.
        return torch.optim.Adam(parameters, lr=1e-3)

    trainer = ModelTrainer(
        model,
        model_path,
        max_num_epochs=int(arguments["--max-num-epochs"]),
        minibatch_size=int(arguments["--minibatch-size"]),
        optimizer_creator=create_optimizer,
        clip_gradient_norm=1,
        target_validation_metric="f1_score",
        target_validation_metric_higher_is_better=True,
    )
    if nn is not None:
        # Reuse the restored neural module instead of building a fresh one.
        trainer.neural_module = nn

    # Use partial(log_run_lambda, ...) rather than inline lambdas: lambdas
    # cannot be pickled, and log_run_lambda exists precisely so these hooks
    # can be serialized through multiprocessing (see its docstring).
    trainer.register_train_epoch_end_hook(partial(log_run_lambda, aml_ctx, "train"))
    trainer.register_validation_epoch_end_hook(partial(log_run_lambda, aml_ctx, "valid"))

    trainer.train(
        training_data,
        validation_data,
        show_progress_bar=not arguments["--quiet"],
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
        patience=20,
    )

    test_data = PPIDatasetLoader.load_data(data_path, "test")
    metrics = model.report_metrics(
        test_data,
        trainer.neural_module,
        device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    )
    print(f"Test metrics: {json.dumps(metrics, indent=3)}")

    if aml_ctx is not None:
        aml_ctx.upload_file(name="model.pkl.gz", path_or_stream=str(model_path))
        aml_ctx.upload_file(name="full.log", path_or_stream=log_path)
def run(arguments):
    """Entry point: train and evaluate the Graph2Class model.

    Reads TRAIN/VALID/TEST_DATA_PATH and MODEL_FILENAME from the docopt
    `arguments` dict; optionally resumes from --restore-path (or from an
    already-existing model file when running under AzureML), trains with
    early stopping on Accuracy, prints the test accuracy, and — under
    AzureML — logs the metric and uploads the model and log artifacts.
    """
    aml_ctx = None
    if arguments["--aml"]:
        from azureml.core.run import Run

        aml_ctx = Run.get_context()
        assert torch.cuda.is_available(), "No CUDA available. Aborting training."

    log_path = configure_logging(aml_ctx)
    azure_info = arguments.get("--azure-info", None)

    # Data is loaded lazily; only training data is shuffled.
    train_path = RichPath.create(arguments["TRAIN_DATA_PATH"], azure_info)
    training_data = LazyDataIterable(lambda: load_from_folder(train_path, shuffle=True))
    valid_path = RichPath.create(arguments["VALID_DATA_PATH"], azure_info)
    validation_data = LazyDataIterable(lambda: load_from_folder(valid_path, shuffle=False))

    model_path = Path(arguments["MODEL_FILENAME"])
    assert model_path.name.endswith(".pkl.gz"), "MODEL_FILENAME must have a `.pkl.gz` suffix."

    restore_path = arguments.get("--restore-path", None)
    initialize_metadata = True
    if restore_path:
        initialize_metadata = False
        model, nn = Graph2Class.restore_model(Path(restore_path))
    elif arguments["--aml"] and model_path.exists():
        # An existing model file under AzureML means we resume from a checkpoint.
        initialize_metadata = False
        model, nn = Graph2Class.restore_model(model_path)
    else:
        nn = None
        model = create_graph2class_gnn_model()

    def create_optimizer(parameters):
        # Fixed learning rate chosen for this task.
        return torch.optim.Adam(parameters, lr=0.00025)

    trainer = ModelTrainer(
        model,
        model_path,
        max_num_epochs=int(arguments["--max-num-epochs"]),
        minibatch_size=int(arguments["--minibatch-size"]),
        optimizer_creator=create_optimizer,
        clip_gradient_norm=1,
        target_validation_metric="Accuracy",
        target_validation_metric_higher_is_better=True,
        enable_amp=arguments["--amp"],
    )
    if nn is not None:
        # Reuse the restored neural module instead of building a fresh one.
        trainer.neural_module = nn

    trainer.register_train_epoch_end_hook(
        lambda model, nn, epoch, metrics: log_run(aml_ctx, "train", model, epoch, metrics)
    )
    trainer.register_validation_epoch_end_hook(
        lambda model, nn, epoch, metrics: log_run(aml_ctx, "valid", model, epoch, metrics)
    )

    trainer.train(
        training_data,
        validation_data,
        show_progress_bar=not arguments["--quiet"],
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
        patience=10,
        store_tensorized_data_in_memory=True,
    )

    test_path = RichPath.create(arguments["TEST_DATA_PATH"], azure_info)
    test_data = LazyDataIterable(lambda: load_from_folder(test_path, shuffle=False))
    eval_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    acc = model.report_accuracy(test_data, trainer.neural_module, device=eval_device)
    print(f"Test accuracy: {acc:%}")

    if aml_ctx is not None:
        aml_ctx.log("Test Accuracy", acc)
        aml_ctx.upload_file(name="model.pkl.gz", path_or_stream=str(model_path))
        aml_ctx.upload_file(name="full.log", path_or_stream=log_path)
def run(arguments):
    """Entry point: train the Graph2Seq (GNN encoder + GRU copying decoder) model.

    Reads TRAIN/VALID_DATA_PATH (jsonl) and MODEL_FILENAME from the
    docopt `arguments` dict, optionally restores a model from
    --restore-path, trains, and — when --aml is set — uploads the model
    and log artifacts to the AzureML run context.
    """
    if arguments["--aml"]:
        import torch
        from azureml.core.run import Run

        aml_ctx = Run.get_context()
        assert torch.cuda.is_available(), "No CUDA available. Aborting training."
    else:
        aml_ctx = None

    log_path = configure_logging(aml_ctx)
    azure_info = arguments.get("--azure-info", None)

    # Both folds stream lazily from jsonl files.
    train_path = RichPath.create(arguments["TRAIN_DATA_PATH"], azure_info)
    training_data = LazyDataIterable(lambda: train_path.read_as_jsonl())
    valid_path = RichPath.create(arguments["VALID_DATA_PATH"], azure_info)
    validation_data = LazyDataIterable(lambda: valid_path.read_as_jsonl())

    model_path = Path(arguments["MODEL_FILENAME"])
    assert model_path.name.endswith(".pkl.gz"), "MODEL_FILENAME must have a `.pkl.gz` suffix."

    restore_path = arguments.get("--restore-path", None)
    if restore_path:
        initialize_metadata = False
        model, nn = AbstractNeuralModel.restore_model(Path(restore_path))
    else:
        initialize_metadata = True
        nn = None
        embedding_size = 128
        dropout_rate = 0.1

        def create_mp_layers(num_edges: int):
            # Seven message-passing steps reuse the SAME GGNN layer object
            # (shared across steps), wrapped between a mean-residual
            # pass-through and its residual merge, then one final GGNN layer.
            shared_ggnn = GatedMessagePassingLayer(
                state_dimension=embedding_size,
                message_dimension=embedding_size,
                num_edge_types=num_edges,
                message_aggregation_function="sum",
                dropout_rate=dropout_rate,
            )
            residual = MeanResidualLayer(embedding_size)
            head = [residual.pass_through_dummy_layer()]
            body = [shared_ggnn] * 7
            tail = [
                residual,
                GatedMessagePassingLayer(
                    state_dimension=embedding_size,
                    message_dimension=embedding_size,
                    num_edge_types=num_edges,
                    message_aggregation_function="sum",
                    dropout_rate=dropout_rate,
                ),
            ]
            return head + body + tail

        model = Graph2Seq(
            gnn_model=GraphNeuralNetworkModel(
                node_representation_model=StrElementRepresentationModel(
                    token_splitting="token",
                    embedding_size=embedding_size,
                ),
                message_passing_layer_creator=create_mp_layers,
            ),
            decoder=GruCopyingDecoderModel(
                hidden_size=128, embedding_size=256, memories_hidden_dim=embedding_size
            ),
        )

    trainer = ModelTrainer(
        model,
        model_path,
        max_num_epochs=int(arguments["--max-num-epochs"]),
        minibatch_size=int(arguments["--minibatch-size"]),
        enable_amp=arguments["--amp"],
    )
    if nn is not None:
        # Reuse the restored neural module instead of building a fresh one.
        trainer.neural_module = nn

    trainer.register_train_epoch_end_hook(
        lambda model, nn, epoch, metrics: log_run(aml_ctx, "train", model, epoch, metrics)
    )
    trainer.register_validation_epoch_end_hook(
        lambda model, nn, epoch, metrics: log_run(aml_ctx, "valid", model, epoch, metrics)
    )

    trainer.train(
        training_data,
        validation_data,
        show_progress_bar=not arguments["--quiet"],
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
    )

    if aml_ctx is not None:
        aml_ctx.upload_file(name="model.pkl.gz", path_or_stream=str(model_path))
        aml_ctx.upload_file(name="full.log", path_or_stream=log_path)