def net_prop_with_restart(
    run_name,
    model_name,
    normalization,
    adj_matrix_file,
    node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``loocv.run`` with ``NetPropWithRestart`` inside a "LOOCV" MLflow run.

    Args:
        run_name: Name given to the MLflow run; also printed and forwarded to
            ``data_savers.save_ranks``.
        model_name: Value logged as the ``model`` MLflow parameter.
        normalization: Forwarded to ``data_loaders.load_adj_matrices``.
        adj_matrix_file: Single adjacency-matrix file; it is wrapped in a
            one-element list for the loader and the first result is used.
        node_labels_file: File handed to ``data_loaders.load_labels``.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``loocv.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("LOOCV")
    with mlflow.start_run(run_name=run_name):
        # Record the run configuration before any data is loaded.
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.log_param("model", model_name)
        mlflow.set_tag("use_cuda", use_cuda)
        mlflow.log_param("merged_layers", True)

        node_labels = data_loaders.load_labels(node_labels_file, use_cuda=use_cuda)
        node_count = node_labels.size(0)

        # The loader works on a list of files; only one is supplied here.
        loaded_matrices = data_loaders.load_adj_matrices(
            [adj_matrix_file],
            normalization=normalization,
            use_cuda=use_cuda,
        )
        single_matrix = loaded_matrices[0]

        print(run_name)
        ranks = loocv.run(
            labels=node_labels,
            model_class=NetPropWithRestart,
            adjacency_matrix=single_matrix,
            **params
        )
        data_savers.save_ranks(ranks, node_count, run_name, params)
def bagging_rgcn_embeddings(
    adj_matrix_files,
    node_labels_file,
    embeddings_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``loocv.run`` with ``Bagging``/``RGCN`` on embeddings in a "LOOCV" run.

    The run name, the logged model name and the graph normalization come from
    the names ``RUN_NAME``, ``MODEL_NAME`` and ``NORMALIZATION``, which are
    defined outside this function.

    Args:
        adj_matrix_files: Files handed to ``data_loaders.load_graph``; their
            count is passed to ``loocv.run`` as ``n_rels``.
        node_labels_file: File handed to ``data_loaders.load_labels``.
        embeddings_file: File handed to ``data_loaders.load_embeddings``; the
            result is passed to ``loocv.run`` as ``features``.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``loocv.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("LOOCV")
    with mlflow.start_run(run_name=RUN_NAME):
        # Record the run configuration before any data is loaded.
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        node_labels = data_loaders.load_labels(node_labels_file, use_cuda=use_cuda)
        node_count = node_labels.size(0)
        node_embeddings = data_loaders.load_embeddings(
            embeddings_file, use_cuda=use_cuda
        )
        multi_graph = data_loaders.load_graph(
            adj_matrix_files,
            node_count,
            add_edge_type=True,
            add_node_ids=True,
            normalization=NORMALIZATION,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        relation_count = len(adj_matrix_files)
        ranks = loocv.run(
            labels=node_labels,
            model_class=Bagging,
            bagging_model=RGCN,
            graph=multi_graph,
            features=node_embeddings,
            n_rels=relation_count,
            **params
        )
        data_savers.save_ranks(ranks, node_count, RUN_NAME, params)
def rwr_m(adj_matrix_files, node_labels_file, use_cuda, params, metadata):
    """Run ``loocv.run`` with ``RwrM`` on stacked adjacency matrices ("LOOCV" run).

    The run name and the logged model name come from ``RUN_NAME`` and
    ``MODEL_NAME``, which are defined outside this function.

    Args:
        adj_matrix_files: Files handed to ``data_loaders.load_adj_matrices``
            (with ``normalization=None``); the loaded matrices are combined
            with ``torch.stack``.
        node_labels_file: File handed to ``data_loaders.load_labels``.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``loocv.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("LOOCV")
    with mlflow.start_run(run_name=RUN_NAME):
        # Record the run configuration before any data is loaded.
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        node_labels = data_loaders.load_labels(node_labels_file, use_cuda=use_cuda)
        node_count = node_labels.size(0)

        print("Loading adjacency matrices")
        per_file_matrices = data_loaders.load_adj_matrices(
            adj_matrix_files,
            normalization=None,
            use_cuda=use_cuda,
        )
        # torch.stack joins the per-file matrices along a new leading dimension.
        stacked_matrices = torch.stack(per_file_matrices)

        print(RUN_NAME)
        ranks = loocv.run(
            labels=node_labels,
            model_class=RwrM,
            adjacency_matrices=stacked_matrices,
            **params
        )
        data_savers.save_ranks(ranks, node_count, RUN_NAME, params)
def bagging_mlp(
    embeddings_file,
    train_node_labels_file,
    test_node_labels_file,
    node_features_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``predict.run`` with ``Bagging``/``MLP`` inside a "Predict" MLflow run.

    The model input is built from the embeddings, the node features, or both
    concatenated along dimension 1. The run name and the logged model name
    come from ``RUN_NAME`` and ``MODEL_NAME``, which are defined outside this
    function.

    Args:
        embeddings_file: Optional file handed to
            ``data_loaders.load_embeddings``; ``None`` disables embeddings.
        train_node_labels_file: File with the training labels.
        test_node_labels_file: File with the test labels; both label sets are
            OR-ed together into the labels passed to ``predict.run``.
        node_features_file: Optional file handed to
            ``data_loaders.load_node_features``; ``None`` disables features.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``predict.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.

    Raises:
        ValueError: If neither ``embeddings_file`` nor ``node_features_file``
            is given, or if no input features could be loaded.
    """
    # Fail fast, before an MLflow run is opened: without any input file the
    # feature matrix would never be bound and the run would crash later with
    # an UnboundLocalError.
    if embeddings_file is None and node_features_file is None:
        raise ValueError(
            "bagging_mlp requires at least one of embeddings_file or "
            "node_features_file"
        )

    mlflow.set_experiment("Predict")
    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        train_labels = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        test_labels = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # A node is labelled if it is labelled in either the train or test set.
        labels = (train_labels.byte() | test_labels.byte()).long()

        embeddings = None
        node_features = None

        if embeddings_file is not None:
            mlflow.log_param("embeddings", True)
            mlflow.log_artifact(embeddings_file, "inputs")
            embeddings = data_loaders.load_embeddings(
                embeddings_file, use_cuda=use_cuda
            )
        else:
            mlflow.log_param("embeddings", False)

        if node_features_file is not None:
            mlflow.log_param("node_features", True)
            mlflow.log_artifact(node_features_file, "inputs")
            node_features = data_loaders.load_node_features(
                node_features_file, use_cuda
            )
        else:
            mlflow.log_param("node_features", False)

        if embeddings is not None and node_features is not None:
            in_features = torch.cat((embeddings, node_features), dim=1)
        elif embeddings is not None:
            in_features = embeddings
        elif node_features is not None:
            in_features = node_features
        else:
            # Only reachable if a loader returned None for a provided file.
            raise ValueError("bagging_mlp could not load any input features")

        print(RUN_NAME)
        ranks_df = predict.run(
            labels=labels,
            model_class=Bagging,
            bagging_model=MLP,
            features=in_features,
            **params
        )
        u_mlflow.log_dataframe(ranks_df, "predictions", "results")
def bagging_rgcn(
    adj_matrix_files,
    train_node_labels_file,
    test_node_labels_file,
    node_features_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``test.run`` with ``Bagging``/``RGCN`` inside a "Test" MLflow run.

    The run name, the logged model name and the graph normalization come from
    ``RUN_NAME``, ``MODEL_NAME`` and ``NORMALIZATION``, which are defined
    outside this function.

    Args:
        adj_matrix_files: Files handed to ``data_loaders.load_graph``; their
            count is passed to ``test.run`` as ``n_rels``.
        train_node_labels_file: File with the training labels.
        test_node_labels_file: File with the test labels; nodes that are
            non-zero here are excluded from the training mask.
        node_features_file: Optional file handed to
            ``data_loaders.load_node_features``; with ``None`` the model
            receives ``features=None``.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``test.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Test")
    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        train_labels = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        test_labels = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # A node is labelled if it is labelled in either the train or test set.
        labels = (train_labels.byte() | test_labels.byte()).long()
        # Logical NOT of the test labels, kept as a 0/1 uint8 tensor. `~` is
        # avoided on purpose: on uint8 tensors it is a *bitwise* NOT in
        # current PyTorch (0 -> 255, 1 -> 254), which would make every entry
        # non-zero and mark the test nodes as training nodes too.
        train_mask = (test_labels.byte() == 0).byte()
        n_nodes = labels.size(0)

        if node_features_file is not None:
            mlflow.log_param("node_features", True)
            mlflow.log_artifact(node_features_file, "inputs")
            features = data_loaders.load_node_features(node_features_file, use_cuda)
        else:
            mlflow.log_param("node_features", False)
            features = None

        graph = data_loaders.load_graph(
            adj_matrix_files,
            n_nodes,
            add_edge_type=True,
            add_node_ids=True,
            normalization=NORMALIZATION,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        ranks_df = test.run(
            labels=labels,
            train_mask=train_mask,
            model_class=Bagging,
            bagging_model=RGCN,
            features=features,
            graph=graph,
            n_rels=len(adj_matrix_files),
            **params
        )
        data_savers.save_ranks(ranks_df, n_nodes, RUN_NAME, params)
def bagging_gcn(
    adj_matrix_file,
    node_features_file,
    train_node_labels_file,
    test_node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``predict.run`` with ``Bagging``/``GCN`` inside a "Predict" MLflow run.

    The run name, the logged model name, the self-loop setting and the graph
    normalization come from ``RUN_NAME``, ``MODEL_NAME``, ``SELF_LOOP`` and
    ``NORMALIZATION``, which are defined outside this function.

    Args:
        adj_matrix_file: Single adjacency-matrix file; it is wrapped in a
            one-element list for ``data_loaders.load_graph``.
        node_features_file: Optional file handed to
            ``data_loaders.load_node_features``; with ``None`` the model
            receives ``features=None``.
        train_node_labels_file: File with the training labels.
        test_node_labels_file: File with the test labels; both label sets are
            OR-ed together into the labels passed to ``predict.run``.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``predict.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")
    with mlflow.start_run(run_name=RUN_NAME):
        # Record the run configuration before any data is loaded.
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        known_train = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        known_test = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # A node is labelled if it is labelled in either the train or test set.
        merged_labels = (known_train.byte() | known_test.byte()).long()
        node_count = merged_labels.size(0)

        has_features = node_features_file is not None
        mlflow.log_param("node_features", has_features)
        if has_features:
            mlflow.log_artifact(node_features_file, "inputs")
            node_features = data_loaders.load_node_features(
                node_features_file, use_cuda
            )
        else:
            node_features = None

        single_graph = data_loaders.load_graph(
            [adj_matrix_file],
            node_count,
            self_loop=SELF_LOOP,
            normalization=NORMALIZATION,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        predictions = predict.run(
            labels=merged_labels,
            model_class=Bagging,
            bagging_model=GCN,
            graph=single_graph,
            features=node_features,
            **params
        )
        u_mlflow.log_dataframe(predictions, "predictions", "results")
def bagging_rgcn_embeddings(
    adj_matrix_files,
    train_node_labels_file,
    test_node_labels_file,
    embeddings_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``predict.run`` with ``Bagging``/``RGCN`` on embeddings ("Predict" run).

    The run name, the logged model name and the graph normalization come from
    ``RUN_NAME``, ``MODEL_NAME`` and ``NORMALIZATION``, which are defined
    outside this function.

    Args:
        adj_matrix_files: Files handed to ``data_loaders.load_graph``; their
            count is passed to ``predict.run`` as ``n_rels``.
        train_node_labels_file: File with the training labels.
        test_node_labels_file: File with the test labels; both label sets are
            OR-ed together into the labels passed to ``predict.run``.
        embeddings_file: File handed to ``data_loaders.load_embeddings``; the
            result is passed to ``predict.run`` as ``features``.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``predict.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")
    with mlflow.start_run(run_name=RUN_NAME):
        # Record the run configuration before any data is loaded.
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        known_train = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        known_test = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # A node is labelled if it is labelled in either the train or test set.
        merged_labels = (known_train.byte() | known_test.byte()).long()
        node_count = merged_labels.size(0)

        node_embeddings = data_loaders.load_embeddings(
            embeddings_file, use_cuda=use_cuda
        )
        multi_graph = data_loaders.load_graph(
            adj_matrix_files,
            node_count,
            add_edge_type=True,
            add_node_ids=True,
            normalization=NORMALIZATION,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        relation_count = len(adj_matrix_files)
        predictions = predict.run(
            labels=merged_labels,
            model_class=Bagging,
            bagging_model=RGCN,
            features=node_embeddings,
            graph=multi_graph,
            n_rels=relation_count,
            **params
        )
        u_mlflow.log_dataframe(predictions, "predictions", "results")
def net_prop_with_restart(
    run_name,
    model_name,
    normalization,
    adj_matrix_file,
    train_node_labels_file,
    test_node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``test.run`` with ``NetPropWithRestart`` inside a "Test" MLflow run.

    Args:
        run_name: Name given to the MLflow run; also printed and forwarded to
            ``data_savers.save_ranks``.
        model_name: Value logged as the ``model`` MLflow parameter.
        normalization: Forwarded to ``data_loaders.load_adj_matrices``.
        adj_matrix_file: Single adjacency-matrix file; it is wrapped in a
            one-element list for the loader and the first result is used.
        train_node_labels_file: File with the training labels.
        test_node_labels_file: File with the test labels; nodes that are
            non-zero here are excluded from the training mask.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``test.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Test")
    with mlflow.start_run(run_name=run_name):
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.log_param("model", model_name)
        mlflow.set_tag("use_cuda", use_cuda)

        train_labels = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        test_labels = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # A node is labelled if it is labelled in either the train or test set.
        labels = (train_labels.byte() | test_labels.byte()).long()
        # Logical NOT of the test labels, kept as a 0/1 uint8 tensor. `~` is
        # avoided on purpose: on uint8 tensors it is a *bitwise* NOT in
        # current PyTorch (0 -> 255, 1 -> 254), which would make every entry
        # non-zero and mark the test nodes as training nodes too.
        train_mask = (test_labels.byte() == 0).byte()
        n_nodes = labels.size(0)

        # The loader works on a list of files; only one is supplied here.
        adjacency_matrix = data_loaders.load_adj_matrices(
            [adj_matrix_file], normalization=normalization, use_cuda=use_cuda
        )[0]

        print(run_name)
        ranks_df = test.run(
            labels=labels,
            train_mask=train_mask,
            model_class=NetPropWithRestart,
            adjacency_matrix=adjacency_matrix,
            **params
        )
        data_savers.save_ranks(ranks_df, n_nodes, run_name, params)
def bagging_gcn(
    adj_matrix_file,
    node_features_file,
    node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``loocv.run`` with ``Bagging``/``GCN`` inside a "LOOCV" MLflow run.

    The run name, the logged model name, the self-loop setting and the graph
    normalization come from ``RUN_NAME``, ``MODEL_NAME``, ``SELF_LOOP`` and
    ``NORMALIZATION``, which are defined outside this function.

    Args:
        adj_matrix_file: Single adjacency-matrix file; it is wrapped in a
            one-element list for ``data_loaders.load_graph``.
        node_features_file: Optional file handed to
            ``data_loaders.load_node_features``; with ``None`` the model
            receives ``features=None``.
        node_labels_file: File handed to ``data_loaders.load_labels``.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``loocv.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("LOOCV")
    with mlflow.start_run(run_name=RUN_NAME):
        # Record the run configuration before any data is loaded.
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)
        mlflow.log_param("merged_layers", True)

        node_labels = data_loaders.load_labels(node_labels_file, use_cuda=use_cuda)
        node_count = node_labels.size(0)

        has_features = node_features_file is not None
        mlflow.log_param("node_features", has_features)
        if has_features:
            mlflow.log_artifact(node_features_file, "inputs")
            node_features = data_loaders.load_node_features(
                node_features_file, use_cuda
            )
        else:
            node_features = None

        single_graph = data_loaders.load_graph(
            [adj_matrix_file],
            node_count,
            self_loop=SELF_LOOP,
            normalization=NORMALIZATION,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        ranks = loocv.run(
            labels=node_labels,
            model_class=Bagging,
            bagging_model=GCN,
            graph=single_graph,
            features=node_features,
            **params
        )
        data_savers.save_ranks(ranks, node_count, RUN_NAME, params)
def net_prop_with_restart(
    run_name,
    model_name,
    normalization,
    adj_matrix_file,
    train_node_labels_file,
    test_node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``predict.run`` with ``NetPropWithRestart`` in a "Predict" MLflow run.

    Args:
        run_name: Name given to the MLflow run; also printed.
        model_name: Value logged as the ``model`` MLflow parameter.
        normalization: Forwarded to ``data_loaders.load_adj_matrices``.
        adj_matrix_file: Single adjacency-matrix file; it is wrapped in a
            one-element list for the loader and the first result is used.
        train_node_labels_file: File with the training labels.
        test_node_labels_file: File with the test labels; both label sets are
            OR-ed together into the labels passed to ``predict.run``.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``predict.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")
    with mlflow.start_run(run_name=run_name):
        # Record the run configuration before any data is loaded.
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.log_param("model", model_name)
        mlflow.set_tag("use_cuda", use_cuda)

        known_train = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        known_test = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # A node is labelled if it is labelled in either the train or test set.
        merged_labels = (known_train.byte() | known_test.byte()).long()

        # The loader works on a list of files; only one is supplied here.
        loaded_matrices = data_loaders.load_adj_matrices(
            [adj_matrix_file],
            normalization=normalization,
            use_cuda=use_cuda,
        )
        single_matrix = loaded_matrices[0]

        print(run_name)
        predictions = predict.run(
            labels=merged_labels,
            model_class=NetPropWithRestart,
            adjacency_matrix=single_matrix,
            **params
        )
        u_mlflow.log_dataframe(predictions, "predictions", "results")
def rwr_m(
    adj_matrix_files,
    train_node_labels_file,
    test_node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run ``predict.run`` with ``RwrM`` on stacked matrices ("Predict" run).

    The run name and the logged model name come from ``RUN_NAME`` and
    ``MODEL_NAME``, which are defined outside this function.

    Args:
        adj_matrix_files: Files handed to ``data_loaders.load_adj_matrices``
            (with ``normalization=None``); the loaded matrices are combined
            with ``torch.stack``.
        train_node_labels_file: File with the training labels.
        test_node_labels_file: File with the test labels; both label sets are
            OR-ed together into the labels passed to ``predict.run``.
        use_cuda: Forwarded to the loaders and recorded as an MLflow tag.
        params: Mapping logged through ``u_mlflow.add_params`` and expanded as
            extra keyword arguments of ``predict.run``.
        metadata: Passed to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")
    with mlflow.start_run(run_name=RUN_NAME):
        # Record the run configuration before any data is loaded.
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        known_train = data_loaders.load_labels(
            train_node_labels_file, use_cuda=use_cuda
        )
        known_test = data_loaders.load_labels(
            test_node_labels_file, use_cuda=use_cuda
        )
        # A node is labelled if it is labelled in either the train or test set.
        merged_labels = (known_train.byte() | known_test.byte()).long()

        print("Loading adjacency matrices")
        per_file_matrices = data_loaders.load_adj_matrices(
            adj_matrix_files,
            normalization=None,
            use_cuda=use_cuda,
        )
        # torch.stack joins the per-file matrices along a new leading dimension.
        stacked_matrices = torch.stack(per_file_matrices)

        print(RUN_NAME)
        predictions = predict.run(
            labels=merged_labels,
            model_class=RwrM,
            adjacency_matrices=stacked_matrices,
            **params
        )
        u_mlflow.log_dataframe(predictions, "predictions", "results")