Example #1
0
def direct_neighbors(
    adj_matrix_file, train_node_labels_file, test_node_labels_file, use_cuda, metadata
):
    """Run the DirectNeighbors model inside an MLflow run and log its output.

    The run is created in the "Predict" experiment under ``RUN_NAME``. The
    train and test label tensors are OR-ed together, a graph is loaded from
    the single adjacency matrix file, and the frame returned by
    ``predict.run`` is logged as ``predictions`` under ``results``.

    Args:
        adj_matrix_file: Adjacency matrix file; handed to
            ``data_loaders.load_graph`` as a one-element list.
        train_node_labels_file: Passed to ``data_loaders.load_labels``.
        test_node_labels_file: Passed to ``data_loaders.load_labels``.
        use_cuda: Forwarded to the data loaders and recorded as a run tag.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=RUN_NAME):
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)
        mlflow.log_param("model", MODEL_NAME)

        # Load both label files (train first, then test) before combining.
        train_labels, test_labels = [
            data_loaders.load_labels(path, use_cuda=use_cuda)
            for path in (train_node_labels_file, test_node_labels_file)
        ]
        # Bitwise OR of the two tensors (a union, assuming 0/1 labels).
        combined = train_labels.byte() | test_labels.byte()
        labels = combined.long()

        graph = data_loaders.load_graph(
            [adj_matrix_file],
            labels.size(0),  # node count is the length of the label tensor
            self_loop=False,
            normalization=None,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        predictions = predict.run(
            labels=labels, model_class=DirectNeighbors, graph=graph
        )

        u_mlflow.log_dataframe(predictions, "predictions", "results")
Example #2
0
def save_ranks(ranks_df, n_nodes, run_name, params=None):
    """Log a ranking frame plus the metrics and figure derived from it.

    Logged to MLflow, in this order: the frame itself (``ranks``), the
    cumulative distribution array (``cum_dist``) and each of its entries as
    a stepped ``cum_dist`` metric, then ``auc``, ``auc_ratio``,
    ``median_rank`` and finally the ``cumulative_distribution`` figure.

    Args:
        ranks_df: Frame with a ``rank`` column.
        n_nodes: Passed to ``cumulative_distribution_function``; also
            defines the x axis ``1..n_nodes`` used for the AUC and the plot.
        run_name: Forwarded to ``plot_cumulative_distribution``.
        params: Forwarded to ``plot_cumulative_distribution``.
    """
    u_mlflow.log_dataframe(ranks_df, "ranks", "results")

    cdf = cumulative_distribution_function(
        n_nodes=n_nodes, ranks=ranks_df["rank"].values
    )
    u_mlflow.log_ndarray(cdf, "cum_dist", "results")

    # One metric point per entry so the curve is stored as a stepped series.
    for step, value in enumerate(cdf):
        mlflow.log_metric("cum_dist", value, step=step)

    # Area under the cumulative curve, then the same area over n_nodes - 1.
    positions = np.arange(1, n_nodes + 1)
    area = metrics.auc(x=positions, y=cdf)
    mlflow.log_metric("auc", area)
    mlflow.log_metric("auc_ratio", area / (n_nodes - 1))

    mlflow.log_metric("median_rank", np.median(ranks_df["rank"].values))

    # Render, log, then close the figure so it is not kept open by pyplot.
    figure = plot_cumulative_distribution(positions, cdf, area, run_name, params)
    u_mlflow.log_fig(figure, "cumulative_distribution", "figures")
    plt.close(figure)
Example #3
0
def bagging_mlp(
    embeddings_file,
    train_node_labels_file,
    test_node_labels_file,
    node_features_file,
    use_cuda,
    params,
    metadata,
):
    """Run a Bagging ensemble of MLPs inside an MLflow run and log its output.

    The model input is built from the embeddings, the node features, or both
    concatenated along ``dim=1`` (embeddings first). Each input file that is
    supplied is also logged as an artifact under ``inputs``.

    Args:
        embeddings_file: Embeddings file, or None to leave embeddings out.
        train_node_labels_file: Passed to ``data_loaders.load_labels``.
        test_node_labels_file: Passed to ``data_loaders.load_labels``.
        node_features_file: Node features file, or None to leave them out.
        use_cuda: Forwarded to the data loaders and recorded as a run tag.
        params: Mapping expanded into ``u_mlflow.add_params`` and into
            ``predict.run`` as keyword arguments.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.

    Raises:
        ValueError: If both ``embeddings_file`` and ``node_features_file``
            are None, or if neither loader produced any features.
    """
    # Fail fast, before any MLflow state is created: without at least one
    # feature source there is nothing to feed the model. Previously this case
    # surfaced as an UnboundLocalError after a run had already been opened.
    if embeddings_file is None and node_features_file is None:
        raise ValueError(
            "bagging_mlp requires embeddings_file and/or node_features_file; "
            "both are None"
        )

    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)

        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        train_labels = data_loaders.load_labels(train_node_labels_file,
                                                use_cuda=use_cuda)
        test_labels = data_loaders.load_labels(test_node_labels_file,
                                               use_cuda=use_cuda)
        # Bitwise OR of the two tensors (a union, assuming 0/1 labels).
        labels = (train_labels.byte() | test_labels.byte()).long()

        embeddings = None
        node_features = None

        if embeddings_file is not None:
            mlflow.log_param("embeddings", True)
            mlflow.log_artifact(embeddings_file, "inputs")
            embeddings = data_loaders.load_embeddings(embeddings_file,
                                                      use_cuda=use_cuda)
        else:
            mlflow.log_param("embeddings", False)

        if node_features_file is not None:
            mlflow.log_param("node_features", True)
            mlflow.log_artifact(node_features_file, "inputs")
            node_features = data_loaders.load_node_features(
                node_features_file, use_cuda)
        else:
            mlflow.log_param("node_features", False)

        if embeddings is not None and node_features is not None:
            in_features = torch.cat((embeddings, node_features), dim=1)
        elif embeddings is not None:
            in_features = embeddings
        elif node_features is not None:
            in_features = node_features
        else:
            # Only reachable if a loader returned None for a supplied file.
            raise ValueError(
                "no input features were loaded from the supplied files"
            )

        print(RUN_NAME)
        ranks_df = predict.run(labels=labels,
                               model_class=Bagging,
                               bagging_model=MLP,
                               features=in_features,
                               **params)

        u_mlflow.log_dataframe(ranks_df, "predictions", "results")
Example #4
0
def bagging_rgcn(
    adj_matrix_files,
    train_node_labels_file,
    test_node_labels_file,
    node_features_file,
    use_cuda,
    params,
    metadata,
):
    """Run a Bagging ensemble of RGCNs inside an MLflow run and log its output.

    A graph is loaded from all adjacency matrix files with edge types and
    node ids attached. Node features are optional; when no file is given the
    model receives ``features=None``. The number of relations passed to the
    model is the number of adjacency matrix files.

    Args:
        adj_matrix_files: Sized collection of adjacency matrix files.
        train_node_labels_file: Passed to ``data_loaders.load_labels``.
        test_node_labels_file: Passed to ``data_loaders.load_labels``.
        node_features_file: Node features file, or None for no features.
        use_cuda: Forwarded to the data loaders and recorded as a run tag.
        params: Mapping expanded into ``u_mlflow.add_params`` and into
            ``predict.run`` as keyword arguments.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        # Load both label files (train first, then test) before combining.
        train_labels, test_labels = [
            data_loaders.load_labels(path, use_cuda=use_cuda)
            for path in (train_node_labels_file, test_node_labels_file)
        ]
        # Bitwise OR of the two tensors (a union, assuming 0/1 labels).
        labels = (train_labels.byte() | test_labels.byte()).long()

        # Record whether features are used; load them only when a file exists.
        has_features = node_features_file is not None
        mlflow.log_param("node_features", has_features)
        features = None
        if has_features:
            mlflow.log_artifact(node_features_file, "inputs")
            features = data_loaders.load_node_features(node_features_file,
                                                       use_cuda)

        graph = data_loaders.load_graph(
            adj_matrix_files,
            labels.size(0),  # node count is the length of the label tensor
            add_edge_type=True,
            add_node_ids=True,
            normalization=NORMALIZATION,
            use_cuda=use_cuda,
        )

        print(RUN_NAME)
        predictions = predict.run(
            labels=labels,
            model_class=Bagging,
            bagging_model=RGCN,
            features=features,
            graph=graph,
            n_rels=len(adj_matrix_files),
            **params
        )

        u_mlflow.log_dataframe(predictions, "predictions", "results")
Example #5
0
def net_prop_with_restart(
    run_name,
    model_name,
    normalization,
    adj_matrix_file,
    train_node_labels_file,
    test_node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run NetPropWithRestart inside an MLflow run and log its output.

    Unlike functions that rely on module constants, the run name, model name
    and normalization are supplied by the caller.

    Args:
        run_name: Name of the MLflow run; also printed before prediction.
        model_name: Logged as the ``model`` parameter.
        normalization: Forwarded to ``data_loaders.load_adj_matrices``.
        adj_matrix_file: Single adjacency matrix file to load.
        train_node_labels_file: Passed to ``data_loaders.load_labels``.
        test_node_labels_file: Passed to ``data_loaders.load_labels``.
        use_cuda: Forwarded to the data loaders and recorded as a run tag.
        params: Mapping expanded into ``u_mlflow.add_params`` and into
            ``predict.run`` as keyword arguments.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=run_name):
        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)

        mlflow.log_param("model", model_name)
        mlflow.set_tag("use_cuda", use_cuda)

        # Load both label files (train first, then test) before combining.
        train_labels, test_labels = [
            data_loaders.load_labels(path, use_cuda=use_cuda)
            for path in (train_node_labels_file, test_node_labels_file)
        ]
        # Bitwise OR of the two tensors (a union, assuming 0/1 labels).
        combined = train_labels.byte() | test_labels.byte()
        labels = combined.long()

        # The loader takes a list of files; only the first result is used.
        loaded = data_loaders.load_adj_matrices(
            [adj_matrix_file], normalization=normalization, use_cuda=use_cuda
        )
        adjacency_matrix = loaded[0]

        print(run_name)
        predictions = predict.run(
            labels=labels,
            model_class=NetPropWithRestart,
            adjacency_matrix=adjacency_matrix,
            **params
        )

        u_mlflow.log_dataframe(predictions, "predictions", "results")
Example #6
0
def rwr_m(
    adj_matrix_files,
    train_node_labels_file,
    test_node_labels_file,
    use_cuda,
    params,
    metadata,
):
    """Run the RwrM model inside an MLflow run and log its output.

    All adjacency matrices are loaded without normalization and combined
    with ``torch.stack`` into the single tensor handed to the model.

    Args:
        adj_matrix_files: Adjacency matrix files to load and stack.
        train_node_labels_file: Passed to ``data_loaders.load_labels``.
        test_node_labels_file: Passed to ``data_loaders.load_labels``.
        use_cuda: Forwarded to the data loaders and recorded as a run tag.
        params: Mapping expanded into ``u_mlflow.add_params`` and into
            ``predict.run`` as keyword arguments.
        metadata: Passed unchanged to ``u_mlflow.add_metadata``.
    """
    mlflow.set_experiment("Predict")

    with mlflow.start_run(run_name=RUN_NAME):
        mlflow.log_param("model", MODEL_NAME)

        u_mlflow.add_params(**params)
        u_mlflow.add_metadata(metadata)
        mlflow.set_tag("use_cuda", use_cuda)

        # Load both label files (train first, then test) before combining.
        train_labels, test_labels = [
            data_loaders.load_labels(path, use_cuda=use_cuda)
            for path in (train_node_labels_file, test_node_labels_file)
        ]
        # Bitwise OR of the two tensors (a union, assuming 0/1 labels).
        combined = train_labels.byte() | test_labels.byte()
        labels = combined.long()

        print("Loading adjacency matrices")
        # Stack the per-file matrices along a new leading dimension.
        adjacency_matrices = torch.stack(
            data_loaders.load_adj_matrices(
                adj_matrix_files, normalization=None, use_cuda=use_cuda
            )
        )

        print(RUN_NAME)
        predictions = predict.run(
            labels=labels,
            model_class=RwrM,
            adjacency_matrices=adjacency_matrices,
            **params
        )

        u_mlflow.log_dataframe(predictions, "predictions", "results")