Beispiel #1
0
def do_link_prediction(
    NETWORK_FILE,
    TEST_EDGE_FILE,
    NEGATIVE_EDGE_FILE,
    DIRECTED,
    OUT_FILE,
    DIM,
    NUMBER_OF_CORES,
):
    """Embed a network with node2vec and run a link-prediction task on it.

    The graph is loaded with ``read_graph``, embedded through
    ``node2vec.Node2Vec`` (walk simulation followed by embedding learning),
    and the resulting embedding is handed to ``LinkPredictionTask`` together
    with the edges read from the two edge files.

    Args:
        NETWORK_FILE: Path passed to ``read_graph``; also the first field of
            the tab-separated result label.
        TEST_EDGE_FILE: Path passed to ``read_edge_file`` for the test edges.
        NEGATIVE_EDGE_FILE: Path passed to ``read_edge_file`` for the
            negative edges.
        DIRECTED: Forwarded as ``directed`` to both the graph reader and the
            node2vec model.
        OUT_FILE: Destination handed to ``LinkPredictionTask.write_result``.
        DIM: Forwarded as ``dimensions`` to the model; second field of the
            result label.
        NUMBER_OF_CORES: Forwarded as ``workers`` to the model.
    """
    graph = read_graph(NETWORK_FILE, directed=DIRECTED)

    embedder = node2vec.Node2Vec(
        graph,
        directed=DIRECTED,
        dimensions=DIM,
        workers=NUMBER_OF_CORES,
    )
    embedder.simulate_walks()
    embedding = embedder.learn_embedding()

    positive_edges = read_edge_file(TEST_EDGE_FILE)
    sampled_negative_edges = read_edge_file(NEGATIVE_EDGE_FILE)

    # The label identifies this run in the result file: "<network>\t<dim>".
    task = LinkPredictionTask(
        positive_edges,
        sampled_negative_edges,
        embedding,
        name="\t".join((NETWORK_FILE, str(DIM))),
        is_persona_emb=False,
    )
    task.do_link_prediction()
    task.write_result(OUT_FILE)
def do_link_prediction(
    NETWORK_FILE,
    TEST_EDGE_FILE,
    NEGATIVE_EDGE_FILE,
    DIRECTED,
    OUT_FILE,
    LAMBDA,
    DIM,
    NUMBER_OF_CORES,
):
    """Embed a network with Persona2Vec and run a link-prediction task on it.

    The graph is loaded with ``read_graph`` and wrapped in a ``Persona2Vec``
    model; the model's ``embedding`` and ``node_to_persona`` attributes are
    then passed to ``LinkPredictionTask`` together with the edges read from
    the two edge files.

    Args:
        NETWORK_FILE: Path passed to ``read_graph``; also the first field of
            the tab-separated result label.
        TEST_EDGE_FILE: Path passed to ``read_edge_file`` for the test edges.
        NEGATIVE_EDGE_FILE: Path passed to ``read_edge_file`` for the
            negative edges.
        DIRECTED: Forwarded as ``directed`` to both the graph reader and the
            model.
        OUT_FILE: Destination handed to ``LinkPredictionTask.write_result``.
        LAMBDA: Forwarded as ``lambd`` to the model; second field of the
            result label.
        DIM: Forwarded as ``dimensions`` to the model; third field of the
            result label.
        NUMBER_OF_CORES: Forwarded as ``workers`` to the model.
    """
    graph = read_graph(NETWORK_FILE, directed=DIRECTED)

    persona_model = Persona2Vec(
        graph,
        lambd=LAMBDA,
        directed=DIRECTED,
        dimensions=DIM,
        workers=NUMBER_OF_CORES,
    )
    persona_embedding = persona_model.embedding

    positive_edges = read_edge_file(TEST_EDGE_FILE)
    sampled_negative_edges = read_edge_file(NEGATIVE_EDGE_FILE)

    # The label identifies this run in the result file:
    # "<network>\t<lambda>\t<dim>".
    task = LinkPredictionTask(
        positive_edges,
        sampled_negative_edges,
        persona_embedding,
        name="\t".join((NETWORK_FILE, str(LAMBDA), str(DIM))),
        is_persona_emb=True,
        node_to_persona=persona_model.node_to_persona,
    )
    task.do_link_prediction()
    task.write_result(OUT_FILE)
Beispiel #3
0
def main():
    """
    Command-line entry point.

    Parses the command-line arguments and prints them, reads the input graph,
    builds a Persona2Vec model from it with the requested hyper-parameters,
    and finally saves the persona network, both persona/node mappings, the
    base embedding and the persona embedding to the paths given in the
    arguments.
    """
    args = parse_args()
    tab_printer(args)

    graph = read_graph(args.input, args.weighted, args.directed)

    # Keyword arguments for Persona2Vec, listed in the order the model is
    # configured: splitting, base walks, persona walks, node2vec bias,
    # embedding size / training.
    hyper_parameters = {
        "lambd": args.lambd,
        "clustering_method": "connected_component",
        "directed": args.directed,
        "num_walks_base": args.num_walks_base,
        "walk_length_base": args.walk_length_base,
        "window_size_base": args.window_size_base,
        "num_walks_persona": args.num_walks_persona,
        "walk_length_persona": args.walk_length_persona,
        "window_size_persona": args.window_size_persona,
        "p": args.p,
        "q": args.q,
        "dimensions": args.dimensions,
        "epoch_base": args.epoch_base,
        "epoch_persona": args.epoch_persona,
        "workers": args.workers,
    }
    persona_model = Persona2Vec(graph, **hyper_parameters)

    # Persist every artefact produced by the model.
    persona_model.save_persona_network(args.persona_network)
    persona_model.save_persona_to_node_mapping(args.persona_to_node)
    persona_model.save_node_to_persona_mapping(args.node_to_persona)
    persona_model.save_base_embedding(args.base_emb)
    persona_model.save_persona_embedding(args.persona_emb)
Beispiel #4
0
def train_test_set_split(IN_FILE, INDEX, DIRECTED):
    """Split a network into train/test edges and save the result.

    The graph is loaded with ``read_graph`` and handed to a
    ``NetworkTrainTestSplitter``, which performs the train/test split,
    generates negative edges and writes everything under an output prefix
    derived from the input path.

    Args:
        IN_FILE: Path of the network file to split.
        INDEX: Suffix appended (after an underscore) to the output prefix,
            so repeated splits of the same network do not collide.
        DIRECTED: Forwarded as ``directed`` to the reader and the splitter.
    """
    import os

    G = read_graph(IN_FILE, directed=DIRECTED)

    # Strip the extension from the file name only.  Splitting the whole path
    # on "." breaks for inputs such as "./data/net.txt" (empty prefix) or
    # "data.v1/net.txt" (prefix "data").  The directory part is kept
    # byte-for-byte, so paths without dots in their directories yield the
    # same prefix as before.
    filename = os.path.basename(IN_FILE)
    directory_part = IN_FILE[:len(IN_FILE) - len(filename)]
    stem = filename.split(".")[0]
    OUTPUT_PATH = "{}_{}".format(directory_part + stem, INDEX)

    splitter = NetworkTrainTestSplitter(G, directed=DIRECTED)
    splitter.train_test_split()
    splitter.generate_negative_edges()
    splitter.save_splitted_result(OUTPUT_PATH)
def do_link_prediction(
    NETWORK_FILE,
    TEST_EDGE_FILE,
    NEGATIVE_EDGE_FILE,
    DIRECTED,
    OUT_FILE,
    DIM,
    NUMBER_OF_CORES,
):
    """Score node2vec embeddings on link prediction with logistic regression.

    The graph is embedded with ``node2vec.Node2Vec``; each edge is turned
    into a feature vector by element-wise multiplication (Hadamard product)
    of its endpoint embeddings.  A logistic-regression classifier is fitted
    on test edges (label 1) versus negative edges (label 0), and the ROC AUC
    of its scores on those same samples is appended to ``OUT_FILE`` as
    ``"<network>\\t<dim>\\t<auc>"``.

    Args:
        NETWORK_FILE: Path passed to ``read_graph``; first field of the
            output line.
        TEST_EDGE_FILE: Path passed to ``read_edge_file`` for positive edges.
        NEGATIVE_EDGE_FILE: Path passed to ``read_edge_file`` for negative
            edges.
        DIRECTED: Forwarded as ``directed`` to the reader and the model.
        OUT_FILE: File the result line is appended to.
        DIM: Forwarded as ``dimensions`` to the model; second field of the
            output line.
        NUMBER_OF_CORES: Forwarded as ``workers`` to the model.
    """
    G = read_graph(NETWORK_FILE, directed=DIRECTED)
    model = node2vec.Node2Vec(
        G,
        directed=DIRECTED,
        dimensions=DIM,
        workers=NUMBER_OF_CORES,
    )
    model.simulate_walks()
    emb = model.learn_embedding()

    test_edges = read_edge_file(TEST_EDGE_FILE)
    negative_edges = read_edge_file(NEGATIVE_EDGE_FILE)

    # One feature vector per edge: Hadamard product of the endpoint vectors.
    test_hadamard_vectors = [
        np.multiply(emb[src], emb[tag]) for src, tag in test_edges
    ]
    negative_hadamard_vectors = [
        np.multiply(emb[src], emb[tag]) for src, tag in negative_edges
    ]
    xs = np.concatenate([test_hadamard_vectors, negative_hadamard_vectors])
    ys = np.concatenate([
        np.ones(len(test_hadamard_vectors)),
        np.zeros(len(negative_hadamard_vectors)),
    ])

    clf = LogisticRegression(random_state=0).fit(xs, ys)
    # ROC AUC needs continuous scores: feeding it the thresholded output of
    # ``predict`` collapses the curve to a single operating point.  Column 1
    # of ``predict_proba`` is the probability of the larger label (1.0).
    positive_scores = clf.predict_proba(xs)[:, 1]
    ROC_AUC_value = roc_auc_score(ys, positive_scores)

    name = "\t".join([NETWORK_FILE, str(DIM)])
    # Context manager guarantees the handle is closed even if write() fails.
    with open(OUT_FILE, "a") as f:
        f.write("{}\t{}\n".format(name, str(ROC_AUC_value)))
def convert_network_files_for_splitter(NETWORK_FILE, CONVERTED_NETWORK_FILE, DIRECTED):
    """Relabel a network's nodes with integers and write it as an edge list.

    Args:
        NETWORK_FILE: Path passed to ``read_graph``.
        CONVERTED_NETWORK_FILE: Path the relabelled edge list is written to.
        DIRECTED: Forwarded as ``directed`` to ``read_graph``.

    Returns:
        The relabelled graph produced by
        ``nx.convert_node_labels_to_integers``.
    """
    original_graph = read_graph(NETWORK_FILE, directed=DIRECTED)
    # ``label_attribute`` asks networkx to keep each node's previous label
    # in the "old_label" node attribute.
    relabeled_graph = nx.convert_node_labels_to_integers(
        original_graph,
        label_attribute="old_label",
    )
    nx.write_edgelist(relabeled_graph, CONVERTED_NETWORK_FILE)
    return relabeled_graph