Ejemplo n.º 1
0
def main():
    """Score every known motif of one size against a dataset and save it.

    Command-line options are read from the module-level ``parser``.  The
    dataset named by ``args.dataset`` is loaded from ``args.dloc`` and each
    motif in the matching motif list (size 3 or 4, directed or undirected)
    is scored against it.  With ``args.num_shuffles <= 0`` the score comes
    from ``motifs``; otherwise it comes from ``motif_significance`` run with
    that many shuffles.  Every score is printed and also written to the
    file ``args.output + str(args.num_shuffles)``.

    Raises:
        ValueError: If ``args.motif_size`` is neither 3 nor 4.
    """
    args = parser.parse_args()

    # Explicit check rather than ``assert``: assertions are stripped under
    # ``python -O``, and any other size would then silently be handled by
    # the size-4 branch below.
    if args.motif_size not in (3, 4):
        raise ValueError(
            "Only motif sizes 3 and 4 are supported, got {}".format(
                args.motif_size))

    print("Reading data...")
    set_dataloc(args.dloc)
    get_metadata()  # return value is unused; call kept from the original
    graph = GraphContainer(find_meta(args.dataset), args.dloc)

    print("Creating gt.Graph...")
    gt_graph = graph.get_gt_graph()
    directed = gt_graph.is_directed()

    # Pick the motif list that matches both the size and the directedness.
    if args.motif_size == 3:
        all_motif = all_3 if directed else all_u3
    else:
        all_motif = all_4 if directed else all_u4

    output = args.output + str(args.num_shuffles)

    print("Writing scores to file...")
    with open(output, "w") as ofile:
        info = "Dataset: {d} - Motif size: {m} - Directed: {di}\n".format(
                    d=args.dataset, m=args.motif_size,
                    di=str(directed))
        ofile.write(info)

        for i, mc in enumerate(all_motif):
            # Each motif is scored on a copy of the graph whose vertex
            # order has been randomly permuted.
            idx = gt_graph.vertex_index.copy("int")
            shuffle(idx.a)
            g = Graph(gt_graph, vorder=idx)
            # Both calls are indexed with [1][0]: the first entry of the
            # second returned element (presumably graph-tool's counts /
            # z-scores list for the single motif passed in -- TODO confirm).
            if args.num_shuffles <= 0:
                score = motifs(g, k=args.motif_size,
                               motif_list=[mc.gt_motif])[1][0]
            else:
                score = motif_significance(g, k=args.motif_size,
                                           n_shuffles=args.num_shuffles,
                                           motif_list=[mc.gt_motif])[1][0]
            r = "Motif index {}: {}\n".format(i, score)
            print(r)
            ofile.write(r)

    print("Motif analysis for {} is completed.".format(args.dataset))
Ejemplo n.º 2
0
def _load_embedding(embedding_file):
    """Load an embedding matrix, trying binary formats before plain text.

    The attempts are, in order: ``embedding_file + ".emb.npy"`` via
    ``np.load``, ``embedding_file`` itself via ``np.load``, and finally
    ``read_emb_from_txt(embedding_file)``.  A missing suffixed file moves on
    to the bare path; a file that exists but cannot be parsed by ``np.load``
    moves on to the text reader.  A missing bare path is reported as
    ``FileNotFoundError``.
    """
    try:
        return np.load(embedding_file + ".emb.npy")
    except FileNotFoundError:
        # No suffixed file: try the path exactly as given.  This attempt
        # needs its own handler, because an exception raised inside an
        # ``except`` clause is not seen by sibling ``except`` clauses.
        try:
            return np.load(embedding_file)
        except FileNotFoundError:
            raise
        except (OSError, ValueError):
            pass
    except (OSError, ValueError):
        # NOTE(review): recent numpy versions raise ValueError rather than
        # OSError for non-npy content -- confirm against the pinned version.
        pass
    print("Trying to read embedding input as text ...")
    return read_emb_from_txt(embedding_file)


def main(_):
    """Evaluate a saved embedding on multi-label node classification.

    Command-line options are read from the module-level ``parser``.  The
    embedding is loaded (optionally merged with a second one), rows without
    any label are dropped, the data is split with ``train_test_split`` and a
    ``TopKRanker`` wrapping the chosen classifier is fitted.  The selected
    metric is then printed for the test split.

    Args:
        _: Ignored.

    Exits with status 1 when ``args.classifier`` is not a known classifier
    name.
    """
    args = parser.parse_args()

    print("Reading data ...")
    set_dataloc(args.dloc)
    get_metadata()  # return value is unused; call kept from the original
    graph = GraphContainer(find_meta(args.dataset), args.dloc)
    emb = _load_embedding(args.embedding_file)

    # The ".info" side file is optional; echo it when it is present.
    try:
        with open(args.embedding_file + ".info", 'r') as f:
            print(f.read())
    except FileNotFoundError:
        print("No info is found.")

    if args.extra_embedding is not None:
        eemb = np.load(args.extra_embedding)
        merger = merge_funcs[merge_types.index(args.merge_type)]
        emb = merger(emb, eemb)

    labels = graph.get_labels()
    valid_locs = np.where(np.sum(labels, axis=1) > 0)[0]  # Only labeled data
    if len(valid_locs) < labels.shape[0]:  # There are missing labeled data
        print("Selecting only labeled data.")
        emb = emb[valid_locs]
        labels = labels[valid_locs]

    print("Fitting embedding to {} classifier ...".format(args.classifier))
    try:
        clf = classifiers[clf_names.index(args.classifier)]
    except ValueError:
        print("Error: {} is undefined.".format(args.classifier))
        # Non-zero status: this is a failure, not a normal termination.
        sys.exit(1)
    X_train, X_test, y_train, y_test = train_test_split(
        emb,
        labels,
        train_size=args.training_ratio,
        random_state=args.random_seed)
    top_k_list = get_top_k(y_test)
    mclf = TopKRanker(clf)
    mclf.fit(X_train, y_train)
    test_results = mclf.predict(X_test,
                                top_k_list,
                                num_classes=labels.shape[1])

    print("Reporting {} score for dataset {} with {} training ...".format(
        args.metric, args.dataset, args.training_ratio))
    sc_func = metric_funcs[metrics.index(args.metric)]
    sc, variation = sc_func(test_results, y_test)
    for s, v in zip(sc, variation):
        print("{} score: {}".format(v, s))
Ejemplo n.º 3
0
def test1():
    """Run a plain undirected random walk and print a summary of the result.

    Loads the dataset selected by the module-level ``dataset_name`` and
    ``dloc``, walks its graph with the default walk settings, and prints the
    walk array's shape, the returned index, and the first and last ten
    walks.  The whole run is bracketed by ``timer()`` / ``timer(False)``.
    """
    timer()
    container = GraphContainer(find_meta(dataset_name), dloc)
    graph = container.get_gt_graph()
    result = undirected_randomwalk(graph)
    walks, index = result
    print(walks.shape)
    print(index)
    head = walks[:10]
    print(head)
    tail = walks[-10:]
    print(tail)
    timer(False)
Ejemplo n.º 4
0
def test3():
    """Run the parallel random walker and print a summary of the result.

    Loads the dataset selected by the module-level ``dataset_name`` and
    ``dloc``, runs ``ParallelWalkPimp`` with the undirected walk kernel, no
    extra kernel arguments and ``num_proc=10``, then prints the shape of the
    result and its first and last ten entries.  The whole run is bracketed
    by ``timer()`` / ``timer(False)``.
    """
    timer()
    container = GraphContainer(find_meta(dataset_name), dloc)
    graph = container.get_gt_graph()
    walker = ParallelWalkPimp(graph, undirected_rw_kernel,
                              args=(), num_proc=10)
    walks = walker.run()
    print(walks.shape)
    head = walks[:10]
    print(head)
    tail = walks[-10:]
    print(tail)
    timer(False)
Ejemplo n.º 5
0
def test2(m_container):
    """Random-walk the motif graph built from ``m_container`` and print it.

    Args:
        m_container: Motif description handed to ``construct_motif_graph``
            together with the loaded dataset.

    The motif graph is passed through ``filter_isolated`` before walking.
    The walk call receives the positional arguments ``10`` and ``1``; the
    shape, the returned index and the first and last ten walks are printed.
    """
    container = GraphContainer(find_meta(dataset_name), dloc)
    # filter_isolated returns a GraphView over the motif graph.
    view = filter_isolated(construct_motif_graph(container, m_container))
    result = undirected_randomwalk(view, 10, 1)
    walks, index = result
    print(walks.shape)
    print(index)
    head = walks[:10]
    print(head)
    tail = walks[-10:]
    print(tail)
Ejemplo n.º 6
0
def test3():
    """Train a Skipgram model on random walks and save the embedding.

    Each stage (graph creation, model construction, walk generation and
    training) is announced on stdout and bracketed by ``timer()`` /
    ``timer(False)``.  The trained embedding is saved to ``cora.emb.npy``
    in the current working directory.
    """
    container = GraphContainer(find_meta(dataset_name), dloc)

    print("Generating gt graph...")
    timer()
    graph = container.get_gt_graph()
    timer(False)

    print("Creating Skipgram model...")
    timer()
    skipgram = Skipgram(window_size=5, num_skip=2, num_nsamp=15)
    vertex_count = graph.num_vertices()
    skipgram.build(num_vertices=vertex_count,
                   learning_rate=0.001,
                   opt=ADAM)
    timer(False)

    print("Generating random walk...")
    timer()
    walks, index = undirected_randomwalk(graph)
    timer(False)

    print("Start training...")
    timer()
    embedding = skipgram.train(data=walks,
                               num_step=1000000,
                               log_step=2000,
                               save_step=2)
    timer(False)

    np.save("cora.emb.npy", embedding)
Ejemplo n.º 7
0
def _motif_view_from_args(graph, args):
    """Build the motif graph selected on the command line, minus isolates.

    ``args.motif`` and, when given, ``args.anchors`` are Python expressions
    that are evaluated to obtain the motif object and its anchors.
    """
    assert len(args.motif)
    # SECURITY: eval() executes arbitrary code taken from the command line;
    # only run this with trusted arguments.
    motif = eval(args.motif)  # TODO: dont use eval
    if args.anchors is not None:
        motif.anchors = eval(args.anchors)  # TODO: avoid eval
    motif_graph = construct_motif_graph(graph, motif)
    return filter_isolated(motif_graph)


def _undirected_walks(view, args):
    """Generate undirected random walks on ``view``, in parallel if asked."""
    if args.enable_parallel:
        walker = ParallelWalkPimp(view,
                                  undirected_rw_kernel,
                                  args=(args.walk_length, ),
                                  num_proc=args.num_walk)
        return walker.run()
    walks, _ = undirected_randomwalk(view,
                                     walk_length=args.walk_length,
                                     num_walk=args.num_walk)
    return walks


def main():
    """Train an embedding model on a dataset and save the result.

    Command-line options are read from the module-level ``parser``.  The
    steps are: load the dataset, create and build the requested model,
    generate the training data (random walks, or the networkx graph for the
    ``edges`` walk type), train, and finally save the embedding(s) plus an
    ``.info`` file listing every argument.

    For the ``skipgram_motif`` model two Skipgram models are used, each
    built with half of ``args.emb_dim``; the second one is initialised with
    the first model's embedding and trained on walks over the motif graph.

    Raises:
        AssertionError: If the requested model or walk type is unknown or
            not implemented, or if ``args.motif`` is empty when needed.
    """
    args = parser.parse_args()
    dloc = '/home/gear/Dropbox/CompletedProjects/motifwalk/data'
    set_dataloc(dloc)
    get_metadata()  # return value is unused; call kept from the original

    graph = GraphContainer(find_meta(args.dataset), dloc)
    print("Generating gt graph...")
    timer()
    gt = graph.get_gt_graph()
    timer(False)

    print("Creating {} model...".format(args.model))
    timer()
    model = None
    modelm = None
    model_name = args.model.lower()
    if model_name in ("skipgram", "skipgram_motif"):
        model = Skipgram(window_size=args.window_size,
                         num_skip=args.num_skip,
                         num_nsamp=args.num_neg,
                         name=args.dataset)
        if model_name == "skipgram_motif":
            modelm = Skipgram(window_size=args.window_size,
                              num_skip=args.num_skip,
                              num_nsamp=args.num_neg,
                              name=args.dataset + "m")
    elif model_name == "edge_embedding":
        model = EdgeEmbedding(num_nsamp=args.num_neg, name=args.dataset)
    elif model_name in ("gcn", "sc"):
        print("TODO")
    else:
        print("Unknown embedding model.")
    assert model is not None
    # With a second (motif) model, each model gets half of the dimensions.
    emb_dim = args.emb_dim // 2 if modelm is not None else args.emb_dim
    model.build(num_vertices=gt.num_vertices(),
                emb_dim=emb_dim,
                batch_size=args.batch_size,
                learning_rate=args.learning_rate,
                regw=args.reg_strength,
                device=args.device)
    timer(False)

    print("Generating walks...")
    timer()
    walks = None
    mwalks = None
    if "undirected" == args.walk_type:
        walks = _undirected_walks(gt, args)
        if modelm is not None:
            # Stop the plain-walk timer only when a new one is started, so
            # every timer() is matched by exactly one timer(False).
            timer(False)
            print("Generating motifwalk...")
            timer()
            # Anchors are honoured for sequential and parallel walks alike.
            motif_view = _motif_view_from_args(graph, args)
            mwalks = _undirected_walks(motif_view, args)
    elif "edges" == args.walk_type:
        walks = graph.get_graph()  # walks here is the networkx version
    else:
        print("TODO")
    assert walks is not None
    timer(False)

    print("Start training ...")
    timer()
    emb = model.train(data=walks,
                      num_step=args.num_step,
                      log_step=args.log_step,
                      save_step=args.save_step,
                      learning_rate=args.learning_rate)
    memb = None
    if modelm is not None:
        print("Start building and training for motif model...")
        modelm.build(num_vertices=gt.num_vertices(),
                     emb_dim=args.emb_dim // 2,
                     batch_size=args.batch_size,
                     learning_rate=args.learning_rate,
                     regw=args.reg_strength,
                     device=args.device,
                     init_emb=emb)
        memb = modelm.train(data=mwalks,
                            num_step=args.num_step,
                            log_step=args.log_step,
                            save_step=args.save_step,
                            learning_rate=args.learning_rate)
    timer(False)

    from time import time
    # The current timestamp makes the output file names unique per run.
    uid = str(time())
    # np.save appends ".npy" because the given names do not end with it.
    np.save(args.save_loc + "{}_{}.emb".format(args.dataset, uid), emb)
    if memb is not None:
        np.save(args.save_loc + "{}_{}.memb".format(args.dataset, uid), memb)

    with open(args.save_loc + "{}_{}.info".format(args.dataset, uid),
              "w") as infofile:
        infofile.write(uid + '\n')
        for key, val in vars(args).items():
            infofile.write("{}: {}\n".format(key, val))
Ejemplo n.º 8
0
def main():
    """Train a MotifEmbedding model on a dataset and save the embedding.

    Command-line options are read from the module-level ``parser``.  The
    steps are: load the dataset, build the model, turn the motif graph's
    edges into a networkx graph, generate parallel random walks on the
    original graph, train with both, and save the embedding plus an
    ``.info`` file listing every argument.

    Raises:
        AssertionError: If ``args.motif`` is empty.
    """
    args = parser.parse_args()
    dloc = '/home/gear/Dropbox/CompletedProjects/motifwalk/data'
    set_dataloc(dloc)
    get_metadata()  # return value is unused; call kept from the original

    graph = GraphContainer(find_meta(args.dataset), dloc)
    print("Generating gt graph...")
    timer()
    gt = graph.get_gt_graph()
    timer(False)

    print("Creating MotifEmbedding model...")
    timer()
    model = MotifEmbedding(window_size=args.window_size,
                           num_skip=args.num_skip,
                           num_nsamp=args.num_neg,
                           name=args.dataset)
    model.build(num_vertices=gt.num_vertices(),
                emb_dim=args.emb_dim,
                batch_size=args.batch_size,
                learning_rate=args.learning_rate,
                regw=args.reg_strength,
                device=args.device)
    # Close the model-creation timer before the next stage starts its own.
    timer(False)

    print("Generating motifwalk...")
    timer()
    assert len(args.motif)
    # SECURITY: eval() executes arbitrary code taken from the command line;
    # only run this with trusted arguments.
    motif = eval(args.motif)  # TODO: dont use eval
    print(motif)
    if args.anchors is not None:
        motif.anchors = eval(args.anchors)  # TODO: avoid eval
        print(motif.anchors)
    motif_graph = construct_motif_graph(graph, motif)
    motif_view = filter_isolated(motif_graph)

    def to_int_tuple(t):
        """Convert an edge into a plain ``(int, int)`` pair."""
        t = tuple(t)
        return (int(t[0]), int(t[1]))

    all_motif_edges = [to_int_tuple(edge) for edge in motif_view.edges()]
    print(len(all_motif_edges))
    motif_nx_graph = nx.Graph()
    motif_nx_graph.add_edges_from(all_motif_edges)
    timer(False)

    print("Create random walk context...")
    timer()
    pwalker = ParallelWalkPimp(gt,
                               undirected_rw_kernel,
                               args=(args.walk_length, ),
                               num_proc=args.num_walk)
    walks = pwalker.run()
    timer(False)

    print("Training with motif...")
    timer()
    emb = model.train(data=walks,
                      nxg=motif_nx_graph,
                      num_step=args.num_step,
                      num_mstep=args.num_mstep,
                      log_step=args.log_step,
                      save_step=args.save_step,
                      learning_rate=args.learning_rate,
                      finetune_rate=args.finetune_rate)
    timer(False)

    from time import time
    # The current timestamp makes the output file names unique per run.
    uid = str(time())
    # np.save appends ".npy" because the given name does not end with it.
    np.save(args.save_loc + "{}_{}.emb".format(args.dataset, uid), emb)

    with open(args.save_loc + "{}_{}.info".format(args.dataset, uid),
              "w") as infofile:
        infofile.write(uid + '\n')
        infofile.write("Motif edge init.\n")
        for key, val in vars(args).items():
            infofile.write("{}: {}\n".format(key, val))