Ejemplo n.º 1
0
def test1():
    """Smoke-test the single-process undirected random walk.

    Loads the graph described by the global ``dataset_name`` from ``dloc``,
    runs ``undirected_randomwalk`` on it with default arguments, and prints
    the shape of the walks, the returned index, and the first and last ten
    entries of the walks. The run is bracketed by ``timer()`` and
    ``timer(False)``.
    """
    timer()
    container = GraphContainer(find_meta(dataset_name), dloc)
    walks, index = undirected_randomwalk(container.get_gt_graph())
    print(walks.shape)
    print(index)
    # Leading ten entries first, then the trailing ten.
    for window in (slice(None, 10), slice(-10, None)):
        print(walks[window])
    timer(False)
Ejemplo n.º 2
0
def test3():
    """Smoke-test the multi-process random walk.

    Builds a ``ParallelWalkPimp`` over the graph of the global
    ``dataset_name`` using ``undirected_rw_kernel`` with no extra kernel
    arguments and ``num_proc=10``, runs it, and prints the shape of the
    result followed by its first and last ten entries. The run is bracketed
    by ``timer()`` and ``timer(False)``.
    """
    timer()
    container = GraphContainer(find_meta(dataset_name), dloc)
    walker = ParallelWalkPimp(
        container.get_gt_graph(),
        undirected_rw_kernel,
        args=(),
        num_proc=10,
    )
    walks = walker.run()
    print(walks.shape)
    # Leading ten entries first, then the trailing ten.
    for window in (slice(None, 10), slice(-10, None)):
        print(walks[window])
    timer(False)
Ejemplo n.º 3
0
def main(dloc='/home/gear/Dropbox/CompletedProjects/motifwalk/data'):
    """Train an embedding model selected on the command line and save it.

    Steps, each bracketed by a ``timer()`` / ``timer(False)`` pair:

    1. Parse arguments with the module-level ``parser`` and load the graph
       for ``args.dataset``.
    2. Create the model named by ``args.model`` (case-insensitive):
       ``skipgram``, ``skipgram_motif`` (a second Skipgram is created for
       motif walks) or ``edge_embedding``; then build the primary model.
    3. Produce training data according to ``args.walk_type``: ``undirected``
       (serial or parallel depending on ``args.enable_parallel``) or
       ``edges``.
    4. Train the primary model and, if present, the motif model, which is
       built with the primary embedding as ``init_emb``.
    5. Save the embedding(s) with ``np.save`` and write a ``.info`` text file
       listing every parsed argument, all under the ``args.save_loc`` prefix.

    Args:
        dloc: Data directory handed to ``set_dataloc`` and ``GraphContainer``.
            Defaults to the path that was previously hard-coded.

    Raises:
        NotImplementedError: For the ``gcn`` / ``sc`` models and for any walk
            type other than ``undirected`` / ``edges`` (previously these
            printed "TODO" and then failed an ``assert``).
        ValueError: For an unknown model name, or when a motif model is
            requested but ``args.motif`` is empty.
    """
    from time import time

    args = parser.parse_args()
    set_dataloc(dloc)
    # The return value is not used in this function. The call is kept in case
    # it primes state that find_meta() relies on -- TODO confirm, else drop.
    get_metadata()

    graph = GraphContainer(find_meta(args.dataset), dloc)
    print("Generating gt graph...")
    timer()
    gt = graph.get_gt_graph()
    timer(False)

    print("Creating {} model...".format(args.model))
    timer()
    model_name = args.model.lower()
    modelm = None
    if model_name in ("skipgram", "skipgram_motif"):
        model = Skipgram(window_size=args.window_size,
                         num_skip=args.num_skip,
                         num_nsamp=args.num_neg,
                         name=args.dataset)
        if model_name == "skipgram_motif":
            # Companion model trained on motif walks; only its name differs.
            modelm = Skipgram(window_size=args.window_size,
                              num_skip=args.num_skip,
                              num_nsamp=args.num_neg,
                              name=args.dataset + "m")
    elif model_name == "edge_embedding":
        model = EdgeEmbedding(num_nsamp=args.num_neg, name=args.dataset)
    elif model_name in ("gcn", "sc"):
        # Explicit raise instead of assert: asserts vanish under ``python -O``.
        raise NotImplementedError(
            "Model {!r} is not implemented yet.".format(args.model))
    else:
        raise ValueError(
            "Unknown embedding model: {!r}".format(args.model))

    # When a motif model exists, both models are built with half of emb_dim.
    emb_dim = args.emb_dim // 2 if modelm is not None else args.emb_dim
    model.build(num_vertices=gt.num_vertices(),
                emb_dim=emb_dim,
                batch_size=args.batch_size,
                learning_rate=args.learning_rate,
                regw=args.reg_strength,
                device=args.device)
    timer(False)

    def generate_walks(target):
        """Run undirected random walks on ``target``, serial or parallel."""
        if args.enable_parallel:
            walker = ParallelWalkPimp(target,
                                      undirected_rw_kernel,
                                      args=(args.walk_length, ),
                                      num_proc=args.num_walk)
            return walker.run()
        result, _ = undirected_randomwalk(target,
                                          walk_length=args.walk_length,
                                          num_walk=args.num_walk)
        return result

    print("Generating walks...")
    timer()
    mwalks = None
    if args.walk_type == "undirected":
        walks = generate_walks(gt)
        timer(False)
        if modelm is not None:
            print("Generating motifwalk...")
            timer()
            if not args.motif:
                raise ValueError(
                    "A motif must be given for the skipgram_motif model.")
            # SECURITY: eval() runs arbitrary code taken from the command
            # line. TODO: replace with a lookup of known motif names.
            motif = eval(args.motif)
            # Anchors are honoured for serial and parallel walks alike;
            # previously the serial path silently ignored them.
            if args.anchors is not None:
                motif.anchors = eval(args.anchors)  # TODO: avoid eval
            motif_graph = construct_motif_graph(graph, motif)
            motif_view = filter_isolated(motif_graph)
            mwalks = generate_walks(motif_view)
            timer(False)
    elif args.walk_type == "edges":
        # Per the original author's note, this is the networkx version of
        # the graph rather than generated walks.
        walks = graph.get_graph()
        timer(False)
    else:
        raise NotImplementedError(
            "Walk type {!r} is not implemented yet.".format(args.walk_type))

    print("Start training ...")
    timer()
    emb = model.train(data=walks,
                      num_step=args.num_step,
                      log_step=args.log_step,
                      save_step=args.save_step,
                      learning_rate=args.learning_rate)
    memb = None
    if modelm is not None:
        print("Start building and training for motif model...")
        modelm.build(num_vertices=gt.num_vertices(),
                     emb_dim=args.emb_dim // 2,
                     batch_size=args.batch_size,
                     learning_rate=args.learning_rate,
                     regw=args.reg_strength,
                     device=args.device,
                     init_emb=emb)
        memb = modelm.train(data=mwalks,
                            num_step=args.num_step,
                            log_step=args.log_step,
                            save_step=args.save_step,
                            learning_rate=args.learning_rate)
    timer(False)

    # save_loc is used as a plain string prefix (no separator is inserted),
    # exactly as before, and the timestamp makes each run's files unique.
    uid = str(time())
    prefix = args.save_loc + "{}_{}".format(args.dataset, uid)
    # np.save appends ".npy" to names that do not already end with it.
    np.save(prefix + ".emb", emb)
    if memb is not None:
        np.save(prefix + ".memb", memb)

    with open(prefix + ".info", "w") as infofile:
        infofile.write(uid + '\n')
        infofile.writelines("{}: {}\n".format(key, val)
                            for key, val in vars(args).items())
Ejemplo n.º 4
0
def main(dloc='/home/gear/Dropbox/CompletedProjects/motifwalk/data'):
    """Train a ``MotifEmbedding`` model from command-line arguments.

    Steps, each bracketed by a ``timer()`` / ``timer(False)`` pair:

    1. Parse arguments with the module-level ``parser`` and load the graph
       for ``args.dataset``.
    2. Create and build the ``MotifEmbedding`` model.
    3. Build the motif graph for ``args.motif`` (optionally overriding its
       ``anchors``), drop isolated vertices, and copy its edges into an
       undirected networkx graph of integer pairs.
    4. Generate parallel random walks on the original graph.
    5. Train with the walks and the motif graph, then save the embedding
       with ``np.save`` and write a ``.info`` text file listing every parsed
       argument, both under the ``args.save_loc`` prefix.

    Args:
        dloc: Data directory handed to ``set_dataloc`` and ``GraphContainer``.
            Defaults to the path that was previously hard-coded.

    Raises:
        ValueError: If ``args.motif`` is empty (previously an ``assert``).
    """
    from time import time

    args = parser.parse_args()
    set_dataloc(dloc)
    # The return value is not used in this function. The call is kept in case
    # it primes state that find_meta() relies on -- TODO confirm, else drop.
    get_metadata()

    graph = GraphContainer(find_meta(args.dataset), dloc)
    print("Generating gt graph...")
    timer()
    gt = graph.get_gt_graph()
    timer(False)

    print("Creating MotifEmbedding model...")
    timer()
    model = MotifEmbedding(window_size=args.window_size,
                           num_skip=args.num_skip,
                           num_nsamp=args.num_neg,
                           name=args.dataset)
    model.build(num_vertices=gt.num_vertices(),
                emb_dim=args.emb_dim,
                batch_size=args.batch_size,
                learning_rate=args.learning_rate,
                regw=args.reg_strength,
                device=args.device)
    # Close the model-creation timer; it was previously left running when
    # the next timer() call was made.
    timer(False)

    print("Generating motifwalk...")
    timer()
    # Explicit raise instead of assert: asserts vanish under ``python -O``.
    if not args.motif:
        raise ValueError("A motif must be given.")
    # SECURITY: eval() runs arbitrary code taken from the command line.
    # TODO: replace with a lookup of known motif names.
    motif = eval(args.motif)
    print(motif)
    if args.anchors is not None:
        motif.anchors = eval(args.anchors)  # TODO: avoid eval
        print(motif.anchors)
    motif_graph = construct_motif_graph(graph, motif)
    motif_view = filter_isolated(motif_graph)

    # Convert every edge to a plain (int, int) pair of its first two items
    # before handing the list to networkx.
    all_motif_edges = [(int(ends[0]), int(ends[1]))
                       for ends in map(tuple, motif_view.edges())]
    print(len(all_motif_edges))
    motif_nx_graph = nx.Graph()
    motif_nx_graph.add_edges_from(all_motif_edges)
    timer(False)

    print("Create random walk context...")
    timer()
    pwalker = ParallelWalkPimp(gt,
                               undirected_rw_kernel,
                               args=(args.walk_length, ),
                               num_proc=args.num_walk)
    walks = pwalker.run()
    timer(False)

    print("Training with motif...")
    timer()
    emb = model.train(data=walks,
                      nxg=motif_nx_graph,
                      num_step=args.num_step,
                      num_mstep=args.num_mstep,
                      log_step=args.log_step,
                      save_step=args.save_step,
                      learning_rate=args.learning_rate,
                      finetune_rate=args.finetune_rate)
    timer(False)

    # save_loc is used as a plain string prefix (no separator is inserted),
    # exactly as before, and the timestamp makes each run's files unique.
    uid = str(time())
    prefix = args.save_loc + "{}_{}".format(args.dataset, uid)
    # np.save appends ".npy" to names that do not already end with it.
    np.save(prefix + ".emb", emb)

    with open(prefix + ".info", "w") as infofile:
        infofile.write(uid + '\n')
        infofile.write("Motif edge init.\n")
        infofile.writelines("{}: {}\n".format(key, val)
                            for key, val in vars(args).items())
Ejemplo n.º 5
0
def test3():
    """Train a Skipgram model end to end on serial random walks.

    Loads the graph for the global ``dataset_name``, builds a ``Skipgram``
    model with fixed hyper-parameters, generates undirected random walks
    with default settings, trains on them, and saves the resulting embedding
    to ``cora.emb.npy``. Every stage is announced on stdout and bracketed by
    ``timer()`` / ``timer(False)``.
    """
    container = GraphContainer(find_meta(dataset_name), dloc)

    print("Generating gt graph...")
    timer()
    gt_graph = container.get_gt_graph()
    timer(False)

    print("Creating Skipgram model...")
    timer()
    skipgram = Skipgram(window_size=5, num_skip=2, num_nsamp=15)
    skipgram.build(
        num_vertices=gt_graph.num_vertices(),
        learning_rate=0.001,
        opt=ADAM,
    )
    timer(False)

    print("Generating random walk...")
    timer()
    # The second element of the returned pair is not needed here.
    walks, _ = undirected_randomwalk(gt_graph)
    timer(False)

    print("Start training...")
    timer()
    embedding = skipgram.train(
        data=walks,
        num_step=1000000,
        log_step=2000,
        save_step=2,
    )
    timer(False)

    np.save("cora.emb.npy", embedding)