def test1():
    """Smoke-test a plain undirected random walk.

    Loads the dataset named by the module-level ``dataset_name`` from
    ``dloc``, runs ``undirected_randomwalk`` with its default settings on
    the graph-tool graph, and prints the shape of the result, the returned
    index, and the first and last ten entries. The whole run is bracketed
    by ``timer()`` / ``timer(False)``.
    """
    timer()

    container = GraphContainer(find_meta(dataset_name), dloc)
    gt_graph = container.get_gt_graph()
    walks, index = undirected_randomwalk(gt_graph)

    # Quick visual inspection of the generated walks.
    print(walks.shape)
    print(index)
    print(walks[:10])
    print(walks[-10:])

    timer(False)
def test3():
    """Smoke-test the parallel random walker.

    Loads the dataset named by the module-level ``dataset_name`` from
    ``dloc``, runs ``ParallelWalkPimp`` with ``undirected_rw_kernel``, no
    extra kernel arguments and ``num_proc=10``, then prints the shape of
    the result and its first and last ten entries. The whole run is
    bracketed by ``timer()`` / ``timer(False)``.
    """
    timer()

    container = GraphContainer(find_meta(dataset_name), dloc)
    gt_graph = container.get_gt_graph()
    walks = ParallelWalkPimp(gt_graph, undirected_rw_kernel,
                             args=(), num_proc=10).run()

    # Quick visual inspection of the generated walks.
    print(walks.shape)
    print(walks[:10])
    print(walks[-10:])

    timer(False)
def main():
    """Build, train and save an embedding model chosen on the command line.

    Steps (each bracketed by ``timer()`` / ``timer(False)`` calls):

    1. Parse arguments with the module-level ``parser`` and load the
       dataset ``args.dataset`` into a ``GraphContainer``.
    2. Create the model selected by ``args.model`` (compared in lower
       case): ``skipgram``, ``skipgram_motif`` or ``edge_embedding``.
       ``gcn`` and ``sc`` only print ``TODO``. For ``skipgram_motif`` a
       second Skipgram model is created and both models use
       ``args.emb_dim // 2`` dimensions.
    3. Produce the training data according to ``args.walk_type``:
       ``undirected`` generates random walks (serially, or through
       ``ParallelWalkPimp`` when ``args.enable_parallel`` is set) and,
       for the motif model, a second set of walks on the motif graph;
       ``edges`` passes the result of ``graph.get_graph()`` instead.
    4. Train the model(s); the motif model is built with
       ``init_emb`` set to the first model's embedding.
    5. Save the embedding(s) with ``np.save`` and write a ``.info`` text
       file listing the run id and every parsed argument.

    Raises:
        AssertionError: if no model could be created for ``args.model``,
            if no training data was produced for ``args.walk_type``, or
            if a motif walk is requested while ``args.motif`` is empty.
    """
    args = parser.parse_args()
    dloc = '/home/gear/Dropbox/CompletedProjects/motifwalk/data'
    set_dataloc(dloc)
    # The return value was never used; the call itself is kept unchanged.
    get_metadata()
    graph = GraphContainer(find_meta(args.dataset), dloc)

    def build_motif_view():
        """Build the motif graph for ``args.motif`` and pass it through
        ``filter_isolated``.

        ``args.anchors`` is applied whenever it is given. Previously only
        the parallel walk path honoured it; the serial path silently
        ignored it.
        """
        assert len(args.motif)
        # NOTE(review): eval() executes arbitrary command-line text.
        motif = eval(args.motif)  # TODO: dont use eval
        if args.anchors is not None:
            motif.anchors = eval(args.anchors)  # TODO: avoid eval
        return filter_isolated(construct_motif_graph(graph, motif))

    print("Generating gt graph...")
    timer()
    gt = graph.get_gt_graph()
    timer(False)

    print("Creating {} model...".format(args.model))
    timer()
    model = None
    modelm = None
    model_name = args.model.lower()
    if model_name == "skipgram":
        model = Skipgram(window_size=args.window_size,
                         num_skip=args.num_skip,
                         num_nsamp=args.num_neg,
                         name=args.dataset)
    elif model_name == "skipgram_motif":
        model = Skipgram(window_size=args.window_size,
                         num_skip=args.num_skip,
                         num_nsamp=args.num_neg,
                         name=args.dataset)
        modelm = Skipgram(window_size=args.window_size,
                          num_skip=args.num_skip,
                          num_nsamp=args.num_neg,
                          name=args.dataset + "m")
    elif model_name == "edge_embedding":
        model = EdgeEmbedding(num_nsamp=args.num_neg, name=args.dataset)
    elif model_name in ("gcn", "sc"):
        print("TODO")
    else:
        print("Unknown embedding model.")
    assert model is not None

    # With a motif model the embedding budget is split in half between
    # the two models; the motif model itself is built just before training.
    emb_dim = args.emb_dim // 2 if modelm is not None else args.emb_dim
    model.build(num_vertices=gt.num_vertices(),
                emb_dim=emb_dim,
                batch_size=args.batch_size,
                learning_rate=args.learning_rate,
                regw=args.reg_strength,
                device=args.device)
    timer(False)

    print("Generating walks...")
    timer()
    walks = None
    mwalks = None
    if args.walk_type == "undirected":
        if args.enable_parallel:
            pwalker = ParallelWalkPimp(gt, undirected_rw_kernel,
                                       args=(args.walk_length, ),
                                       num_proc=args.num_walk)
            walks = pwalker.run()
        else:
            walks, _ = undirected_randomwalk(gt,
                                             walk_length=args.walk_length,
                                             num_walk=args.num_walk)
        timer(False)
        if modelm is not None:
            print("Generating motifwalk...")
            timer()
            motif_view = build_motif_view()
            if args.enable_parallel:
                pmwalker = ParallelWalkPimp(motif_view, undirected_rw_kernel,
                                            args=(args.walk_length, ),
                                            num_proc=args.num_walk)
                mwalks = pmwalker.run()
            else:
                mwalks, _ = undirected_randomwalk(
                    motif_view,
                    walk_length=args.walk_length,
                    num_walk=args.num_walk)
    elif args.walk_type == "edges":
        walks = graph.get_graph()  # walks here is the networkx version
    else:
        print("TODO")
    assert walks is not None
    # NOTE(review): on the "undirected" path without a motif model,
    # timer(False) has already been called above with no timer() in
    # between - confirm timer() tolerates the repeated stop.
    timer(False)

    print("Start training ...")
    timer()
    emb = model.train(data=walks,
                      num_step=args.num_step,
                      log_step=args.log_step,
                      save_step=args.save_step,
                      learning_rate=args.learning_rate)
    memb = None
    if modelm is not None:
        print("Start building and training for motif model...")
        modelm.build(num_vertices=gt.num_vertices(),
                     emb_dim=args.emb_dim // 2,
                     batch_size=args.batch_size,
                     learning_rate=args.learning_rate,
                     regw=args.reg_strength,
                     device=args.device,
                     init_emb=emb)
        memb = modelm.train(data=mwalks,
                            num_step=args.num_step,
                            log_step=args.log_step,
                            save_step=args.save_step,
                            learning_rate=args.learning_rate)
    timer(False)

    # The current timestamp doubles as a unique id shared by all output files.
    from time import time
    uid = str(time())
    # np.save appends ".npy" to names that do not already end with it.
    np.save(args.save_loc + "{}_{}.emb".format(args.dataset, uid), emb)
    if memb is not None:
        np.save(args.save_loc + "{}_{}.memb".format(args.dataset, uid), memb)
    with open(args.save_loc + "{}_{}.info".format(args.dataset, uid),
              "w") as infofile:
        infofile.write(uid + '\n')
        args_dict = vars(args)
        for key, val in args_dict.items():
            infofile.write("{}: {}\n".format(key, val))
def main():
    """Train a ``MotifEmbedding`` model from command-line arguments.

    Parses arguments with the module-level ``parser``, loads the dataset
    ``args.dataset``, builds a ``MotifEmbedding`` model, constructs the
    motif graph described by ``args.motif`` (optionally with
    ``args.anchors``) and copies its edges into a ``networkx`` graph,
    generates random walks on the original graph with
    ``ParallelWalkPimp``, trains the model on both, and finally saves the
    embedding with ``np.save`` plus a ``.info`` text file listing the run
    id and every parsed argument.

    Raises:
        AssertionError: if ``args.motif`` is empty.
    """
    args = parser.parse_args()
    data_root = '/home/gear/Dropbox/CompletedProjects/motifwalk/data'
    set_dataloc(data_root)
    get_metadata()  # result is not used in this function
    container = GraphContainer(find_meta(args.dataset), data_root)

    print("Generating gt graph...")
    timer()
    gt_graph = container.get_gt_graph()
    timer(False)

    print("Creating MotifEmbedding model...")
    timer()
    model = MotifEmbedding(window_size=args.window_size,
                           num_skip=args.num_skip,
                           num_nsamp=args.num_neg,
                           name=args.dataset)
    model.build(num_vertices=gt_graph.num_vertices(),
                emb_dim=args.emb_dim,
                batch_size=args.batch_size,
                learning_rate=args.learning_rate,
                regw=args.reg_strength,
                device=args.device)

    print("Generating motifwalk...")
    timer()
    assert len(args.motif)
    motif = eval(args.motif)  # TODO: dont use eval
    print(motif)
    if args.anchors is not None:
        motif.anchors = eval(args.anchors)  # TODO: avoid eval
        # NOTE(review): the source layout was collapsed; this print is
        # assumed to belong inside the branch - confirm.
        print(motif.anchors)
    motif_view = filter_isolated(construct_motif_graph(container, motif))

    # Convert every edge into a pair of plain Python ints for networkx.
    all_motif_edges = [(int(ends[0]), int(ends[1]))
                       for ends in map(tuple, motif_view.edges())]
    print(len(all_motif_edges))
    motif_nx_graph = nx.Graph()
    motif_nx_graph.add_edges_from(all_motif_edges)
    timer(False)

    print("Create random walk context...")
    timer()
    walks = ParallelWalkPimp(gt_graph, undirected_rw_kernel,
                             args=(args.walk_length, ),
                             num_proc=args.num_walk).run()
    timer(False)

    print("Training with motif...")
    timer()
    emb = model.train(data=walks,
                      nxg=motif_nx_graph,
                      num_step=args.num_step,
                      num_mstep=args.num_mstep,
                      log_step=args.log_step,
                      save_step=args.save_step,
                      learning_rate=args.learning_rate,
                      finetune_rate=args.finetune_rate)
    timer(False)

    # The current timestamp doubles as a unique id shared by both outputs.
    from time import time
    uid = str(time())
    out_stem = args.save_loc + "{}_{}".format(args.dataset, uid)
    np.save(out_stem + ".emb", emb)
    with open(out_stem + ".info", "w") as infofile:
        infofile.write(uid + '\n')
        infofile.write("Motif edge init.\n")
        infofile.writelines("{}: {}\n".format(name, value)
                            for name, value in vars(args).items())
def test3():
    """End-to-end check: random walks -> Skipgram training -> saved file.

    Loads the dataset named by the module-level ``dataset_name`` from
    ``dloc``, builds a ``Skipgram`` model (window 5, 2 skips, 15 negative
    samples) with learning rate 0.001 and ``opt=ADAM``, trains it on
    walks from ``undirected_randomwalk`` for 1,000,000 steps, and saves
    the resulting embedding to ``cora.emb.npy`` in the working directory.
    Every stage is bracketed by ``timer()`` / ``timer(False)``.
    """
    container = GraphContainer(find_meta(dataset_name), dloc)

    print("Generating gt graph...")
    timer()
    gt_graph = container.get_gt_graph()
    timer(False)

    print("Creating Skipgram model...")
    timer()
    skipgram = Skipgram(window_size=5, num_skip=2, num_nsamp=15)
    skipgram.build(num_vertices=gt_graph.num_vertices(),
                   learning_rate=0.001,
                   opt=ADAM)
    timer(False)

    print("Generating random walk...")
    timer()
    # The second return value (the index) is not needed for training.
    walks, _ = undirected_randomwalk(gt_graph)
    timer(False)

    print("Start training...")
    timer()
    embedding = skipgram.train(data=walks,
                               num_step=1000000,
                               log_step=2000,
                               save_step=2)
    timer(False)

    np.save("cora.emb.npy", embedding)