def main():
    """Count (or score the significance of) size-3/4 motifs in a dataset.

    Reads the graph named by ``args.dataset``, then for every canonical
    motif of the requested size writes one score line to the output file.
    Fix: ``motif_func`` was previously selected but never used — the loop
    re-duplicated the choice; it now dispatches through ``motif_func``.
    """
    args = parser.parse_args()
    print("Reading data...")
    set_dataloc(args.dloc)
    metadata = get_metadata()
    graph = GraphContainer(find_meta(args.dataset), args.dloc)
    print("Creating gt.Graph...")
    gt_graph = graph.get_gt_graph()
    assert args.motif_size == 4 or args.motif_size == 3  # Only motif 3 and 4
    # Pick the canonical motif list matching size and directedness.
    if args.motif_size == 3:
        all_motif = all_3 if gt_graph.is_directed() else all_u3
    else:
        all_motif = all_4 if gt_graph.is_directed() else all_u4
    # num_shuffles <= 0 means plain motif counting; positive values mean
    # z-score significance against that many shuffled null models.
    if args.num_shuffles <= 0:
        motif_func = motifs
    else:
        motif_func = motif_significance
    output = args.output + str(args.num_shuffles)
    print("Writing scores to file...")
    with open(output, "w") as ofile:
        info = "Dataset: {d} - Motif size: {m} - Directed: {di}\n".format(
            d=args.dataset, m=args.motif_size,
            di=str(gt_graph.is_directed()))
        ofile.write(info)
        for i, mc in enumerate(all_motif):
            # Randomize vertex order so the motif search order differs
            # between motifs.
            idx = gt_graph.vertex_index.copy("int")
            shuffle(idx.a)
            g = Graph(gt_graph, vorder=idx)
            kwargs = {"k": args.motif_size, "motif_list": [mc.gt_motif]}
            if args.num_shuffles > 0:
                kwargs["n_shuffles"] = args.num_shuffles
            score = motif_func(g, **kwargs)[1][0]
            r = "Motif index {}: {}\n".format(i, score)
            print(r)
            ofile.write(r)
    print("Motif analysis for {} is completed.".format(args.dataset))
def main(_):
    """Evaluate a saved node embedding with a top-k multilabel classifier.

    Loads the embedding (npy, raw, or text fallback), optionally merges an
    extra embedding, keeps only labeled nodes, fits the requested
    classifier on a train split, and prints the chosen metric scores.
    Fix: exiting with status 0 on an unknown classifier signaled success;
    it now exits nonzero.
    """
    args = parser.parse_args()
    print("Reading data ...")
    set_dataloc(args.dloc)
    metadata = get_metadata()
    graph = GraphContainer(find_meta(args.dataset), args.dloc)
    # Embedding may be stored as <file>.emb.npy, as a bare .npy file, or
    # as plain text (OSError from np.load on non-npy content).
    try:
        emb = np.load(args.embedding_file + ".emb.npy")
    except FileNotFoundError:
        emb = np.load(args.embedding_file)
    except OSError:
        print("Trying to read embedding input as text ...")
        emb = read_emb_from_txt(args.embedding_file)
    try:
        with open(args.embedding_file + ".info", 'r') as f:
            print(f.read())
    except FileNotFoundError:
        print("No info is found.")
    eemb = None
    if args.extra_embedding is not None:
        eemb = np.load(args.extra_embedding)
        merger = merge_funcs[merge_types.index(args.merge_type)]
        emb = merger(emb, eemb)
    labels = graph.get_labels()
    valid_locs = np.where(np.sum(labels, axis=1) > 0)[0]  # Only labeled data
    if len(valid_locs) < labels.shape[0]:
        # Some nodes carry no label at all; drop them from evaluation.
        print("Selecting only labeled data.")
        emb = emb[valid_locs]
        labels = labels[valid_locs]
    print("Fitting embedding to {} classifier ...".format(args.classifier))
    try:
        clf = classifiers[clf_names.index(args.classifier)]
    except ValueError:
        print("Error: {} is undefined.".format(args.classifier))
        sys.exit(1)  # nonzero status: this is a failure, not a clean exit
    X_train, X_test, y_train, y_test = train_test_split(
        emb, labels, train_size=args.training_ratio,
        random_state=args.random_seed)
    top_k_list = get_top_k(y_test)
    mclf = TopKRanker(clf)
    mclf.fit(X_train, y_train)
    test_results = mclf.predict(X_test, top_k_list,
                                num_classes=labels.shape[1])
    print("Reporting {} score for dataset {} with {} training ...".format(
        args.metric, args.dataset, args.training_ratio))
    sc_func = metric_funcs[metrics.index(args.metric)]
    sc, variation = sc_func(test_results, y_test)
    for s, v in zip(sc, variation):
        print("{} score: {}".format(v, s))
def test1():
    """Simple random walk"""
    timer()
    container = GraphContainer(find_meta(dataset_name), dloc)
    graph = container.get_gt_graph()
    walks, index = undirected_randomwalk(graph)
    # Show the shape plus a sample from each end of the walk matrix.
    print(walks.shape)
    print(index)
    print(walks[:10])
    print(walks[-10:])
    timer(False)
def test3():
    """Parallel random walk"""
    timer()
    container = GraphContainer(find_meta(dataset_name), dloc)
    # Ten worker processes, default kernel arguments.
    walker = ParallelWalkPimp(container.get_gt_graph(),
                              undirected_rw_kernel,
                              args=(), num_proc=10)
    walks = walker.run()
    print(walks.shape)
    print(walks[:10])
    print(walks[-10:])
    timer(False)
def test2(m_container):
    """Random walk on motif graph created by triangle"""
    container = GraphContainer(find_meta(dataset_name), dloc)
    motif_graph = construct_motif_graph(container, m_container)
    motif_view = filter_isolated(motif_graph)  # GraphView
    # Short walks (length 10, one walk per node) over the motif view.
    walks, index = undirected_randomwalk(motif_view, 10, 1)
    print(walks.shape)
    print(index)
    print(walks[:10])
    print(walks[-10:])
def test3():
    """End-to-end Skipgram training on plain random walks.

    NOTE(review): another ``test3`` appears earlier in this collection; if
    both live in the same module the later definition shadows the earlier
    one — confirm these belong to separate scripts.
    """
    network = GraphContainer(find_meta(dataset_name), dloc)
    print("Generating gt graph...")
    timer()
    graph = network.get_gt_graph()
    timer(False)
    print("Creating Skipgram model...")
    timer()
    model = Skipgram(window_size=5, num_skip=2, num_nsamp=15)
    model.build(num_vertices=graph.num_vertices(),
                learning_rate=0.001, opt=ADAM)
    timer(False)
    print("Generating random walk...")
    timer()
    walks, index = undirected_randomwalk(graph)
    timer(False)
    print("Start training...")
    timer()
    emb = model.train(data=walks, num_step=1000000,
                      log_step=2000, save_step=2)
    timer(False)
    np.save("cora.emb.npy", emb)
def main():
    """Train a (motif-)Skipgram or edge-embedding model on a dataset.

    Model choice, walk strategy and hyperparameters come from the
    module-level argument parser.  Embeddings (plus motif embeddings when
    a motif model is requested) and an ``.info`` file of the run settings
    are saved under ``args.save_loc``.

    Fixes: the serial walk path previously ignored ``--anchors`` while the
    parallel path honored it (both now share ``_build_motif_view``); the
    duplicated ``Skipgram`` construction and the duplicated ``model.build``
    calls (differing only in ``emb_dim``) are collapsed.
    """
    args = parser.parse_args()
    # NOTE(review): data location is hard-coded; consider a CLI flag.
    dloc = '/home/gear/Dropbox/CompletedProjects/motifwalk/data'
    set_dataloc(dloc)
    metadata = get_metadata()
    graph = GraphContainer(find_meta(args.dataset), dloc)
    print("Generating gt graph...")
    timer()
    gt = graph.get_gt_graph()
    timer(False)

    print("Creating {} model...".format(args.model))
    timer()
    model = None
    modelm = None
    model_name = args.model.lower()
    if model_name in ("skipgram", "skipgram_motif"):
        model = Skipgram(window_size=args.window_size,
                         num_skip=args.num_skip,
                         num_nsamp=args.num_neg,
                         name=args.dataset)
        if model_name == "skipgram_motif":
            # Secondary model trained on motif walks.
            modelm = Skipgram(window_size=args.window_size,
                              num_skip=args.num_skip,
                              num_nsamp=args.num_neg,
                              name=args.dataset + "m")
    elif "edge_embedding" == model_name:
        model = EdgeEmbedding(num_nsamp=args.num_neg, name=args.dataset)
    elif "gcn" == model_name:
        print("TODO")
    elif "sc" == model_name:
        print("TODO")
    else:
        print("Unknown embedding model.")
    assert model is not None
    # With a motif model, each of the two models gets half the dimensions.
    emb_dim = args.emb_dim // 2 if modelm is not None else args.emb_dim
    model.build(num_vertices=gt.num_vertices(), emb_dim=emb_dim,
                batch_size=args.batch_size,
                learning_rate=args.learning_rate,
                regw=args.reg_strength, device=args.device)
    timer(False)

    print("Generating walks...")
    timer()
    walks = None
    mwalks = None
    if "undirected" == args.walk_type and not args.enable_parallel:
        walks, _ = undirected_randomwalk(gt, walk_length=args.walk_length,
                                         num_walk=args.num_walk)
        timer(False)
        if modelm is not None:
            print("Generating motifwalk...")
            timer()
            motif_view = _build_motif_view(args, graph)
            mwalks, _ = undirected_randomwalk(motif_view,
                                              walk_length=args.walk_length,
                                              num_walk=args.num_walk)
    elif "undirected" == args.walk_type and args.enable_parallel:
        pwalker = ParallelWalkPimp(gt, undirected_rw_kernel,
                                   args=(args.walk_length, ),
                                   num_proc=args.num_walk)
        walks = pwalker.run()
        timer(False)
        if modelm is not None:
            print("Generating motifwalk...")
            timer()
            motif_view = _build_motif_view(args, graph)
            pmwalker = ParallelWalkPimp(motif_view, undirected_rw_kernel,
                                        args=(args.walk_length, ),
                                        num_proc=args.num_walk)
            mwalks = pmwalker.run()
    elif "edges" == args.walk_type:
        walks = graph.get_graph()  # walks here is the networkx version
    else:
        print("TODO")
    assert walks is not None
    timer(False)

    print("Start training ...")
    timer()
    emb = model.train(data=walks, num_step=args.num_step,
                      log_step=args.log_step, save_step=args.save_step,
                      learning_rate=args.learning_rate)
    memb = None
    if modelm is not None:
        print("Start building and training for motif model...")
        # Warm-start the motif model from the plain embedding.
        modelm.build(num_vertices=gt.num_vertices(),
                     emb_dim=args.emb_dim // 2,
                     batch_size=args.batch_size,
                     learning_rate=args.learning_rate,
                     regw=args.reg_strength, device=args.device,
                     init_emb=emb)
        memb = modelm.train(data=mwalks, num_step=args.num_step,
                            log_step=args.log_step,
                            save_step=args.save_step,
                            learning_rate=args.learning_rate)
    timer(False)

    from time import time
    uid = str(time())  # timestamp doubles as a unique run id
    np.save(args.save_loc + "{}_{}.emb".format(args.dataset, uid), emb)
    if memb is not None:
        np.save(args.save_loc + "{}_{}.memb".format(args.dataset, uid), memb)
    with open(args.save_loc + "{}_{}.info".format(args.dataset, uid),
              "w") as infofile:
        infofile.write(uid + '\n')
        args_dict = vars(args)
        for key, val in args_dict.items():
            infofile.write("{}: {}\n".format(key, val))


def _build_motif_view(args, graph):
    """Parse the motif spec from CLI args and return the motif-graph view
    with isolated vertices filtered out.  Shared by the serial and parallel
    walk paths so both honor ``--anchors``."""
    assert len(args.motif)
    # SECURITY: eval on CLI input; acceptable only for trusted local runs.
    motif = eval(args.motif)  # TODO: dont use eval
    if args.anchors is not None:
        motif.anchors = eval(args.anchors)  # TODO: avoid eval
    motif_graph = construct_motif_graph(graph, motif)
    return filter_isolated(motif_graph)
def main():
    """Train a MotifEmbedding model using motif-graph edges plus plain walks.

    Builds the motif graph for ``args.motif``, converts its edges to a
    networkx graph used for the motif objective, generates parallel random
    walks for the context objective, trains, and saves the embedding and an
    ``.info`` record of the run.  Fix: removed dead ``model = None`` /
    ``modelm = None`` assignments (``modelm`` was never used).
    """
    args = parser.parse_args()
    # NOTE(review): data location is hard-coded; consider a CLI flag.
    dloc = '/home/gear/Dropbox/CompletedProjects/motifwalk/data'
    set_dataloc(dloc)
    metadata = get_metadata()
    graph = GraphContainer(find_meta(args.dataset), dloc)
    print("Generating gt graph...")
    timer()
    gt = graph.get_gt_graph()
    timer(False)

    print("Creating MotifEmbedding model...")
    timer()
    model = MotifEmbedding(window_size=args.window_size,
                           num_skip=args.num_skip,
                           num_nsamp=args.num_neg,
                           name=args.dataset)
    model.build(num_vertices=gt.num_vertices(), emb_dim=args.emb_dim,
                batch_size=args.batch_size,
                learning_rate=args.learning_rate,
                regw=args.reg_strength, device=args.device)

    print("Generating motifwalk...")
    timer()
    assert len(args.motif)
    # SECURITY: eval on CLI input; acceptable only for trusted local runs.
    motif = eval(args.motif)  # TODO: dont use eval
    print(motif)
    if args.anchors is not None:
        motif.anchors = eval(args.anchors)  # TODO: avoid eval
        print(motif.anchors)
    motif_graph = construct_motif_graph(graph, motif)
    motif_view = filter_isolated(motif_graph)

    def to_int_tuple(edge):
        # graph-tool edge descriptor -> plain (int, int) pair for networkx.
        src, dst = tuple(edge)
        return (int(src), int(dst))

    all_motif_edges = [to_int_tuple(e) for e in motif_view.edges()]
    print(len(all_motif_edges))
    motif_nx_graph = nx.Graph()
    motif_nx_graph.add_edges_from(all_motif_edges)
    timer(False)

    print("Create random walk context...")
    timer()
    pwalker = ParallelWalkPimp(gt, undirected_rw_kernel,
                               args=(args.walk_length, ),
                               num_proc=args.num_walk)
    walks = pwalker.run()
    timer(False)

    print("Training with motif...")
    timer()
    emb = model.train(data=walks, nxg=motif_nx_graph,
                      num_step=args.num_step, num_mstep=args.num_mstep,
                      log_step=args.log_step, save_step=args.save_step,
                      learning_rate=args.learning_rate,
                      finetune_rate=args.finetune_rate)
    timer(False)

    from time import time
    uid = str(time())  # timestamp doubles as a unique run id
    np.save(args.save_loc + "{}_{}.emb".format(args.dataset, uid), emb)
    with open(args.save_loc + "{}_{}.info".format(args.dataset, uid),
              "w") as infofile:
        infofile.write(uid + '\n')
        args_dict = vars(args)
        infofile.write("Motif edge init.\n")
        for key, val in args_dict.items():
            infofile.write("{}: {}\n".format(key, val))