def evaluate_sage():
    """Launch the GraphSAGE baseline over every dataset, shelling out in parallel.

    Relies on module-level ``args`` (for ``n_jobs``) and ``Parallel``/``delayed``;
    switches the working directory to the project root before spawning jobs.
    """
    datasets = iterate_datasets()
    os.chdir("/nfs/zty/Graph/Dynamic-Graph")
    template = ("python -m experiment.graphsage --dataset {dataset} --epochs 50 "
                "--dropout 0.2 --weight_decay 1e-5 --learning_rate=0.0001 "
                "--nodisplay ")
    jobs = [template.format(dataset=ds) for ds in datasets]
    print("Preprocessing finished.")
    Parallel(n_jobs=args.n_jobs)(delayed(os.system)(job) for job in jobs)
def evaluate_htne(project_dir="/nfs/zty/Graph/4-htne/emb"):
    """Evaluate HTNE embeddings on link prediction with a logistic-regression probe.

    Either retrains the embeddings (``args.run``) or reuses pretrained files under
    *project_dir*, then converts labelled edges to tabular features, scores them
    with ``lr_evaluate`` and records the metrics via ``write_result``.
    """
    names = iterate_datasets(dataset=args.dataset)[args.start:args.end]
    if args.run:
        logger.info("Running {} embedding programs.".format(args.method))
        run_htne(dataset=args.dataset, n_jobs=args.n_jobs, fname=names)
        logger.info("Done training embedding.")
    else:
        logger.info("Use pretrained {} embeddings.".format(args.method))
    for name in names:
        logger.info(name)
        edge_lists, node_lists = load_label_edges(dataset=name)
        train_df, valid_df, test_df = id_map(edge_lists[0], node_lists[0])
        # Only one history length was swept in these experiments.
        for hist_len in [20]:
            emb_path = "{}/{}.emb{}".format(project_dir, name, hist_len)
            id2idx, embeds = load_embeddings(emb_path, skiprows=1, sep=" ")
            X_train = edge2tabular(train_df, id2idx, embeds)
            X_valid = edge2tabular(valid_df, id2idx, embeds)
            X_test = edge2tabular(test_df, id2idx, embeds)
            vauc, acc, f1, auc = lr_evaluate(
                X_train, train_df["label"],
                X_valid, valid_df["label"],
                X_test, test_df["label"])
            write_result(name, "htne",
                         {"hist_len": hist_len, "epoch": 50},
                         (vauc, acc, f1, auc))
def evaluate_node2vec(project_dir="/nfs/zty/Graph/0-node2vec/emb"):
    """Evaluate node2vec embeddings over the (p, q) return/in-out parameter grid.

    Optionally retrains via ``run_node2vec`` (``args.run``); otherwise reads the
    pretrained per-(p, q) embedding files and scores each with ``lr_evaluate``.
    """
    names = iterate_datasets()[args.start:args.end]
    if args.run:
        logger.info("Running {} embedding programs.".format("node2vec"))
        run_node2vec(dataset=args.dataset, n_jobs=args.n_jobs, fname=names,
                     start=args.start, end=args.end, times=args.times)
        logger.info("Done node2vec embedding.")
    else:
        logger.info("Use pretrained {} embeddings.".format("node2vec"))
    grid = [0.25, 0.5, 1, 2, 4]
    for name, p, q in product(names, grid, grid):
        logger.info("dataset={}, p={:.2f}, q={:.2f}".format(name, p, q))
        edge_lists, node_lists = load_label_edges(dataset=name)
        train_df, valid_df, test_df = id_map(edge_lists[0], node_lists[0])
        emb_path = "{}/{}-{p:.2f}-{q:.2f}.emb".format(project_dir, name, p=p, q=q)
        id2idx, embs = load_embeddings(emb_path, skiprows=1)
        X_train = edge2tabular(train_df, id2idx, embs)
        X_valid = edge2tabular(valid_df, id2idx, embs)
        X_test = edge2tabular(test_df, id2idx, embs)
        vauc, acc, f1, auc = lr_evaluate(
            X_train, train_df["label"],
            X_valid, valid_df["label"],
            X_test, test_df["label"])
        write_result(name, "node2vec", {"p": p, "q": q}, (vauc, acc, f1, auc))
def evaluate_ctdne(project_dir="/nfs/zty/Graph/Dynamic-Graph/ctdne_embs"):
    """Evaluate CTDNE embeddings (fixed walk hyper-parameters) per dataset.

    When ``args.run`` is set, embeddings are regenerated in parallel first; every
    dataset is then scored with the logistic-regression probe and the metrics
    recorded via ``write_result``.
    """
    names = iterate_datasets(dataset=args.dataset)[args.start:args.end]
    if args.run:
        logger.info("Running {} embedding programs.".format(args.method))
        Parallel(n_jobs=args.n_jobs)(
            delayed(run_ctdne)(fname=[nm]) for nm in names)
        logger.info("Done {} embeddings.".format(args.method))
    for name in names:
        logger.info(
            "dataset={}, num_walk=10, walk_length=80, context_window=10".format(name))
        emb_path = "{}/{}.emb".format(project_dir, name)
        id2idx, embeds = load_embeddings(emb_path, skiprows=0, sep=" ")
        edge_lists, node_lists = load_label_edges(dataset=name)
        train_df, valid_df, test_df = id_map(edge_lists[0], node_lists[0])
        X_train = edge2tabular(train_df, id2idx, embeds)
        X_valid = edge2tabular(valid_df, id2idx, embeds)
        X_test = edge2tabular(test_df, id2idx, embeds)
        vauc, acc, f1, auc = lr_evaluate(
            X_train, train_df["label"],
            X_valid, valid_df["label"],
            X_test, test_df["label"])
        write_result(name, "ctdne",
                     {"num_walk": 10, "walk_length": 80, "context_window": 10},
                     (vauc, acc, f1, auc))
def run_htne(dataset="all", project_dir="/nfs/zty/Graph/4-htne/", n_jobs=4, **kwargs):
    """Prepare HTNE edge-list inputs and launch training for each dataset.

    Datasets missing a preprocessed ``.edges`` file get one generated from the
    training split: timestamps are min-max normalised and node ids remapped via
    the dataset's node table. Commands are then run in parallel through the shell.
    """
    cmd_tpl = "python {project_dir}/HTNE.py -d {input_path} -o {output_path} --hist-len {hist_len}"
    jobs = []
    for name in iterate_datasets(dataset=dataset):
        edges_file = os.path.join(project_dir, "data/{}.edges".format(name))
        if not os.path.exists(edges_file):
            # NOTE(review): indexing with [0] and then unpacking two values looks
            # suspicious — confirm load_data()'s return shape against its definition.
            df, nodes = load_data(dataset=name, mode="train")[0]
            ts = df["timestamp"]
            df["timestamp"] = (ts - ts.min()) / (ts.max() - ts.min())
            id2idx = {row.node_id: row.id_map for row in nodes.itertuples()}
            df["from_node_id"] = df["from_node_id"].map(id2idx)
            df["to_node_id"] = df["to_node_id"].map(id2idx)
            df = df[["from_node_id", "to_node_id", "timestamp"]]
            df.to_csv(edges_file, index=None, header=None, sep=" ")
        emb_file = os.path.join(project_dir, "emb/{}.emb".format(name))
        for hist_len in [20]:
            jobs.append(cmd_tpl.format(project_dir=project_dir,
                                       input_path=edges_file,
                                       output_path=emb_file + str(hist_len),
                                       hist_len=hist_len))
    print("Preprocessing finished.")
    Parallel(n_jobs=n_jobs)(delayed(os.system)(job) for job in jobs)
def evaluate_tnode():
    """Kick off tNodeEmbed training for the datasets selected by ``args``."""
    # NOTE(review): this slice is computed but never used — run_tnode re-derives
    # the dataset list from start/end itself; confirm before removing the call,
    # in case iterate_datasets has side effects.
    fname = iterate_datasets(dataset=args.dataset)[args.start:args.end]
    logger.info("Running {} embedding programs.".format(args.method))
    run_tnode(dataset=args.dataset, n_jobs=args.n_jobs,
              start=args.start, end=args.end)
    logger.info("Done {}.".format(args.method))
def evaluate_tgat(project_dir="/nfs/zty/Graph/TGAT-bk"):
    """Run TGAT edge-prediction experiments sequentially on a single GPU.

    Uses ``args.gid`` as the GPU id; commands run one after another rather than
    in parallel (each job owns the GPU).
    """
    names = iterate_datasets(dataset=args.dataset)[args.start:args.end]
    # Alternative configurations tried previously, kept for reference:
    # command = "python {}/exper_edge.py -d {} --gpu {} -f --uniform "
    # command = "python {}/exper_edge.py -d {} --gpu {} -f"
    # command = "python {}/exper_edge.py -d {} --gpu {} --uniform"
    template = "python {}/exper_edge.py -d {} --gpu {} --time empty "
    jobs = [template.format(project_dir, nm, args.gid) for nm in names]
    os.chdir(project_dir)
    print("Preprocessing finished.")
    for job in jobs:
        os.system(job)
def run_tnode(dataset="all", project_dir="/nfs/zty/Graph/5-tNodeEmbed/", n_jobs=16, **kwargs):
    """Launch tNodeEmbed training for each dataset / step-count combination.

    Args:
        dataset: dataset selector forwarded to ``iterate_datasets``.
        project_dir: root of the tNodeEmbed checkout; becomes the working dir.
        n_jobs: number of shell commands executed concurrently.
        **kwargs: optional ``start``/``end`` bounds slicing the dataset list.
            Previously these were required (``kwargs["start"]`` raised KeyError
            when omitted); ``.get()`` defaults to None, i.e. no slicing.
    """
    fname = iterate_datasets(dataset=dataset)
    fname = fname[kwargs.get("start"):kwargs.get("end")]
    command = "python {project_dir}/src/main.py -d {dataset} -n {nstep}"
    commands = []
    for name, nstep in product(fname, [128, 32, 8]):
        dump_folder = os.path.join(project_dir, "data/{}".format(name))
        if not os.path.exists(dump_folder):
            os.makedirs(dump_folder)
            # Shared NFS directory: let other users/processes write into it.
            os.chmod(dump_folder, 0o777)
        commands.append(command.format(project_dir=project_dir,
                                       dataset=name, nstep=nstep))
    os.chdir(project_dir)
    print("Preprocessing finished.")
    Parallel(n_jobs=n_jobs)(delayed(os.system)(cmd) for cmd in commands)
def evaluate_triad(project_dir="/nfs/zty/Graph/2-DynamicTriad/output"):
    """Evaluate DynamicTriad embeddings for each dataset and step size.

    Optionally retrains via ``run_triad`` (``args.run``); otherwise the pretrained
    per-step embedding files are loaded and the final step is scored with the
    logistic-regression probe.
    """
    names = iterate_datasets(dataset=args.dataset)[args.start:args.end]
    if args.run:
        logger.info("Running {} embedding programs.".format(args.method))
        run_triad(dataset=args.dataset, n_jobs=args.n_jobs, fname=names,
                  start=args.start, end=args.end, times=args.times)
        logger.info("Done training embedding.")
    else:
        logger.info("Use pretrained {} embeddings.".format(args.method))
    for name, stepsize in product(names, [1, 4, 8]):
        logger.info(name)
        edge_lists, node_lists = load_label_edges(dataset=name)
        train_df, valid_df, test_df = id_map(edge_lists[0], node_lists[0])
        emb_dir = "{}/{}-{}/".format(project_dir, name, stepsize)
        # NOTE(review): os.listdir order is unspecified, so the "[-1]" snapshot
        # chosen below is nondeterministic — confirm whether sorted() is intended.
        per_step = [load_embeddings(emb_dir + f, skiprows=0)
                    for f in os.listdir(emb_dir)]
        id2idx, embeds = per_step[-1]
        X_train = edge2tabular(train_df, id2idx, embeds)
        X_valid = edge2tabular(valid_df, id2idx, embeds)
        X_test = edge2tabular(test_df, id2idx, embeds)
        vauc, acc, f1, auc = lr_evaluate(
            X_train, train_df["label"],
            X_valid, valid_df["label"],
            X_test, test_df["label"])
        write_result(name, "triad",
                     {"beta_1": 0.1, "beta_2": 0.1, "stepsize": stepsize},
                     (vauc, acc, f1, auc))
def run_gta(dataset="all", project_dir="/nfs/zty/Graph/Dynamic-Graph/", n_jobs=4, **kwargs):
    """Run the GTA model across sampler/context-size configurations per dataset.

    Args:
        dataset: dataset selector forwarded to ``iterate_datasets``.
        project_dir: project root; becomes the working directory.
        n_jobs: number of shell commands executed concurrently.
        **kwargs: optional ``start``/``end`` bounds slicing the dataset list.
            Previously these were required (``kwargs["start"]`` raised KeyError
            when omitted); ``.get()`` defaults to None, i.e. no slicing.
    """
    start, end = kwargs.get("start"), kwargs.get("end")
    fname = iterate_datasets(dataset=dataset)[start:end]
    os.chdir(project_dir)
    command = "python main.py --dataset {dataset} --epochs 50 --dropout 0.2 --weight_decay 1e-5 --learning_rate=0.0001 --nodisplay "
    commands = []
    for name in fname:
        cmd = command.format(dataset=name)
        commands.append(cmd)
        commands.append(cmd + " --nodynamic_neighbor")
        commands.append(cmd + " --sampler temporal")
        # Sweep both samplers over the same context sizes (was 8 hand-copied
        # append lines); product() preserves the original ordering:
        # temporal 20/40/80/160 then mask 20/40/80/160.
        for sampler, size in product(["temporal", "mask"], [20, 40, 80, 160]):
            commands.append(
                cmd + " --sampler {} --use_context --context_size {}".format(sampler, size))
    print("Preprocessing finished.")
    Parallel(n_jobs=n_jobs)(delayed(os.system)(cmd) for cmd in commands)