def eval_link_prediction(self): """choose the topK after removing the positive training links Args: test_dataset: Returns: accuracy: """ test_edges = utils.read_edges_from_file(self.test_filename) test_edges_neg = utils.read_edges_from_file(self.test_neg_filename) test_edges.extend(test_edges_neg) # may exists isolated point score_res = [] for i in range(len(test_edges)): score_res.append( np.dot(self.emd[test_edges[i][0]], self.emd[test_edges[i][1]])) test_label = np.array(score_res) bar = np.median(test_label) # ind_pos = test_label >= bar ind_neg = test_label < bar test_label[ind_pos] = 1 test_label[ind_neg] = 0 true_label = np.zeros(test_label.shape) true_label[0:len(true_label) // 2] = 1 accuracy = accuracy_score(true_label, test_label) return accuracy
def main(args):
    """Evaluate node embeddings on three metrics: AUC, mean rank, and
    link-prediction accuracy, then print the results as a table.

    Args:
        args: parsed CLI namespace with train/test edge-list paths, the
            embedding file, negative-sample count, and dimensionality.
    """
    graph_train = nx.read_weighted_edgelist(args.train, nodetype=int,
                                            create_using=nx.Graph())
    graph_test = nx.read_weighted_edgelist(args.test, nodetype=int,
                                           create_using=nx.Graph())
    vectors = read_node_vectors(args.embedding, graph_test)

    print("=====Compute AUC====")
    auc_scores = []
    for node in tqdm(list(graph_test.nodes())):
        try:
            auc_scores.append(AUC_MR.compute(graph_test, node, vectors))
        except ValueError:
            continue  # skip nodes the metric cannot score
    auc_mean = float(sum(auc_scores) / len(auc_scores))

    print("=====Compute MR====")
    sequence_order = AUC_MR.result_rank(graph_test, vectors)
    ranks = []
    for node in tqdm(graph_test.nodes()):
        try:
            ranks.append(AUC_MR.mean_rank(graph_test, node, sequence_order))
        except ValueError:
            continue
    mean_rank = sum(ranks) / len(ranks)

    print("=====Compute ACC====")
    n_node = len(graph_train.nodes())
    neg_links = [
        [node, ACC.generate_neg_link(graph_test, args.negative_num, node)]
        for node in tqdm(graph_test.nodes())
    ]
    np.savetxt("temp/negtive_link.txt", np.asarray(neg_links),
               fmt="%s", newline="\n", delimiter="\t")
    test_edges = utils.read_edges_from_file(args.test)
    test_edges.extend(utils.read_edges_from_file("temp/negtive_link.txt"))
    emb, emb_map = utils.read_embeddings(args.embedding, n_node,
                                         args.dimensions)
    acc = ACC.eval_link_prediction(test_edges, emb, emb_map)

    print("=====Show Results====")
    dataset_name = args.train.split("/")[-1].split(".")[0]
    table = pt.PrettyTable()
    table.field_names = ["dataset", "AUC", "MR", "ACC"]
    table.add_row([dataset_name, auc_mean, mean_rank, acc])
    print(table)
def eval_link_prediction(self):
    """Compute link-prediction accuracy with a median score cutoff.

    The positive test edges come first in the combined list and the
    negative edges second, so the true labels are ones for the first
    half and zeros for the rest.

    Returns:
        accuracy of the median-threshold classifier on the test edges.
    """
    combined = utils.read_edges_from_file(self.test_filename)
    combined.extend(utils.read_edges_from_file(self.test_neg_filename))
    # may exists isolated point
    raw_scores = []
    for edge in combined:
        raw_scores.append(np.dot(self.emd[edge[0]], self.emd[edge[1]]))
    predictions = np.array(raw_scores)
    cutoff = np.median(predictions)
    is_link = predictions >= cutoff
    predictions[is_link] = 1
    predictions[~is_link] = 0
    truth = np.zeros(predictions.shape)
    truth[0:len(truth) // 2] = 1
    return accuracy_score(truth, predictions)
def eval_test(config): """do the evaluation when training :return: """ results = [] if config.app == "link_prediction": LPE = elp.LinkPredictEval(config.emb_filename, config.test_filename, config.test_neg_filename, config.n_node, config.n_embed) result = LPE.eval_link_prediction() results.append(config.model + ":" + str(result) + "\n") with open(config.result_filename, mode="a+") as f: f.writelines(results) test_edges = utils.read_edges_from_file(config.test_filename) test_edges_neg = utils.read_edges_from_file(config.test_neg_filename) test_edges.extend(test_edges_neg) emd = utils.read_emd(config.emb_filename, n_node=config.n_node, n_embed=config.n_embed) score_res = [] for i in range(len(test_edges)): score_res.append(np.dot(emd[test_edges[i][0]], emd[test_edges[i][1]])) test_label = np.array(score_res) bar = np.median(test_label) ind_pos = test_label >= bar ind_neg = test_label < bar test_label[ind_pos] = 1 test_label[ind_neg] = 0 true_label = np.zeros(test_label.shape) true_label[0:len(true_label) // 2] = 1 f1 = f1_score(true_label, test_label, average='macro') result = config.model + ":" + str(f1) + "\n" print(result) with open(config.result_filename_f1, mode="a+") as f: f.writelines(result)
def batch_collect_features(self, seed, _):
    """Replay one seed with instrumentation env vars pointing at scratch
    log files, then harvest the per-seed features (covered edges, cmp /
    indirect-call / external-call / sanitizer-label / mem-op counts) into
    the instance caches and delete the scratch files.

    NOTE(review): random suffixes could in principle collide between
    concurrent runs sharing the same log_base — confirm acceptable.
    """
    def _scratch(prefix, upper):
        # Scratch file path with a random numeric suffix under log_base.
        return os.path.join(self.log_base,
                            prefix + str(random.randint(1, upper)))

    edge_log = _scratch(".coverededges", 100000000)
    cmp_log = _scratch(".cmptmp", 100000000)
    indcall_log = _scratch(".indcalltmp", 100000000)
    extcall_log = _scratch(".extcalltmp", 100000000)
    label_log = _scratch(".sanlabeltmp", 100000000)
    memop_log = _scratch(".moptmp", 1000000000)

    envs = {
        "AFL_LOC_TRACE_FILE": edge_log,
        "CMP_LOG": cmp_log,
        "INDIRECT_CALL_LOG": indcall_log,
        "EXTERNAL_CALL_LOG": extcall_log,
        "SAVIOR_LABEL_LOG": label_log,
        "MEM_OP_LOG": memop_log,
        "ASAN_OPTIONS": "detect_odr_violation=0"
    }
    utils.run_one_with_envs(self.replay_prog_cmd, seed, self.input_mode,
                            envs, 0.2, _)

    # Counts are gathered before the existence checks, mirroring the
    # original flow; count_file_items presumably tolerates a missing file.
    cmp_count = utils.count_file_items(cmp_log, with_weight=True)
    indcall_count = utils.count_file_items(indcall_log, with_weight=True)
    extcall_count = utils.count_file_items(extcall_log, with_weight=True)
    label_count = utils.count_file_items(label_log, with_weight=True)
    memop_count = utils.count_file_items(memop_log, with_weight=True)

    if os.path.exists(edge_log):
        self.input_to_edges_cache[seed] = utils.read_edges_from_file(edge_log)
        self.covered_fuzzer_edges |= set(self.input_to_edges_cache[seed])
        os.unlink(edge_log)

    # Table-driven cache fill + cleanup for the simple counters.
    for path, cache, value in (
            (cmp_log, self.input_to_num_cmp_cache, cmp_count),
            (indcall_log, self.input_to_num_indcall_cache, indcall_count),
            (extcall_log, self.input_to_num_extcall_cache, extcall_count),
            (label_log, self.input_to_num_label_cache, label_count),
            (memop_log, self.input_to_num_memops_cache, memop_count)):
        if os.path.exists(path):
            cache[seed] = value
            os.unlink(path)
def eval_link_prediction(self): """choose the topK after removing the positive training links Args: test_dataset: Returns: accuracy: """ featureWt = utils.readFeatureWt(self.FeatureWtFilename) test_edges = utils.read_edges_from_file(self.test_filename) test_edges_neg = utils.read_edges_from_file(self.test_neg_filename) test_edges.extend(test_edges_neg) # may exists isolated point score_res = [] wProduct = [] if (self.FeatureWtFilename == "/home/raavan/mountedDrives/home/ayush/twitterGanFEAT/src/GraphGAN/genFeatWeights.txt" ): #print("yes") featureWt = np.multiply(featureWt, featureWt) #featureWt=np.multiply(featureWt, featureWt) for i in range(len(test_edges)): embProduct = np.dot(self.emd[test_edges[i][0]], self.emd[test_edges[i][1]]) wProduct = np.dot( np.multiply(self.emdFeatures[test_edges[i][0]], self.emdFeatures[test_edges[i][1]]), featureWt) score_res.append(embProduct + wProduct) with open(config.dotProduct, mode="a+") as f: f.writelines("emb" + str(embProduct) + "----wproduct----" + str(wProduct)) f.writelines("\n") test_label = np.array(score_res) bar = np.median(test_label) # #bar=0.5 #test------------------------------------median ind_pos = test_label >= bar ind_neg = test_label < bar test_label[ind_pos] = 1 test_label[ind_neg] = 0 true_label = np.zeros(test_label.shape) true_label[0:len(true_label) // 2] = 1 accuracy = accuracy_score(true_label, test_label) #test------------------------------------softmax for i in range(0, len(score_res)): score_res[i] = 1 / (1 + math.exp(-score_res[i])) test_label1 = np.array(score_res) barSoftMax = 0.5 ind_pos1 = test_label1 >= barSoftMax ind_neg1 = test_label1 < barSoftMax test_label1[ind_pos1] = 1 test_label1[ind_neg1] = 0 true_label1 = np.zeros(test_label1.shape) true_label1[0:len(true_label1) // 2] = 1 accuracy1 = accuracy_score(true_label1, test_label1) res = [] res.append(bar) res.append(accuracy) res.append(accuracy1) return (res)
# --- Script fragment: feature-weighted link-prediction scoring ---
# NOTE(review): relies on names defined earlier in the file and not visible
# here (embed_filename, test_pos_filename, test_neg_filename, utils, np);
# score_res_neg is filled later in the file — the negative-edge loop is
# outside this view.
n_node = 3312  # node count — presumably the citeseer graph; confirm
n_embed = 32  # embedding dimensionality
emdFeaturesFilename = "/home/raavan/mountedDrives/home/ayush/twitterGanFEAT/features/citeseerNew/features.txt"
nFeatures = 3703  # raw node-feature dimensionality
#for generator---------------change here------------------------------------------------------------------
#FeatureWtFilename="/home/raavan/mountedDrives/home/ayush/twitterGanFEAT/results/gplus/v_6_/disFeatWeights2.txt"
FeatureWtFilename = "/home/raavan/mountedDrives/home/ayush/twitterGanFEAT/pre_train/citeseerNew/citeseerNew20_Weights_gen_median_6510.emb"
#---------------------------------------------------------------------------------------------------------
#read
emd = utils.read_emd(embed_filename, n_node=n_node, n_embed=n_embed)
emdFeatures = utils.read_emd(emdFeaturesFilename, n_node=n_node,
                             n_embed=nFeatures)
featureWt = utils.readFeatureWt(FeatureWtFilename)
test_edges_pos = utils.read_edges_from_file(test_pos_filename)
test_edges_neg = utils.read_edges_from_file(test_neg_filename)
#for generator---------------change here------------------------------------------------------------------
# Square the generator weights (element-wise).
featureWt = np.multiply(featureWt, featureWt)
#---------------------------------------------------------------------------------------------------------
score_res_pos = []
score_res_neg = []
#score_res= []
wProduct = []
# Score each positive edge: embedding dot product plus the
# feature-weighted product of the endpoints' feature vectors.
for i in range(len(test_edges_pos)):
    embProduct = np.dot(emd[test_edges_pos[i][0]],
                        emd[test_edges_pos[i][1]])
    wProduct = np.dot(
        np.multiply(emdFeatures[test_edges_pos[i][0]],
                    emdFeatures[test_edges_pos[i][1]]), featureWt)
    score_res_pos.append(embProduct + wProduct)