Esempio n. 1
0
    def eval_link_prediction(self):
        """Score the test links by embedding dot product and return accuracy.

        Edges are read from ``self.test_filename`` (positives) and
        ``self.test_neg_filename`` (negatives); the negatives are appended
        after the positives. Each pair is scored with the dot product of its
        two rows in ``self.emd``. A score at or above the median of all
        scores is predicted as a link (1), a score below it as a non-link (0).

        The ground truth labels the first half of the combined edge list as
        positive, which assumes both files contain the same number of edges.

        Returns:
            The result of ``accuracy_score(true_labels, predicted_labels)``.
        """
        candidate_pairs = utils.read_edges_from_file(self.test_filename)
        candidate_pairs.extend(
            utils.read_edges_from_file(self.test_neg_filename))

        # Isolated nodes may be present among the test pairs.
        predicted = np.array([
            np.dot(self.emd[pair[0]], self.emd[pair[1]])
            for pair in candidate_pairs
        ])

        # Both masks are computed before any score is overwritten so the
        # second comparison still sees the raw scores.
        threshold = np.median(predicted)
        at_or_above = predicted >= threshold
        below = predicted < threshold
        predicted[at_or_above] = 1
        predicted[below] = 0

        expected = np.zeros(predicted.shape)
        expected[:len(expected) // 2] = 1

        return accuracy_score(expected, predicted)
Esempio n. 2
0
def main(args):
    """Evaluate node embeddings on the test graph and print AUC, MR and ACC.

    The test graph is used for the per-node AUC and mean-rank metrics. For
    the accuracy metric, negative links are generated per test node, written
    to a scratch file, read back, appended to the real test edges and passed
    to ``ACC.eval_link_prediction``. The results are printed as a one-row
    table.

    Args:
        args: Parsed options. The attributes read here are ``train`` and
            ``test`` (weighted edge-list paths), ``embedding`` (embedding
            file path), ``negative_num`` (forwarded to
            ``ACC.generate_neg_link``) and ``dimensions`` (forwarded to
            ``utils.read_embeddings``).

    Raises:
        ZeroDivisionError: If no node produced an AUC (or mean-rank) value,
            i.e. every call raised ``ValueError`` or the test graph has no
            nodes.
    """
    import os  # local import: only needed to create the scratch directory

    G_train = nx.read_weighted_edgelist(args.train,
                                        nodetype=int,
                                        create_using=nx.Graph())
    G_test = nx.read_weighted_edgelist(args.test,
                                       nodetype=int,
                                       create_using=nx.Graph())
    vector = read_node_vectors(args.embedding, G_test)

    print("=====Compute AUC====")
    auc = []
    for node in tqdm(list(G_test.nodes())):
        try:
            auc.append(AUC_MR.compute(G_test, node, vector))
        except ValueError:
            # Nodes for which AUC cannot be computed are left out of the mean.
            continue
    auc_mean = float(sum(auc) / len(auc))

    print("=====Compute MR====")
    sequence_order = AUC_MR.result_rank(G_test, vector)
    mr = []
    for node in tqdm(G_test.nodes()):
        try:
            mr.append(AUC_MR.mean_rank(G_test, node, sequence_order))
        except ValueError:
            # Same policy as for AUC: skip nodes that cannot be ranked.
            continue
    Mean_Rank = sum(mr) / len(mr)

    print("=====Compute ACC====")
    # The embedding reader is sized by the number of nodes in the TRAIN graph.
    n_node = len(G_train.nodes())

    neg_sample_link = [
        [node, ACC.generate_neg_link(G_test, args.negative_num, node)]
        for node in tqdm(G_test.nodes())
    ]

    # The negative links are round-tripped through a scratch file so they can
    # be parsed by the same reader as the real test edges. Create the
    # directory first: np.savetxt does not create missing parent directories.
    neg_link_path = "temp/negtive_link.txt"
    os.makedirs(os.path.dirname(neg_link_path), exist_ok=True)
    np.savetxt(neg_link_path,
               np.asarray(neg_sample_link),
               fmt="%s",
               newline="\n",
               delimiter="\t")

    test_edge = utils.read_edges_from_file(args.test)
    test_edge_neg = utils.read_edges_from_file(neg_link_path)
    test_edge.extend(test_edge_neg)
    EMB, EMBMAP = utils.read_embeddings(args.embedding, n_node,
                                        args.dimensions)
    acc = ACC.eval_link_prediction(test_edge, EMB, EMBMAP)

    print("=====Show Results====")
    # Dataset name = base name of the training file up to its first dot.
    dataset_name = args.train.split("/")[-1].split(".")[0]
    tb = pt.PrettyTable()
    tb.field_names = ["dataset", "AUC", "MR", "ACC"]
    tb.add_row([dataset_name, auc_mean, Mean_Rank, acc])
    print(tb)
Esempio n. 3
0
    def eval_link_prediction(self):
        """Return the link-prediction accuracy of the embeddings in ``self.emd``.

        The positive edges from ``self.test_filename`` are followed by the
        negative edges from ``self.test_neg_filename``. Every edge is scored
        by the dot product of its endpoint embeddings, and the median score
        is the decision threshold: at or above it means "link" (1), below it
        means "no link" (0). The first half of the combined edge list is
        taken as the positive ground truth.

        Returns:
            The value returned by ``accuracy_score`` for those labels.
        """
        def pair_score(edge):
            # Similarity of the two endpoints of ``edge``.
            return np.dot(self.emd[edge[0]], self.emd[edge[1]])

        edges = utils.read_edges_from_file(self.test_filename)
        negatives = utils.read_edges_from_file(self.test_neg_filename)
        edges.extend(negatives)

        # Isolated nodes may appear among these edges.
        labels = np.array(list(map(pair_score, edges)))

        # Build both masks from the raw scores before overwriting any of them.
        cutoff = np.median(labels)
        is_link, is_non_link = labels >= cutoff, labels < cutoff
        labels[is_link] = 1
        labels[is_non_link] = 0

        truth = np.zeros(labels.shape)
        truth[:len(truth) // 2] = 1

        return accuracy_score(truth, labels)
Esempio n. 4
0
def eval_test(config):
    """Evaluate the saved embeddings during training and log the scores.

    Two things are written, both in append mode:

    * If ``config.app`` is ``"link_prediction"``, the result of
      ``LinkPredictEval.eval_link_prediction`` goes to
      ``config.result_filename`` as ``"<model>:<result>"``.
    * Regardless of ``config.app``, a macro-averaged F1 score is computed
      from the median-thresholded dot-product scores of the test edges,
      printed, and appended to ``config.result_filename_f1``.

    :param config: object providing ``app``, ``model``, ``emb_filename``,
        ``test_filename``, ``test_neg_filename``, ``n_node``, ``n_embed``,
        ``result_filename`` and ``result_filename_f1``.
    :return: None
    """
    report_lines = []
    if config.app == "link_prediction":
        evaluator = elp.LinkPredictEval(config.emb_filename,
                                        config.test_filename,
                                        config.test_neg_filename,
                                        config.n_node,
                                        config.n_embed)
        link_result = evaluator.eval_link_prediction()
        report_lines.append(config.model + ":" + str(link_result) + "\n")

    # The file is opened (and created if missing) even when nothing is logged.
    with open(config.result_filename, mode="a+") as report:
        report.write("".join(report_lines))

    # Positive edges first, negatives appended after them.
    pairs = utils.read_edges_from_file(config.test_filename)
    negative_pairs = utils.read_edges_from_file(config.test_neg_filename)
    pairs.extend(negative_pairs)
    emd = utils.read_emd(config.emb_filename,
                         n_node=config.n_node,
                         n_embed=config.n_embed)

    predicted = np.array(
        [np.dot(emd[pair[0]], emd[pair[1]]) for pair in pairs])

    # Median split: both masks are taken from the raw scores before any
    # score is overwritten with its label.
    threshold = np.median(predicted)
    at_or_above = predicted >= threshold
    below = predicted < threshold
    predicted[at_or_above] = 1
    predicted[below] = 0

    # The first half of the combined list is treated as the positive class.
    expected = np.zeros(predicted.shape)
    expected[:len(expected) // 2] = 1

    f1 = f1_score(expected, predicted, average='macro')
    result = config.model + ":" + str(f1) + "\n"
    print(result)
    with open(config.result_filename_f1, mode="a+") as report:
        report.write(result)
Esempio n. 5
0
 def batch_collect_features(self, seed, _):
     """Replay ``seed`` once and cache the features found in its log files.

     Six randomly suffixed scratch paths under ``self.log_base`` are handed
     to the replayed program through environment variables. After the run,
     each log that exists is folded into the matching per-seed cache on
     ``self`` and then deleted. The covered-edges log is additionally merged
     into ``self.covered_fuzzer_edges``.

     Args:
         seed: Input passed to ``utils.run_one_with_envs``; also the key
             under which the results are cached.
         _: Forwarded unchanged as the last argument of
             ``utils.run_one_with_envs`` (meaning not visible here).
     """
     def scratch_path(prefix, upper=100000000):
         # Randomly suffixed file name under the log directory.
         return os.path.join(self.log_base,
                             prefix + str(random.randint(1, upper)))

     edge_log = scratch_path(".coverededges")
     cmp_log = scratch_path(".cmptmp")
     indcall_log = scratch_path(".indcalltmp")
     extcall_log = scratch_path(".extcalltmp")
     label_log = scratch_path(".sanlabeltmp")
     memop_log = scratch_path(".moptmp", 1000000000)

     envs = {
         "AFL_LOC_TRACE_FILE": edge_log,
         "CMP_LOG": cmp_log,
         "INDIRECT_CALL_LOG": indcall_log,
         "EXTERNAL_CALL_LOG": extcall_log,
         "SAVIOR_LABEL_LOG": label_log,
         "MEM_OP_LOG": memop_log,
         "ASAN_OPTIONS": "detect_odr_violation=0"
     }
     # NOTE(review): 0.2 is presumably a timeout in seconds -- confirm
     # against utils.run_one_with_envs.
     utils.run_one_with_envs(self.replay_prog_cmd, seed, self.input_mode,
                             envs, 0.2, _)

     # Every counter log is counted up front, before any existence check;
     # each entry is (log path, name of the cache on self, item count).
     counted_logs = [
         (path, cache_name, utils.count_file_items(path, with_weight=True))
         for path, cache_name in (
             (cmp_log, "input_to_num_cmp_cache"),
             (indcall_log, "input_to_num_indcall_cache"),
             (extcall_log, "input_to_num_extcall_cache"),
             (label_log, "input_to_num_label_cache"),
             (memop_log, "input_to_num_memops_cache"),
         )
     ]

     if os.path.exists(edge_log):
         self.input_to_edges_cache[seed] = utils.read_edges_from_file(
             edge_log)
         self.covered_fuzzer_edges |= set(self.input_to_edges_cache[seed])
         os.unlink(edge_log)

     # A count is only cached when its log file was actually produced; the
     # cache attribute is looked up lazily for the same reason.
     for path, cache_name, count in counted_logs:
         if os.path.exists(path):
             getattr(self, cache_name)[seed] = count
             os.unlink(path)
Esempio n. 6
0
    def eval_link_prediction(self):
        """Evaluate link prediction with embedding plus weighted-feature scores.

        Each test pair ``(u, v)`` is scored as::

            dot(emd[u], emd[v]) + dot(emdFeatures[u] * emdFeatures[v], featureWt)

        Positive edges (``self.test_filename``) come first, followed by the
        negative edges (``self.test_neg_filename``); the first half of the
        combined list is taken as the positive ground truth.

        Accuracy is computed twice:

        * with the median raw score as threshold, and
        * with the logistic sigmoid of the score thresholded at 0.5.

        Side effect: one line holding the two score terms of the *last* pair
        is appended to the file named by ``config.dotProduct``.

        Returns:
            list: ``[median_score, median_accuracy, sigmoid_accuracy]``.
        """
        featureWt = utils.readFeatureWt(self.FeatureWtFilename)
        test_edges = utils.read_edges_from_file(self.test_filename)
        test_edges_neg = utils.read_edges_from_file(self.test_neg_filename)
        test_edges.extend(test_edges_neg)

        # Weights loaded from this specific file are squared element-wise
        # before use; weights from any other file are used as read.
        if (self.FeatureWtFilename ==
                "/home/raavan/mountedDrives/home/ayush/twitterGanFEAT/src/GraphGAN/genFeatWeights.txt"
            ):
            featureWt = np.multiply(featureWt, featureWt)

        # Isolated nodes may be present among the test pairs.
        score_res = []
        for edge in test_edges:
            src, dst = edge[0], edge[1]
            embProduct = np.dot(self.emd[src], self.emd[dst])
            wProduct = np.dot(
                np.multiply(self.emdFeatures[src], self.emdFeatures[dst]),
                featureWt)
            score_res.append(embProduct + wProduct)

        # Debug trace: only the terms of the last scored pair are logged.
        with open(config.dotProduct, mode="a+") as f:
            f.write("emb" + str(embProduct) + "----wproduct----" +
                    str(wProduct) + "\n")

        # Test 1: threshold at the median of the raw scores. Both masks are
        # computed before any score is overwritten with its label.
        test_label = np.array(score_res)
        bar = np.median(test_label)
        ind_pos = test_label >= bar
        ind_neg = test_label < bar
        test_label[ind_pos] = 1
        test_label[ind_neg] = 0
        true_label = np.zeros(test_label.shape)
        true_label[0:len(true_label) // 2] = 1
        accuracy = accuracy_score(true_label, test_label)

        # Test 2: logistic sigmoid of the raw score, thresholded at 0.5.
        sigmoid_scores = []
        for score in score_res:
            try:
                sigmoid_scores.append(1 / (1 + math.exp(-score)))
            except OverflowError:
                # math.exp overflows for scores below roughly -709; the
                # sigmoid is 0 to double precision there.
                sigmoid_scores.append(0.0)
        test_label1 = np.array(sigmoid_scores)
        barSoftMax = 0.5
        ind_pos1 = test_label1 >= barSoftMax
        ind_neg1 = test_label1 < barSoftMax
        test_label1[ind_pos1] = 1
        test_label1[ind_neg1] = 0
        true_label1 = np.zeros(test_label1.shape)
        true_label1[0:len(true_label1) // 2] = 1
        accuracy1 = accuracy_score(true_label1, test_label1)

        return [bar, accuracy, accuracy1]
Esempio n. 7
0
# Sizes passed to utils.read_emd below: number of nodes, embedding width and
# feature-vector width.
# NOTE(review): the values presumably match the "citeseerNew" dataset named
# in the paths -- confirm.
n_node = 3312
n_embed = 32
emdFeaturesFilename = "/home/raavan/mountedDrives/home/ayush/twitterGanFEAT/features/citeseerNew/features.txt"
nFeatures = 3703
# For the generator: change the feature-weight file here.
# Alternative weight file (currently disabled):
#FeatureWtFilename="/home/raavan/mountedDrives/home/ayush/twitterGanFEAT/results/gplus/v_6_/disFeatWeights2.txt"
FeatureWtFilename = "/home/raavan/mountedDrives/home/ayush/twitterGanFEAT/pre_train/citeseerNew/citeseerNew20_Weights_gen_median_6510.emb"

#---------------------------------------------------------------------------------------------------------
# Read the embeddings, per-node features, feature weights and test edges.
# embed_filename, test_pos_filename and test_neg_filename are not defined in
# this part of the script; they must be set earlier.
emd = utils.read_emd(embed_filename, n_node=n_node, n_embed=n_embed)
emdFeatures = utils.read_emd(emdFeaturesFilename,
                             n_node=n_node,
                             n_embed=nFeatures)
featureWt = utils.readFeatureWt(FeatureWtFilename)
test_edges_pos = utils.read_edges_from_file(test_pos_filename)
test_edges_neg = utils.read_edges_from_file(test_neg_filename)

# For the generator: the feature weights are squared element-wise before use.
featureWt = np.multiply(featureWt, featureWt)
#---------------------------------------------------------------------------------------------------------
score_res_pos = []
score_res_neg = []
#score_res= []
# Placeholder only: wProduct is rebound to a number on every loop iteration.
wProduct = []
# Score each positive test edge: the dot product of the two endpoint
# embeddings plus the feature-weighted dot product of their feature vectors.
for i in range(len(test_edges_pos)):
    embProduct = np.dot(emd[test_edges_pos[i][0]], emd[test_edges_pos[i][1]])
    wProduct = np.dot(
        np.multiply(emdFeatures[test_edges_pos[i][0]],
                    emdFeatures[test_edges_pos[i][1]]), featureWt)
    score_res_pos.append(embProduct + wProduct)