Ejemplo n.º 1
0
def main(args):
    """Train an embedding and, if requested, evaluate it on a downstream task.

    Behaviour depends on ``args.task``:

    * ``'link-prediction'``: split the input graph, train on the training
      split, evaluate with ``LinkPrediction`` and delete the training
      edgelist returned by the split.
    * ``'node-classification'``: train on the full input graph and evaluate
      with ``NodeClassification`` using the labels in ``args.label_file``.
    * anything else: train the embedding only; nothing is evaluated.

    When ``args.eval_result_file`` is set and an evaluation ran, one JSON
    line describing the run is appended to that file.  For link prediction
    the predictions are additionally written to ``args.predictions`` and the
    ROC curve (one CSV row of FPR values, one of TPR values) to ``args.roc``;
    either output is skipped when its path is empty.

    Args:
        args: Parsed command-line namespace.  Attributes read here:
            ``method``, ``task``, ``input``, ``seed``, ``weighted``,
            ``output``, ``label_file``, ``testingratio``, ``dimensions``,
            ``eval_result_file``, ``predictions`` and ``roc``.

    Raises:
        ValueError: If the task is node classification and no label file
            was supplied.
    """
    print('#' * 70)
    print('Embedding Method: %s, Evaluation Task: %s' %
          (args.method, args.task))
    print('#' * 70)

    # Stays None for embedding-only runs so the reporting section below can
    # be skipped instead of failing on an unbound name.
    result = None

    if args.task == 'link-prediction':
        G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(
            args.input, args.seed, weighted=args.weighted)
        try:
            time1 = time.time()
            embedding_training(args, train_graph_filename)
            embed_train_time = time.time() - time1
            print('Embedding Learning Time: %.2f s' % embed_train_time)
            embedding_look_up = load_embedding(args.output)
            time1 = time.time()
            print('Begin evaluation...')
            result = LinkPrediction(embedding_look_up, G, G_train,
                                    testing_pos_edges, args.seed)
            eval_time = time.time() - time1
            print('Prediction Task Time: %.2f s' % eval_time)
        finally:
            # Delete the training edgelist even when training or evaluation
            # fails, so aborted runs do not leave it behind.
            os.remove(train_graph_filename)

    elif args.task == 'node-classification':
        if not args.label_file:
            raise ValueError("No input label file. Exit.")
        node_list, labels = read_node_labels(args.label_file)
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)
        embedding_look_up = load_embedding(args.output, node_list)
        time1 = time.time()
        print('Begin evaluation...')
        result = NodeClassification(embedding_look_up, node_list, labels,
                                    args.testingratio, args.seed)
        eval_time = time.time() - time1
        print('Prediction Task Time: %.2f s' % eval_time)
    else:
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)

    if args.eval_result_file and result is not None:
        _results = dict(
            input=args.input,
            task=args.task,
            method=args.method,
            dimension=args.dimensions,
            user=getpass.getuser(),
            date=datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S'),
            seed=args.seed,
        )

        if args.task == 'link-prediction':
            # The last three items go to their own output files below rather
            # than into the JSON record.
            auc_roc, auc_pr, accuracy, f1, prediction, fpr, tpr = result
            _results['results'] = dict(
                auc_roc=auc_roc,
                auc_pr=auc_pr,
                accuracy=accuracy,
                f1=f1,
            )
        else:
            accuracy, f1_micro, f1_macro = result
            _results['results'] = dict(
                accuracy=accuracy,
                f1_micro=f1_micro,
                f1_macro=f1_macro,
            )

        with open(args.eval_result_file, 'a+') as wf:
            print(json.dumps(_results, sort_keys=True), file=wf)

        # prediction / fpr / tpr only exist for link prediction; writing them
        # unconditionally raised NameError after node classification.
        if args.task == 'link-prediction':
            if args.predictions:
                with open(args.predictions, 'w') as pf:
                    np.savetxt(pf, prediction, fmt='%s', delimiter=',')

            if args.roc:
                # newline='' lets the csv module control line endings (avoids
                # blank rows on Windows).
                with open(args.roc, 'w', newline='') as rf:
                    csv.writer(rf).writerows([list(fpr), list(tpr)])
Ejemplo n.º 2
0
def main(args):
    """Train an embedding and, if requested, evaluate it on a downstream task.

    Behaviour depends on ``args.task``:

    * ``'link-prediction'``: split the input graph, train on the training
      split, evaluate with ``LinkPrediction`` and delete the training
      edgelist returned by the split.
    * ``'node-classification'``: train on the full input graph and evaluate
      with ``NodeClassification`` using the labels in ``args.label_file``.
    * anything else: train the embedding only; nothing is evaluated.

    When ``args.eval_result_file`` is set and an evaluation ran, one JSON
    line describing the run and its metrics is appended to that file.

    Args:
        args: Parsed command-line namespace.  Attributes read here:
            ``method``, ``task``, ``input``, ``seed``, ``weighted``,
            ``output``, ``label_file``, ``testingratio``, ``dimensions``
            and ``eval_result_file``.

    Raises:
        ValueError: If the task is node classification and no label file
            was supplied.
    """
    print('#' * 70)
    print('Embedding Method: %s, Evaluation Task: %s' %
          (args.method, args.task))
    print('#' * 70)

    # Stays None for embedding-only runs so the reporting section below can
    # be skipped instead of failing on an unbound name.
    result = None

    if args.task == 'link-prediction':
        G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(
            args.input, args.seed, weighted=args.weighted)
        try:
            time1 = time.time()
            embedding_training(args, train_graph_filename)
            embed_train_time = time.time() - time1
            print('Embedding Learning Time: %.2f s' % embed_train_time)
            embedding_look_up = load_embedding(args.output)
            time1 = time.time()
            print('Begin evaluation...')
            result = LinkPrediction(embedding_look_up, G, G_train,
                                    testing_pos_edges, args.seed)
            eval_time = time.time() - time1
            print('Prediction Task Time: %.2f s' % eval_time)
        finally:
            # Delete the training edgelist even when training or evaluation
            # fails, so aborted runs do not leave it behind.
            os.remove(train_graph_filename)
    elif args.task == 'node-classification':
        if not args.label_file:
            raise ValueError("No input label file. Exit.")
        node_list, labels = read_node_labels(args.label_file)
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)
        embedding_look_up = load_embedding(args.output, node_list)
        time1 = time.time()
        print('Begin evaluation...')
        result = NodeClassification(embedding_look_up, node_list, labels,
                                    args.testingratio, args.seed)
        eval_time = time.time() - time1
        print('Prediction Task Time: %.2f s' % eval_time)
    else:
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)

    if args.eval_result_file and result is not None:
        _results = dict(
            input=args.input,
            task=args.task,
            method=args.method,
            dimension=args.dimensions,
            user=getpass.getuser(),
            date=datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S'),
            seed=args.seed,
        )

        if args.task == 'link-prediction':
            auc_roc, auc_pr, accuracy, f1 = result
            _results['results'] = dict(
                auc_roc=auc_roc,
                auc_pr=auc_pr,
                accuracy=accuracy,
                f1=f1,
            )
        else:
            accuracy, f1_micro, f1_macro = result
            _results['results'] = dict(
                accuracy=accuracy,
                f1_micro=f1_micro,
                f1_macro=f1_macro,
            )

        # Append so that successive runs accumulate one JSON line each.
        with open(args.eval_result_file, 'a+') as wf:
            print(json.dumps(_results, sort_keys=True), file=wf)
Ejemplo n.º 3
0
    vectors[word] = word2vec.wv[word]
del word2vec

# Dump the collected vectors to a text file: a "<node count> 100" header line
# followed by one "<node> <v1> <v2> ..." line per entry of `vectors`.
# NOTE(review): the hard-coded 100 is presumably the embedding dimension --
# confirm it matches the length of the stored vectors.
filename="C:/git/BioNEV/src/node2vec Embeddings.txt"
node_num = len(vectors)
# The context manager closes the file even if a write fails part-way through.
with open(filename, 'w') as fout:
    fout.write("{} {}\n".format(node_num, 100))
    for node, vec in vectors.items():
        fout.write("{} {}\n".format(node, ' '.join(str(x) for x in vec)))

############################################################################################
# Evaluate an SVD embedding on link prediction and print the metrics.
_results = {}

# The split returns a new training-graph path, which replaces the input path.
G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(
    train_graph_filename, None, weighted=None)
g = read_for_SVD(train_graph_filename, weighted=None)

EMBED = "C:/git/BioNEV/embeddings/SVD_OutputFile"

# Write the embedding to EMBED, then read it back for evaluation.
SVD_embedding(g, EMBED, size=100)
embedding_look_up = load_embedding(EMBED)
result = LinkPrediction(
    embedding_look_up, G, G_train, testing_pos_edges, 0)
auc_roc, auc_pr, accuracy, f1 = result
_results['results'] = {
    'auc_roc': auc_roc,
    'auc_pr': auc_pr,
    'accuracy': accuracy,
    'f1': f1,
}
print(_results)
Ejemplo n.º 4
0
        )



# Exploratory scratch script: loads an edgelist, splits it into train/test
# graphs and builds node2vec models.  Several names used below are not
# defined in this fragment; see the NOTE(review) markers.

# NOTE: this first path is immediately overwritten by the next assignment.
train_graph_filename= "C:/git/BioNEV/data/DrugBank_DDI/DrugBank_DDI.edgelist"
train_graph_filename= "C:/git/BioNEV/data/DrugBank_DDI/Karate.edgelist"
G2=Graph()
#G..read_weighted_edgelist(train_graph_filename)
# Read the edgelist with networkx, then hand the graph to G2 via read_g.
G1 = nx.read_weighted_edgelist(train_graph_filename)
G2.read_g(G1)

# NOTE: node2vec is imported here and again on the following line.
from bionev.OpenNE import node2vec
from bionev.OpenNE import gf, grarep, hope, lap, line, node2vec, sdne
from bionev.utils import *

# NOTE(review): `args` is not defined in this fragment -- confirm where it
# comes from.  The values unpacked here are overwritten by the next call.
G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(args.input, args.seed, weighted=args.weighted)
# train_graph_filename is rebound to the filename returned by the split.
G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(train_graph_filename, None, weighted=False)



# NOTE(review): `G_` is not assigned in this fragment; this `model` is also
# replaced by the second Node2vec construction below.
model = node2vec.Node2vec(graph=G_, path_length=args.walk_length,
                                      num_paths=args.number_walks, dim=args.dimensions,
                                      workers=args.workers, p=args.p, q=args.q, window=args.window_size)


# Same model construction, with literal hyper-parameters instead of `args`.
g1 = read_for_OpenNE(train_graph_filename, weighted=False)
model = node2vec.Node2vec(g1,path_length=64,num_paths=32,dim=100, workers=8,p=1,q=1,window=10)
print("test")

# NOTE(review): `node2vecmodel` is not assigned in this fragment -- TODO
# confirm which object `.fit` is meant to be called on.
model = node2vecmodel.fit(window=10,min_count=1)
# Save embeddings for later use
Ejemplo n.º 5
0
def main(trials=None):
    """Run one train/evaluate cycle, optionally driven by a tuning trial.

    Command-line arguments are read with ``parse_args()``.  When ``trials``
    is given, the hyper-parameters ``C``, ``window_size``, ``pro_steps``,
    ``pro_mu`` and ``pro_theta`` are sampled from it and, together with a
    per-trial ``output`` path, override the parsed arguments.

    Behaviour then depends on ``args.task``:

    * ``'link-prediction'``: split the input graph, train on the training
      split, evaluate with ``LinkPrediction`` and delete the training
      edgelist returned by the split.
    * ``'node-classification'``: train on the full input graph and evaluate
      with ``NodeClassification`` using the labels in ``args.label_file``.
    * anything else: train the embedding only; nothing is evaluated.

    When ``args.eval_result_file`` is set and an evaluation ran, one JSON
    line describing the run and its metrics is appended to that file.

    Args:
        trials: Optional trial object exposing ``suggest_loguniform``,
            ``suggest_int``, ``suggest_uniform`` and ``number`` (this looks
            like an Optuna ``Trial`` -- confirm against the caller).  With
            ``None`` the parsed arguments are used unchanged.

    Returns:
        The AUC-ROC for link prediction, the accuracy for node
        classification, or ``None`` when no evaluation was run.  The metric
        is returned whether or not a result file is written, so the function
        can serve as a tuning objective without ``eval_result_file``.

    Raises:
        ValueError: If the task is node classification and no label file
            was supplied.
    """
    args = parse_args()
    if trials is not None:
        # Sampled values override the corresponding command-line arguments.
        vars(args).update({
            'C': trials.suggest_loguniform('C', 1e-10, 1e10),
            'window_size': trials.suggest_int('window_size', 1, 20),
            'pro_steps': trials.suggest_int('pro_steps', 1, 20),
            'pro_mu': trials.suggest_uniform('pro_mu', -1.0, 1.0),
            'pro_theta': trials.suggest_uniform('pro_theta', -1.0, 1.0),
            'output': "%s_trial_%s" % (args.output, str(trials.number))
        })
    print(args)

    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    print('#' * 70)
    print('Embedding Method: %s, Evaluation Task: %s' %
          (args.method, args.task))
    print('#' * 70)

    # Stays None for embedding-only runs so the reporting section below can
    # be skipped instead of failing on an unbound name.
    result = None

    if args.task == 'link-prediction':
        # Only forward the trial number when there is a trial; previously
        # `trials.number` was dereferenced even when `trials` was None.
        # NOTE(review): assumes split_train_test_graph has a default for
        # `trial_number` -- confirm.
        split_kwargs = {}
        if trials is not None:
            split_kwargs['trial_number'] = trials.number
        G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(
            args.input,
            args.seed,
            weighted=args.weighted,
            **split_kwargs)
        try:
            time1 = time.time()
            embedding_training(args, train_graph_filename)
            embed_train_time = time.time() - time1
            print('Embedding Learning Time: %.2f s' % embed_train_time)
            embedding_look_up = load_embedding(args.output)
            time1 = time.time()
            print('Begin evaluation...')
            result = LinkPrediction(embedding_look_up,
                                    G,
                                    G_train,
                                    testing_pos_edges,
                                    args.seed,
                                    C=args.C)
            eval_time = time.time() - time1
            print('Prediction Task Time: %.2f s' % eval_time)
        finally:
            # Delete the training edgelist even when training or evaluation
            # fails, so aborted trials do not leave it behind.
            os.remove(train_graph_filename)
    elif args.task == 'node-classification':
        if not args.label_file:
            raise ValueError("No input label file. Exit.")
        node_list, labels = read_node_labels(args.label_file)
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)
        embedding_look_up = load_embedding(args.output, node_list)
        time1 = time.time()
        print('Begin evaluation...')
        result = NodeClassification(embedding_look_up,
                                    node_list,
                                    labels,
                                    args.testingratio,
                                    args.seed,
                                    C=args.C)
        eval_time = time.time() - time1
        print('Prediction Task Time: %.2f s' % eval_time)
    else:
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)

    if result is None:
        # Embedding-only run: nothing to report or return.
        return None

    if args.task == 'link-prediction':
        auc_roc, auc_pr, accuracy, f1 = result
        metrics = dict(
            auc_roc=auc_roc,
            auc_pr=auc_pr,
            accuracy=accuracy,
            f1=f1,
        )
        objective = auc_roc
    else:
        accuracy, f1_micro, f1_macro = result
        metrics = dict(
            accuracy=accuracy,
            f1_micro=f1_micro,
            f1_macro=f1_macro,
        )
        objective = accuracy

    if args.eval_result_file:
        _results = dict(
            input=args.input,
            task=args.task,
            method=args.method,
            dimension=args.dimensions,
            user=getpass.getuser(),
            date=datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S'),
            seed=args.seed,
        )
        _results['results'] = metrics

        # Append so that successive runs accumulate one JSON line each.
        with open(args.eval_result_file, 'a+') as wf:
            print(json.dumps(_results, sort_keys=True), file=wf)

    return objective