Esempio n. 1
0
def main(args):
    """Train an embedding and optionally evaluate it on a downstream task.

    For ``link-prediction`` the input graph is split, the embedding is
    trained on the training graph, and the held-out edges are scored. For
    ``node-classification`` the embedding is trained on the full input graph
    and evaluated against the labels in ``args.label_file``. Any other task
    value only trains the embedding.

    When ``args.eval_result_file`` is set and an evaluation was run, one JSON
    line with the metrics is appended to that file. For link prediction the
    raw predictions and the ROC curve points are additionally written to
    ``args.predictions`` and ``args.roc``.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``method``,
            ``task``, ``input``, ``output``, ``seed``, ``weighted``,
            ``label_file``, ``testingratio``, ``dimensions``,
            ``eval_result_file``, ``predictions`` and ``roc``.

    Raises:
        ValueError: If the task is ``node-classification`` and no label file
            was supplied.
    """
    print('#' * 70)
    print('Embedding Method: %s, Evaluation Task: %s' %
          (args.method, args.task))
    print('#' * 70)

    # Stays None when no evaluation task is requested, so the reporting
    # section below is skipped instead of hitting an unbound local.
    result = None

    if args.task == 'link-prediction':
        G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(
            args.input, args.seed, weighted=args.weighted)
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)
        embedding_look_up = load_embedding(args.output)
        time1 = time.time()
        print('Begin evaluation...')
        result = LinkPrediction(embedding_look_up, G, G_train,
                                testing_pos_edges, args.seed)
        eval_time = time.time() - time1
        print('Prediction Task Time: %.2f s' % eval_time)
        # Remove the training-graph file returned by the split.
        os.remove(train_graph_filename)

    elif args.task == 'node-classification':
        if not args.label_file:
            raise ValueError("No input label file. Exit.")
        node_list, labels = read_node_labels(args.label_file)
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)
        embedding_look_up = load_embedding(args.output, node_list)
        time1 = time.time()
        print('Begin evaluation...')
        result = NodeClassification(embedding_look_up, node_list, labels,
                                    args.testingratio, args.seed)
        eval_time = time.time() - time1
        print('Prediction Task Time: %.2f s' % eval_time)
    else:
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)

    if args.eval_result_file and result:
        _results = dict(
            input=args.input,
            task=args.task,
            method=args.method,
            dimension=args.dimensions,
            user=getpass.getuser(),
            date=datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S'),
            seed=args.seed,
        )

        if args.task == 'link-prediction':
            # This variant of LinkPrediction also returns the raw
            # predictions and the ROC curve points.
            auc_roc, auc_pr, accuracy, f1, prediction, fpr, tpr = result
            _results['results'] = dict(
                auc_roc=auc_roc,
                auc_pr=auc_pr,
                accuracy=accuracy,
                f1=f1,
            )
        else:
            accuracy, f1_micro, f1_macro = result
            _results['results'] = dict(
                accuracy=accuracy,
                f1_micro=f1_micro,
                f1_macro=f1_macro,
            )

        with open(args.eval_result_file, 'a+') as wf:
            print(json.dumps(_results, sort_keys=True), file=wf)

        # Predictions and ROC points only exist for link prediction; writing
        # them unconditionally would fail for node classification.
        if args.task == 'link-prediction':
            with open(args.predictions, 'w') as pf:
                np.savetxt(pf, prediction, fmt='%s', delimiter=',')

            # First CSV row holds the FPR values, second row the TPR values.
            # newline='' lets the csv module control line endings itself.
            with open(args.roc, 'w', newline='') as rf:
                csv.writer(rf).writerows([list(fpr), list(tpr)])
Esempio n. 2
0
from bionev.evaluation import LinkPrediction, NodeClassification
from bionev.embed_train import embedding_training, load_embedding, read_node_labels, split_train_test_graph
from bionev.SVD.model import SVD_embedding
from bionev.utils import *

from utils2
# Ad-hoc experiment script: split a graph into train/test parts, embed the
# training graph with SVD, score link prediction, then start the same
# pipeline for HOPE.
# NOTE(review): `train_graph_filename` is read here before any visible
# assignment; presumably it is provided by one of the star imports or an
# interactive session -- confirm, otherwise this raises NameError.
G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(train_graph_filename, None, weighted=None)


# NOTE(review): GSnap and GNx are not used anywhere in the visible script.
GSnap, GNx= read_GRaph_SNAP_NX()

g = read_for_SVD(train_graph_filename, weighted=None)

# The embedding is written to "SVD_OutputFile" and read back from that path.
SVD_embedding(g, "SVD_OutputFile", size=100)
embedding_look_up = load_embedding("SVD_OutputFile")
# NOTE(review): the trailing 0 is presumably the random seed -- confirm
# against the LinkPrediction signature.
result = LinkPrediction(embedding_look_up, G, G_train, testing_pos_edges,0)
auc_roc, auc_pr, accuracy, f1 = result
# NOTE(review): `_results` is never created in the visible script; unless it
# comes from a star import this assignment raises NameError.
_results['results'] = dict(
                auc_roc=auc_roc,
                auc_pr=auc_pr,
                accuracy=accuracy,
                f1=f1,
            )
print(_results)

# NOTE(review): `time` is imported mid-script and not used in the visible
# lines that follow.
import time
# Second run: a fresh split (the names from the SVD run are rebound),
# followed by a HOPE embedding of the new training graph.
G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(train_graph_filename, None, weighted=None)
g = read_for_OpenNE(train_graph_filename, weighted=None)
# NOTE(review): `hope` is not imported in the visible import lines -- confirm
# where it comes from.
model = hope.HOPE(g, 100)
model.save_embeddings("HOPE_OutputFile")
embedding_look_up = load_embedding("HOPE_OutputFile")
Esempio n. 3
0
def main(args):
    """Train an embedding and optionally evaluate it on a downstream task.

    For ``link-prediction`` the input graph is split, the embedding is
    trained on the training graph, and the held-out edges are scored. For
    ``node-classification`` the embedding is trained on the full input graph
    and evaluated against the labels in ``args.label_file``. Any other task
    value only trains the embedding.

    When ``args.eval_result_file`` is set and an evaluation was run, one JSON
    line describing the run and its metrics is appended to that file.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``method``,
            ``task``, ``input``, ``output``, ``seed``, ``weighted``,
            ``label_file``, ``testingratio``, ``dimensions`` and
            ``eval_result_file``.

    Raises:
        ValueError: If the task is ``node-classification`` and no label file
            was supplied.
    """
    print('#' * 70)
    print('Embedding Method: %s, Evaluation Task: %s' %
          (args.method, args.task))
    print('#' * 70)

    # Stays None when no evaluation task is requested, so the reporting
    # section below is skipped instead of hitting an unbound local.
    result = None

    if args.task == 'link-prediction':
        G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(
            args.input, args.seed, weighted=args.weighted)
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)
        embedding_look_up = load_embedding(args.output)
        time1 = time.time()
        print('Begin evaluation...')
        result = LinkPrediction(embedding_look_up, G, G_train,
                                testing_pos_edges, args.seed)
        eval_time = time.time() - time1
        print('Prediction Task Time: %.2f s' % eval_time)
        # Remove the training-graph file returned by the split.
        os.remove(train_graph_filename)
    elif args.task == 'node-classification':
        if not args.label_file:
            raise ValueError("No input label file. Exit.")
        node_list, labels = read_node_labels(args.label_file)
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)
        embedding_look_up = load_embedding(args.output, node_list)
        time1 = time.time()
        print('Begin evaluation...')
        result = NodeClassification(embedding_look_up, node_list, labels,
                                    args.testingratio, args.seed)
        eval_time = time.time() - time1
        print('Prediction Task Time: %.2f s' % eval_time)
    else:
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)

    if args.eval_result_file and result:
        _results = dict(
            input=args.input,
            task=args.task,
            method=args.method,
            dimension=args.dimensions,
            user=getpass.getuser(),
            date=datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S'),
            seed=args.seed,
        )

        if args.task == 'link-prediction':
            auc_roc, auc_pr, accuracy, f1 = result
            _results['results'] = dict(
                auc_roc=auc_roc,
                auc_pr=auc_pr,
                accuracy=accuracy,
                f1=f1,
            )
        else:
            accuracy, f1_micro, f1_macro = result
            _results['results'] = dict(
                accuracy=accuracy,
                f1_micro=f1_micro,
                f1_macro=f1_macro,
            )

        # Append so that repeated runs accumulate one JSON record per line.
        with open(args.eval_result_file, 'a+') as wf:
            print(json.dumps(_results, sort_keys=True), file=wf)
Esempio n. 4
0
def main(trials=None):
    """Train an embedding, optionally evaluate it, and report the metric.

    Command-line arguments come from ``parse_args()``. When ``trials`` is
    given, hyper-parameters sampled from it override the parsed values, and
    the output path is suffixed with the trial number.

    Args:
        trials: Optional trial object providing ``suggest_loguniform``,
            ``suggest_int``, ``suggest_uniform`` and ``number`` (presumably an
            Optuna trial -- confirm against the caller). ``None`` runs once
            with the parsed arguments only.

    Returns:
        ``auc_roc`` for link prediction or ``accuracy`` for node
        classification, but only when ``args.eval_result_file`` is set and an
        evaluation was run; otherwise ``None``.

    Raises:
        ValueError: If the task is ``node-classification`` and no label file
            was supplied.
    """
    args = parse_args()
    if trials is not None:
        params = {
            'C': trials.suggest_loguniform('C', 1e-10, 1e10),
            'window_size': trials.suggest_int('window_size', 1, 20),
            'pro_steps': trials.suggest_int('pro_steps', 1, 20),
            'pro_mu': trials.suggest_uniform('pro_mu', -1.0, 1.0),
            'pro_theta': trials.suggest_uniform('pro_theta', -1.0, 1.0),
            'output': "%s_trial_%s" % (args.output, str(trials.number))
        }
        # vars(args) is the namespace's own attribute dict, so updating it
        # overrides the parsed values in place.
        dargs = vars(args)
        dargs.update(params)
    print(args)

    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    print('#' * 70)
    print('Embedding Method: %s, Evaluation Task: %s' %
          (args.method, args.task))
    print('#' * 70)

    # Stays None when no evaluation task is requested, so the reporting
    # section below is skipped instead of hitting an unbound local.
    result = None

    if args.task == 'link-prediction':
        # Only forward the trial number when a trial exists; `trials`
        # defaults to None and has no `.number` in that case.
        split_kwargs = {'weighted': args.weighted}
        if trials is not None:
            split_kwargs['trial_number'] = trials.number
        G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(
            args.input, args.seed, **split_kwargs)
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)
        embedding_look_up = load_embedding(args.output)
        time1 = time.time()
        print('Begin evaluation...')
        result = LinkPrediction(embedding_look_up,
                                G,
                                G_train,
                                testing_pos_edges,
                                args.seed,
                                C=args.C)
        eval_time = time.time() - time1
        print('Prediction Task Time: %.2f s' % eval_time)
        # Remove the training-graph file returned by the split.
        os.remove(train_graph_filename)
    elif args.task == 'node-classification':
        if not args.label_file:
            raise ValueError("No input label file. Exit.")
        node_list, labels = read_node_labels(args.label_file)
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)
        embedding_look_up = load_embedding(args.output, node_list)
        time1 = time.time()
        print('Begin evaluation...')
        result = NodeClassification(embedding_look_up,
                                    node_list,
                                    labels,
                                    args.testingratio,
                                    args.seed,
                                    C=args.C)
        eval_time = time.time() - time1
        print('Prediction Task Time: %.2f s' % eval_time)
    else:
        train_graph_filename = args.input
        time1 = time.time()
        embedding_training(args, train_graph_filename)
        embed_train_time = time.time() - time1
        print('Embedding Learning Time: %.2f s' % embed_train_time)

    # NOTE(review): the metric is only returned when a result file is
    # configured; callers that need it unconditionally must set one.
    if not (args.eval_result_file and result):
        return None

    _results = dict(
        input=args.input,
        task=args.task,
        method=args.method,
        dimension=args.dimensions,
        user=getpass.getuser(),
        date=datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S'),
        seed=args.seed,
    )

    if args.task == 'link-prediction':
        auc_roc, auc_pr, accuracy, f1 = result
        _results['results'] = dict(
            auc_roc=auc_roc,
            auc_pr=auc_pr,
            accuracy=accuracy,
            f1=f1,
        )
        metric = auc_roc
    else:
        accuracy, f1_micro, f1_macro = result
        _results['results'] = dict(
            accuracy=accuracy,
            f1_micro=f1_micro,
            f1_macro=f1_macro,
        )
        metric = accuracy

    # Append so that repeated runs accumulate one JSON record per line.
    with open(args.eval_result_file, 'a+') as wf:
        print(json.dumps(_results, sort_keys=True), file=wf)

    return metric