# Example #1
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl import DGLGraph
import networkx as nx
# Load Pytorch as backend
dgl.load_backend('pytorch')
import numpy as np
from dgl.nn.pytorch import conv as dgl_conv
from dgl.data.utils import save_graphs,load_graphs
# Node attributes copied from the NetworkX graph into DGL ndata; 'degree'
# is used as the first feature column when assembling node features.
node_attr = ['degree','betweenness_centrality','path_len','pagerank','node_clustering_coefficient','identity']
# Edge attributes present on the graph (not read anywhere in this snippet).
edge_attr = ['timestamp']

def load_ws():
    """Load the Watts-Strogatz graph from 'ws_ori_attr.gpickle' into DGL.

    Copies the attributes listed in ``node_attr`` from the NetworkX graph,
    concatenates them column-wise into a single float feature matrix, and
    stores it in ``g1.ndata['features']``.

    Returns:
        dgl.DGLGraph: graph with a float 'features' ndata tensor of shape
        (num_nodes, num_feature_columns).
    """
    g = nx.read_gpickle('ws_ori_attr.gpickle')
    g1 = dgl.DGLGraph()
    g1.from_networkx(g, node_attr)
    # Derive the node count from the graph instead of hard-coding 5000,
    # so the loader works for graphs of any size.
    num_nodes = g1.number_of_nodes()
    # Start from 'degree' (assumed already 2-D per-node — TODO confirm)
    # and append every other attribute as extra columns.
    features = g1.ndata['degree']
    for attr in node_attr:
        if attr != 'degree':
            features = torch.cat((features, g1.ndata[attr].view(num_nodes, -1)), 1)
    features = features.float()
    g1.ndata['features'] = features
    return g1

def load_ba():
    """Load the Barabasi-Albert graph from 'ba_ori_attr.gpickle' into DGL.

    NOTE(review): this definition is truncated in this snippet — it stops
    right after reading the 'degree' attribute. Presumably it mirrors
    load_ws() for the BA graph; confirm against the full source.
    """
    g = nx.read_gpickle('ba_ori_attr.gpickle')
    g1 = dgl.DGLGraph()
    g1.from_networkx(g,node_attr)
    features = g1.ndata['degree']
# Example #2
import sys
from copy import deepcopy

import mxnet as mx
import networkx as nx
from mxnet import gluon

sys.path.insert(0, 'core/')
sys.path.insert(0, 'ml/')

import db_connection as db_con
import utils

from gnn_functions import prep_funcs as pr
from gnn_functions import gcn_nodeflow as gcn

import dgl
dgl.load_backend('mxnet')

# Path prefix for temporary on-disk graph data files.
GRAPH_FILE_PREFIX = '/tmp/graph_data'

# str.format-style placeholders for vector-table names — presumably
# substituted into queries/templates elsewhere (db_connection?); confirm
# against the callers, which are not visible in this snippet.
VEC_TABLE_TEMPL = '{vec_table}'
VEC_TABLE1_TEMPL = '{vec_table1}'
VEC_TABLE2_TEMPL = '{vec_table2}'


def perform_train(g, batch_size, num_neighbors, num_hops, train_mask, net,
                  labels, loss_fcn, optimizer):
    """Executes the training in batches of samples from the provided training
    data."""
    # NOTE(review): the function body is truncated in this snippet — only the
    # conversion of the mask and labels to numpy is visible. Presumably the
    # missing part shuffles the id set (per the comment below) and iterates
    # sampled batches; confirm against the full source.
    # shuffle id set
    # Convert MXNet NDArrays to numpy for CPU-side index manipulation.
    train_mask = train_mask.asnumpy()
    labels = labels.asnumpy()
# Example #3
def Train(directory, epochs, aggregator, embedding_size, layers, dropout,
          slope, lr, wd, random_seed, ctx):
    """Run 5-fold cross-validated training of the GNNMDA link-prediction model.

    Builds the miRNA-disease graph, splits the labelled samples with KFold,
    trains a fresh GraphEncoder+BilinearDecoder model per fold, and evaluates
    it on the held-out edges.

    Args:
        directory: data directory passed to build_graph/sample/load_data.
        epochs: training epochs per fold (10 optimizer steps per epoch).
        aggregator, embedding_size, layers, dropout, slope: GraphEncoder
            hyper-parameters.
        lr, wd: Adam learning rate and weight decay.
        random_seed: seed for random/numpy/mxnet and the KFold shuffle.
        ctx: mx.Context to train on.

    Returns:
        Tuple of per-fold metric lists:
        (auc_result, acc_result, pre_result, recall_result, f1_result,
         fprs, tprs) — the last two hold ROC-curve arrays per fold.
    """
    dgl.load_backend('mxnet')
    # Seed every RNG involved so folds and initialization are reproducible.
    random.seed(random_seed)
    np.random.seed(random_seed)
    mx.random.seed(random_seed)

    g, disease_ids_invmap, mirna_ids_invmap = build_graph(
        directory, random_seed=random_seed, ctx=ctx)
    samples = sample(directory, random_seed=random_seed)
    ID, IM = load_data(directory)

    print('## vertices:', g.number_of_nodes())
    print('## edges:', g.number_of_edges())
    print('## disease nodes:', nd.sum(g.ndata['type'] == 1).asnumpy())
    print('## mirna nodes:', nd.sum(g.ndata['type'] == 0).asnumpy())

    samples_df = pd.DataFrame(samples, columns=['miRNA', 'disease', 'label'])
    # miRNA vertex ids are offset by the number of disease rows (ID.shape[0]).
    sample_disease_vertices = [
        disease_ids_invmap[id_] for id_ in samples[:, 1]
    ]
    sample_mirna_vertices = [
        mirna_ids_invmap[id_] + ID.shape[0] for id_ in samples[:, 0]
    ]

    kf = KFold(n_splits=5, shuffle=True, random_state=random_seed)
    train_index = []
    test_index = []
    for train_idx, test_idx in kf.split(samples[:, 2]):
        train_index.append(train_idx)
        test_index.append(test_idx)

    auc_result = []
    acc_result = []
    pre_result = []
    recall_result = []
    f1_result = []

    fprs = []
    tprs = []

    for i in range(len(train_index)):
        print(
            '------------------------------------------------------------------------------------------------------'
        )
        print('Training for Fold ', i + 1)

        samples_df['train'] = 0
        samples_df['test'] = 0

        # Fix: use .loc instead of the original chained indexing
        # (samples_df['train'].iloc[...] = 1), which raises
        # SettingWithCopyWarning and is not guaranteed to write through to
        # the frame. samples_df has a default RangeIndex, so the positional
        # KFold indices are also valid labels here.
        samples_df.loc[train_index[i], 'train'] = 1
        samples_df.loc[test_index[i], 'test'] = 1

        train_tensor = nd.from_numpy(
            samples_df['train'].values.astype('int32')).copyto(ctx)
        test_tensor = nd.from_numpy(
            samples_df['test'].values.astype('int32')).copyto(ctx)

        edge_data = {'train': train_tensor, 'test': test_tensor}

        # Mark both directions of every sampled edge as train/test.
        g.edges[sample_disease_vertices,
                sample_mirna_vertices].data.update(edge_data)
        g.edges[sample_mirna_vertices,
                sample_disease_vertices].data.update(edge_data)

        train_eid = g.filter_edges(lambda edges: edges.data['train']).astype(
            'int64')
        g_train = g.edge_subgraph(train_eid, preserve_nodes=True)
        g_train.copy_from_parent()

        # get the training set
        rating_train = g_train.edata['rating']
        src_train, dst_train = g_train.all_edges()
        # get the testing edge set
        test_eid = g.filter_edges(lambda edges: edges.data['test']).astype(
            'int64')
        src_test, dst_test = g.find_edges(test_eid)
        rating_test = g.edges[test_eid].data['rating']
        src_train = src_train.copyto(ctx)
        src_test = src_test.copyto(ctx)
        dst_train = dst_train.copyto(ctx)
        dst_test = dst_test.copyto(ctx)
        print('## Training edges:', len(train_eid))
        print('## Testing edges:', len(test_eid))

        # Train the model — a fresh encoder/decoder pair per fold.
        model = GNNMDA(
            GraphEncoder(embedding_size=embedding_size,
                         n_layers=layers,
                         G=g_train,
                         aggregator=aggregator,
                         dropout=dropout,
                         slope=slope,
                         ctx=ctx),
            BilinearDecoder(feature_size=embedding_size))

        model.collect_params().initialize(
            init=mx.init.Xavier(magnitude=math.sqrt(2.0)), ctx=ctx)
        cross_entropy = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
        trainer = gluon.Trainer(model.collect_params(), 'adam', {
            'learning_rate': lr,
            'wd': wd
        })

        for epoch in range(epochs):
            start = time.time()
            # 10 full-batch optimizer steps per reported epoch.
            for _ in range(10):
                with mx.autograd.record():
                    score_train = model(g_train, src_train, dst_train)
                    loss_train = cross_entropy(score_train,
                                               rating_train).mean()
                    loss_train.backward()
                trainer.step(1)

            # Validate on the held-out edges (encode the FULL graph).
            h_val = model.encoder(g)
            score_val = model.decoder(h_val[src_test], h_val[dst_test])
            loss_val = cross_entropy(score_val, rating_test).mean()

            train_auc = metrics.roc_auc_score(
                np.squeeze(rating_train.asnumpy()),
                np.squeeze(score_train.asnumpy()))
            val_auc = metrics.roc_auc_score(np.squeeze(rating_test.asnumpy()),
                                            np.squeeze(score_val.asnumpy()))

            # Threshold sigmoid scores at 0.5 for the hard-label metrics.
            results_val = [
                0 if j < 0.5 else 1 for j in np.squeeze(score_val.asnumpy())
            ]
            accuracy_val = metrics.accuracy_score(rating_test.asnumpy(),
                                                  results_val)
            precision_val = metrics.precision_score(rating_test.asnumpy(),
                                                    results_val)
            recall_val = metrics.recall_score(rating_test.asnumpy(),
                                              results_val)
            f1_val = metrics.f1_score(rating_test.asnumpy(), results_val)

            end = time.time()

            print('Epoch:', epoch + 1,
                  'Train Loss: %.4f' % loss_train.asscalar(),
                  'Val Loss: %.4f' % loss_val.asscalar(),
                  'Acc: %.4f' % accuracy_val, 'Pre: %.4f' % precision_val,
                  'Recall: %.4f' % recall_val, 'F1: %.4f' % f1_val,
                  'Train AUC: %.4f' % train_auc, 'Val AUC: %.4f' % val_auc,
                  'Time: %.2f' % (end - start))

        # Final per-fold evaluation on the test edges.
        h_test = model.encoder(g)
        score_test = model.decoder(h_test[src_test], h_test[dst_test])

        fpr, tpr, thresholds = metrics.roc_curve(
            np.squeeze(rating_test.asnumpy()),
            np.squeeze(score_test.asnumpy()))
        test_auc = metrics.auc(fpr, tpr)

        results_test = [
            0 if j < 0.5 else 1 for j in np.squeeze(score_test.asnumpy())
        ]
        accuracy_test = metrics.accuracy_score(rating_test.asnumpy(),
                                               results_test)
        precision_test = metrics.precision_score(rating_test.asnumpy(),
                                                 results_test)
        recall_test = metrics.recall_score(rating_test.asnumpy(), results_test)
        f1_test = metrics.f1_score(rating_test.asnumpy(), results_test)

        print('Fold:', i + 1, 'Test Acc: %.4f' % accuracy_test,
              'Test Pre: %.4f' % precision_test,
              'Test Recall: %.4f' % recall_test, 'Test F1: %.4f' % f1_test,
              'Test AUC: %.4f' % test_auc)

        auc_result.append(test_auc)
        acc_result.append(accuracy_test)
        pre_result.append(precision_test)
        recall_result.append(recall_test)
        f1_result.append(f1_test)

        fprs.append(fpr)
        tprs.append(tpr)

    print('## Training Finished !')
    print(
        '----------------------------------------------------------------------------------------------------------'
    )

    return auc_result, acc_result, pre_result, recall_result, f1_result, fprs, tprs