Example #1
    def build_graph(cls, args):
        number_edges = args.number_edges
        metric = args.metric
        normalized_laplacian = args.normalized_laplacian
        coarsening_levels = args.coarsening_levels
        def grid_graph(m, corners=False):
            z = graph.grid(m)
            # compute pairwise distance
            dist, idx = graph.distance_sklearn_metrics(z, k=number_edges, metric=metric)
            A = graph.adjacency(dist, idx) # build adjacency matrix
            # Connections are only vertical or horizontal on the grid.
            # Corner vertices are connected to 2 neighbors only.
            if corners:
                A = A.toarray()
                A[A < A.max()/1.5] = 0
                A = scipy.sparse.csr_matrix(A)
                print('{} edges'.format(A.nnz))

            print("{} > {} edges".format(A.nnz//2, number_edges*m**2//2))
            return A
        
        g = grid_graph(28, corners=False)
        g = graph.replace_random_edges(g, 0)
        graphs, perm = coarsening.coarsen(g, levels=coarsening_levels, self_connections=False)
        laplacians = [graph.laplacian(g, normalized=normalized_laplacian) for g in graphs]
        
        cls.perm = perm
        cls.graphs = graphs
        cls.laplacians = laplacians
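
The `perm` stored on the class maps the original pixel order to the coarsened node order, so input features must be permuted the same way before training. A minimal sketch, assuming an illustrative `(N, 784)` float32 array `train_images` and the same `coarsening` module used above:

train_x = coarsening.perm_data(train_images, perm)
# perm_data appends fake, disconnected nodes and reorders the columns so that
# graph max-pooling later behaves like pooling over a balanced binary tree.
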
    def __init__(self, A, n_out=10):
        super(GraphCNN, self).__init__()

        # Precompute the coarsened graphs
        graphs, pooling_inds = coarsening.coarsen(A, levels=4)
        # In order to simulate 2x2 max pooling, combine the 4 levels
        # of graphs into 2 levels by combining pooling indices.
        graphs, pooling_inds = coarsening.combine(graphs, pooling_inds, 2)

        self.graph_layers = []
        sizes = [32, 64]
        for i, (g, inds, s) in enumerate(zip(graphs, pooling_inds, sizes)):
            f = GraphConvolution(None, s, g, K=25)
            self.add_link('gconv{}'.format(i), f)
            p = GraphMaxPoolingFunction(inds)
            self.graph_layers.append((f, p))

        self.linear_layers = []
        sizes = [512]
        for i, s in enumerate(sizes):
            f = L.Linear(None, s)
            self.add_link('l{}'.format(i), f)
            self.linear_layers.append(f)
        self.add_link('cls_layer', L.Linear(None, n_out))

        self.train = True
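
A matching forward pass is not shown in the snippet; a sketch of what it might look like (an assumption: it presumes the `GraphConvolution` and `GraphMaxPoolingFunction` instances are callable on the batch and that `chainer.functions` is imported as `F`):

    def __call__(self, x):
        # x: (batch, n_nodes) signal on the finest graph, already reordered
        # with coarsening.perm_data to match the pooling indices.
        h = x
        for conv, pool in self.graph_layers:   # graph convolution, then pooling
            h = F.relu(conv(h))
            h = pool(h)
        for linear in self.linear_layers:      # fully connected layers
            h = F.relu(linear(h))
        return self.cls_layer(h)               # n_out class scores
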
Example #3
def build_laplacian(k):
    fullgraph = pickle.load(open(r"C:\Users\veronica\Desktop\study\Deep Learning\Project\full_interactions_graph", 'rb'))[0:k, 0:k]
    A = csr_matrix(fullgraph).astype(np.float32)
    graphs, perm = coarsening.coarsen(A, levels=3, self_connections=False)
    L = [graph.laplacian(A, normalized=True) for A in graphs]
    pickle.dump(L, open("L"+str(k), 'wb'))
    pickle.dump(perm, open("perm"+str(k), 'wb'))
    return L, perm
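
A sketch of how the pickled objects could be loaded back and applied to a feature matrix `X` whose columns follow the original node order (the value of `k` and the names are illustrative):

k = 1000  # illustrative
L = pickle.load(open("L" + str(k), 'rb'))
perm = pickle.load(open("perm" + str(k), 'rb'))
X_perm = coarsening.perm_data(X, perm)  # pad and reorder columns to the coarsened ordering
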
Example #4
    def gene_graph(self, A):
        coarsening_levels = 4

        L, perm = coarsen(A, coarsening_levels)
        train_data = perm_data(copy.copy(self.train_data), perm)
        test_data = perm_data(copy.copy(self.test_data), perm)
        self.layer1 = (L[0].shape)
        self.D = self.layer1[0]
        print(self.D)
        lmax = []
        for i in range(coarsening_levels + 1):
            lmax.append(lmax_L(L[i]))
        return train_data, test_data, self.train_label, self.test_label, L, lmax
Example #5
    def build_graph(cls, args):
        number_edges = args.number_edges
        metric = args.metric
        normalized_laplacian = args.normalized_laplacian
        coarsening_levels = args.coarsening_levels
        data_dir = 'data/20news'
        embed_path = os.path.join(data_dir, 'embeddings.npy')
        graph_data = np.load(embed_path).astype(np.float32)
        dist, idx = graph.distance_sklearn_metrics(graph_data,
                                                   k=number_edges,
                                                   metric=metric)
        adj_matrix = graph.adjacency(dist, idx)
        print("{} > {} edges".format(adj_matrix.nnz // 2,
                                     number_edges * graph_data.shape[0] // 2))
        adj_matrix = graph.replace_random_edges(adj_matrix, 0)
        graphs, perm = coarsening.coarsen(adj_matrix,
                                          levels=coarsening_levels,
                                          self_connections=False)
        laplacians = [
            graph.laplacian(g, normalized=normalized_laplacian) for g in graphs
        ]
        cls.perm = perm
        cls.graphs = graphs
        cls.laplacians = laplacians
Example #6
    X_train = X[:n_train, ...]
    X_val   = X[n_train:, ...]

    y_train = y[:n_train, ...]
    y_val   = y[n_train:, ...]

    A = np.load('/Neutron9/joyneel.misra/npys/meanFC_d'+str(d)+'.npy')
    A = A - np.min(A)
    A = scipy.sparse.csr_matrix(A)
    d = X.shape[1]

    assert A.shape == (d, d)
    print('d = |V| = {}, k|V| < |E| = {}'.format(d, A.nnz))

    graphs, perm = coarsening.coarsen(A, levels=3, self_connections=False)

    X_train = coarsening.perm_data(X_train, perm)
    X_val = coarsening.perm_data(X_val, perm)

    L = [graph.laplacian(A, normalized=True) for A in graphs]
    L = [elem.astype(np.float32) for elem in L]

    params = dict()
    params['dir_name']       = 'demo'
    params['num_epochs']     = 10
    params['batch_size']     = 40
    params['eval_frequency'] = 100

    # Building blocks.
    params['filter']         = 'chebyshev5'
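
The snippet stops partway through the hyperparameter dictionary. A completed dictionary is consumed as sketched below, mirroring Examples #9 and #11 later on this page (an assumption based on those examples, not part of this snippet):

model = models.cgcnn(L, **params)                      # Chebyshev graph CNN
accuracy, loss, t_step = model.fit(X_train, y_train, X_val, y_val)
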
Example #7
        A = A.toarray()
        A[A < A.max()/1.5] = 0
        A = scipy.sparse.csr_matrix(A)
        print('{} edges'.format(A.nnz))

    print("{} > {} edges".format(A.nnz//2, FLAGS.number_edges*m**2//2))
    return A



t_start = time.process_time()
A = grid_graph(28, corners=False)

A = graph.replace_random_edges(A, 0)

graphs, perm = coarsening.coarsen(A, levels=FLAGS.coarsening_levels, self_connections=False)

L = [graph.laplacian(A, normalized=True, renormalized=True) for A in graphs]
#print(L.dtype)
#print(L)
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
graph.plot_spectrum(L)
print("DONE")

del A
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False)

train_data = mnist.train.images.astype(np.float32)
val_data = mnist.validation.images.astype(np.float32)
test_data = mnist.test.images.astype(np.float32)
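
Before these arrays can be fed to a model built on `L`, their columns have to be reordered with the same `perm`, mirroring the `perm_data` calls in the other examples (a sketch, not part of the original snippet):

train_data = coarsening.perm_data(train_data, perm)
val_data = coarsening.perm_data(val_data, perm)
test_data = coarsening.perm_data(test_data, perm)
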
Example #8
# ----- Load adjacency matrix of LB-operator or graph Laplacian ---------------
# L = D-W
print('Loading adjacency matrix ...')
f_adjacency = h5py.File(Adjfilename, 'r')
W = sparse.csr_matrix(
    (f_adjacency["W"]["data"], f_adjacency["W"]["ir"], f_adjacency["W"]["jc"]))
W = W.astype(np.float32)
print('Size of W: ', W.shape, '\n')

# ----- Graph coarsening ------------------------------------------------------
print('Graph coarsening  ...')
print('Original: |V| = {} nodes, |E| = {} edges'.format(
    W.shape[0], int(W.nnz / 3)))
graphs, perm = coarsening.coarsen(W,
                                  levels=coarsen_level,
                                  self_connections=False)

# exchange node ids so that binary unions form the clustering tree.
data_train = coarsening.perm_data(data_train, perm)
data_valid = coarsening.perm_data(data_valid, perm)
data_test = coarsening.perm_data(data_test, perm)

# ----- Update LB-operator or graph Laplacian for each coarsened level --------
L = [
    graph.laplacian(W2, normalized=params['normalized']).transpose()
    for W2 in graphs
]

# ----- Training and validation -----------------------------------------------
n_train = data_train.shape[0]  # Number of train samples.
Example #9
def main():
    createFolder('Result')
    config_file = sys.argv[1]
    with open(config_file, 'r') as f:
        config = yaml.safe_load(f)

    PPI_data = config["PPI_data"]
    Response_data = config["Response_data"]
    Gene_data = config["Gene_data"]
    n_fold = config["n_fold"]
    test_size = config["test_size"]
    num_epochs = config["num_epochs"]
    batch_size = config["batch_size"]
    brelu = config["brelu"]
    pool = config["pool"]
    regularization = config["regularization"]
    dropout = config["dropout"]
    learning_rate = config["learning_rate"]
    decay_rate = config["decay_rate"]
    momentum = config["momentum"]
    Name = config["Name"]
    F = config["F"]
    K = config["K"]
    p = config["p"]
    M = config["M"]

    data_PPI = pd.read_csv(PPI_data)
    data_PPI.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_IC50 = pd.read_csv(Response_data)
    data_IC50.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_Gene = pd.read_csv(Gene_data)
    data_Gene.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_Gene = np.array(data_Gene)

    df = np.array(data_PPI)
    A = coo_matrix(df, dtype=np.float32)
    print(A.nnz)
    graphs, perm = coarsening.coarsen(A, levels=6, self_connections=False)
    L = [graph.laplacian(A, normalized=True) for A in graphs]
    graph.plot_spectrum(L)

    n_fold = n_fold
    PCC = []
    SPC = []
    RMSE = []

    X_train, X_test, Y_train, Y_test = train_test_split(data_Gene,
                                                        data_IC50,
                                                        test_size=test_size,
                                                        shuffle=True,
                                                        random_state=20)

    for cv in range(n_fold):
        Y_pred = np.zeros([Y_test.shape[0], Y_test.shape[1]])
        Y_test = np.zeros([Y_test.shape[0], Y_test.shape[1]])
        j = 0
        for i in range(Y_test.shape[1]):
            data1 = data_IC50.iloc[:, i]
            data1 = np.array(data1)
            data_minmax = data1[~np.isnan(data1)]
            min = data_minmax.min()
            max = data_minmax.max()
            data1 = (data1 - min) / (max - min)

            train_data_split, test_data_split, train_labels_split, test_labels_split = train_test_split(
                data_Gene,
                data1,
                test_size=test_size,
                shuffle=True,
                random_state=20)
            train_data = np.array(
                train_data_split[~np.isnan(train_labels_split)]).astype(
                    np.float32)

            list_train, list_val = Validation(n_fold, train_data,
                                              train_labels_split)

            train_data_V = train_data[list_train[cv]]
            val_data = train_data[list_val[cv]]
            test_data = np.array(test_data_split[:]).astype(np.float32)
            train_labels = np.array(
                train_labels_split[~np.isnan(train_labels_split)]).astype(
                    np.float32)
            train_labels_V = train_labels[list_train[cv]]
            val_labels = train_labels[list_val[cv]]
            test_labels = np.array(test_labels_split[:]).astype(np.float32)
            train_data_V = coarsening.perm_data(train_data_V, perm)
            val_data = coarsening.perm_data(val_data, perm)
            test_data = coarsening.perm_data(test_data, perm)

            common = {}
            common['num_epochs'] = num_epochs
            common['batch_size'] = batch_size
            common['decay_steps'] = train_data.shape[0] / common['batch_size']
            common['eval_frequency'] = 10 * common['num_epochs']
            common['brelu'] = brelu
            common['pool'] = pool

            common['regularization'] = regularization
            common['dropout'] = dropout
            common['learning_rate'] = learning_rate
            common['decay_rate'] = decay_rate
            common['momentum'] = momentum
            common['F'] = F
            common['K'] = K
            common['p'] = p
            common['M'] = M

            if True:
                name = Name
                params = common.copy()

            model = models.cgcnn(L, **params)
            loss, t_step = model.fit(train_data_V, train_labels_V, val_data,
                                     val_labels)

            Y_pred[:, j] = model.predict(test_data)
            Y_test[:, j] = test_labels
            j = j + 1

        np.savez(('Result/GraphCNN_CV_{}'.format(cv)),
                 Y_true=Y_test,
                 Y_pred=Y_pred)
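
`main` reads the path of a YAML config file from `sys.argv[1]`. A minimal sketch of generating such a file; only the key names come from the snippet above, all values are illustrative placeholders:

import yaml

config = {
    "PPI_data": "ppi.csv", "Response_data": "ic50.csv", "Gene_data": "gene.csv",
    "n_fold": 5, "test_size": 0.2, "num_epochs": 20, "batch_size": 32,
    "brelu": "b1relu", "pool": "apool1", "regularization": 5e-4, "dropout": 1,
    "learning_rate": 1e-3, "decay_rate": 0.95, "momentum": 0.9, "Name": "demo",
    "F": [32, 32], "K": [6, 6], "p": [4, 4], "M": [512, 1],
}
with open("config.yaml", "w") as f:
    yaml.dump(config, f)
# Run as: python <script>.py config.yaml
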
Example #10
def prepare(dataset):
    # # MNIST
    if dataset == 'mnist':
        mnist = input_data.read_data_sets(
            'datasets', one_hot=False)  # load data in folder datasets/
        train_data = mnist.train.images.astype(np.float32)
        val_data = mnist.validation.images.astype(np.float32)
        test_data = mnist.test.images.astype(np.float32)
        train_labels = mnist.train.labels
        val_labels = mnist.validation.labels
        test_labels = mnist.test.labels
        print(train_data.shape)

        print(train_labels.shape)
        print(val_data.shape)
        print(val_labels.shape)
        print(test_data.shape)
        print(test_labels.shape)

        # Construct graph
        t_start = time.time()
        grid_side = 28  # MNIST images are 28x28 pixels
        number_edges = 8
        metric = 'euclidean'
        A = grid_graph(grid_side, number_edges,
                       metric)  # create graph of Euclidean grid
        print(A.shape)

    elif dataset == 'adni':
        t_start = time.time()
        train_data, test_data, train_labels, test_labels, A = load_data(
            train_rate=train_rate,
            thresh=thresh,
            binary=binary,
            num=num,
            state=state)  # 0.35
        #A = np.load(saved_path+"_graph_A_2.npy")
        #A = np.load(saved_path+"_init_graph.npy")
        #A = np.load("UTA/multi_30_20_3_300_found_graph.npy")
        #A = np.load("UTA/multi_30_20_3_300_found_graph.npy")
        #A = np.load("UTA/multi_30_20_3_800_found_graph_left_candidate_465.npy")
        #A = scipy.sparse.coo_matrix(A)
        # print(A)
        # print( scipy.sparse.coo_matrix(A))
        #np.save(saved_path+"_init_graph.npy",A.toarray())

        # print("data shape ====")
        # print(train_data.shape)
        # print(train_data[0][0])
        # print(train_labels.shape)
        # print(test_data.shape)
        # print(test_labels.shape)
        # print("data shape end")
        if verbose:
            fig = plt.figure()
            # split the figure into a 1x1 grid and draw in the first cell
            ax = fig.add_subplot(111)
            # define the ticks of the x and y axes
            # ax.set_yticks(range(len(yLabel)))
            # ax.set_yticklabels(yLabel, fontproperties=font)
            # ax.set_xticks(range(len(xLabel)))
            # ax.set_xticklabels(xLabel)
            # draw the heatmap and choose its color map; 'hot_r' is used here
            #print("A===")
            #print(A)
            im = ax.imshow(A.toarray(), cmap=plt.cm.hot_r)
            # add the colorbar on the right
            plt.colorbar(im)
            # add a title
            plt.title("This is the original graph")
            #show
            plt.show()

        # print(train_data.shape)
        # print(test_data.shape)
        # print("baseline: ", sum(test_labels)/test_labels.shape[0])

        # grid_side = 180 # 102
        # number_edges = 101
        # metric = 'euclidean'
        # print(A)

    # Compute coarsened graphs

    coarsening_levels = 4
    L, perm = coarsen(A, coarsening_levels)

    #print(L)

    global layer1
    layer1 = (L[0].shape)
    #print(perm)
    #print(set(perm))
    # Compute max eigenvalue of graph Laplacians
    lmax = []
    for i in range(coarsening_levels):
        lmax.append(lmax_L(L[i]))
    #print('lmax: ' + str([lmax[i] for i in range(coarsening_levels)]))

    # # Reindex nodes to satisfy a binary tree structure
    train_data = perm_data(train_data, perm)
    # val_data = perm_data(val_data, perm)
    test_data = perm_data(test_data, perm)
    #
    #print(train_data.shape)
    #print(val_data.shape)
    #print(test_data.shape)
    '''
    test part for update graph
    '''
    #a,b = update_graph(A)
    #print(a[0].shape == (144,144))
    #exit()
    '''
    test part for update graph
    '''
    #print('Execution time: {:.2f}s'.format(time.time() - t_start))
    del perm
    return train_data, train_labels, test_data, test_labels, L, lmax, A
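
A sketch of calling `prepare`; only the return signature comes from the function above, the dataset string and the print are illustrative:

train_data, train_labels, test_data, test_labels, L, lmax, A = prepare('mnist')
print(len(L), 'Laplacians; finest graph has', L[0].shape[0], 'nodes')
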
Example #11
def cross_validate_convNN(X,
                          y,
                          adjacency,
                          name_param,
                          value_param,
                          k,
                          num_levels=5):

    split_index = split_test_train_for_cv(X.shape[0], k_fold=k)
    graphs, perm = coarsening.coarsen(sp.csr_matrix(
        adjacency.astype(np.float32)),
                                      levels=num_levels,
                                      self_connections=False)

    accuracy = []
    loss = []
    for param_val in value_param:
        accuracy_param = []
        loss_param = []
        for k_ in range(k):
            test_samples = split_index[k_]
            train_samples = split_index[~(
                np.arange(split_index.shape[0]) == k_)].flatten()

            X_train = X[train_samples]
            X_test = X[test_samples]
            y_train = y[train_samples]
            y_test = y[test_samples]

            X_train = coarsening.perm_data(X_train, perm)
            X_test = coarsening.perm_data(X_test, perm)
            n_train = X_train.shape[0]

            L = [graph.laplacian(A, normalized=True) for A in graphs]

            # Conv NN parameters
            params = dict()
            params['dir_name'] = 'demo'
            params['num_epochs'] = 30
            params['batch_size'] = 30
            params['eval_frequency'] = 30

            # Building blocks.
            params['filter'] = 'chebyshev5'
            params['brelu'] = 'b1relu'
            params['pool'] = 'apool1'

            # Number of classes.
            C = y.max() + 1
            assert C == np.unique(y).size

            # Architecture.
            params['F'] = [4, 8]  # Number of graph convolutional filters.
            params['K'] = [3, 3]  # Polynomial orders.
            params['p'] = [2, 8]  # Pooling sizes.
            params['M'] = [
                256, C
            ]  # Output dimensionality of fully connected layers.

            # Optimization.
            params['regularization'] = 4e-5
            params['dropout'] = 1
            params['learning_rate'] = 3e-3
            params['decay_rate'] = 0.9
            params['momentum'] = 0.8
            params['decay_steps'] = n_train / params['batch_size']
            params[name_param] = param_val

            model = models.cgcnn(L, **params)
            test_acc, train_loss, t_step = model.fit(X_train, y_train, X_test,
                                                     y_test)
            accuracy_param.append([max(test_acc), np.mean(test_acc)])
            loss_param.append([max(train_loss), np.mean(train_loss)])
        print(np.array(accuracy_param))
        pm = np.mean(np.array(accuracy_param), axis=0)
        pl = np.mean(np.array(loss_param), axis=0)
        print(
            "IIIII Accuracy: %0.2f (max) %0.2f (mean) Loss: %0.2f (max) %0.2f (mean)"
            % (pm[0], pm[1], pl[0], pl[1]))
        accuracy.append(pm)
        loss.append(pl)
    return accuracy, loss
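
A usage sketch for the cross-validation helper; the swept parameter and its values are illustrative, and `X`, `y`, `adjacency` are assumed to be the features, labels and adjacency matrix prepared elsewhere:

accuracy, loss = cross_validate_convNN(
    X, y, adjacency,
    name_param='learning_rate',
    value_param=[1e-3, 3e-3, 1e-2],
    k=5,
    num_levels=5,
)
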