Example #1
File: graph.py  Project: sj1104/Het
import numpy as np  # np.vstack is used in the PyTorch and TensorFlow branches below

def mp_matrix(graph, device, system="Hetu", use_original_gcn_norm=False):
    # Build the GCN-normalized message-passing (sparse adjacency) matrix for the chosen backend.
    norm = graph.gcn_norm(use_original_gcn_norm)
    if system == "Hetu":
        from hetu import ndarray
        mp_mat = ndarray.sparse_array(values=norm,
                                      indices=(graph.edge_index[1],
                                               graph.edge_index[0]),
                                      shape=(graph.num_nodes, graph.num_nodes),
                                      ctx=device)
        return mp_mat
    elif system == "Pytorch":
        import torch
        indices = np.vstack((graph.edge_index[1], graph.edge_index[0]))
        mp_mat = torch.sparse.FloatTensor(indices=torch.LongTensor(indices),
                                          values=torch.FloatTensor(norm),
                                          size=(graph.num_nodes,
                                                graph.num_nodes))
        return mp_mat.to(device)
    elif system == "tensorflow":
        import tensorflow as tf
        indices = np.vstack((graph.edge_index[1], graph.edge_index[0])).T
        shape = np.array([graph.num_nodes, graph.num_nodes], dtype=np.int64)
        mp_val = tf.compat.v1.SparseTensorValue(indices, norm, shape)
        return mp_val
    else:
        raise NotImplementedError
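A minimal usage sketch (hypothetical, not taken from the project): it assumes a `graph` object exposing `edge_index`, `num_nodes`, and `gcn_norm`, as the function above requires, and that `ndarray` comes from Hetu.

# Hetu: device must be a Hetu context.
mp_hetu = mp_matrix(graph, ndarray.gpu(0), system="Hetu")
# PyTorch: device can be anything torch.Tensor.to() accepts, e.g. "cuda:0".
mp_torch = mp_matrix(graph, "cuda:0", system="Pytorch")
# TensorFlow: the device argument is unused in this branch; a SparseTensorValue is returned.
mp_tf = mp_matrix(graph, None, system="tensorflow")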
Example #2
def test_sparse_matrix_multiply():
    # Assumes module-level imports of numpy (np), scipy.sparse, and Hetu's ndarray / gpu_op.
    density = 1e-3
    ctx = ndarray.gpu(0)
    # Random sparse matrix in COO format on the GPU, multiplied by a dense matrix.
    x = scipy.sparse.rand(500, 7000, density=density, format='coo', dtype=np.float32)
    y = np.random.uniform(0, 10, size=(7000, 100)).astype(np.float32)
    mat_x = ndarray.sparse_array(x.data, (x.row, x.col), shape=[500, 7000], ctx=ctx)
    mat_y = ndarray.array(y, ctx=ctx)
    mat_z = ndarray.empty((500, 100), ctx=ctx)
    gpu_op.CuSparse_Csrmm(mat_x, False, mat_y, False, mat_z)
    z = mat_z.asnumpy()
    # Check the GPU result against scipy's CPU sparse-dense product.
    np.testing.assert_allclose(x.dot(y), z, rtol=1e-5)
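The values above are passed as COO triples (data, (row, col)), while the kernel name CuSparse_Csrmm suggests the cuSPARSE CSR sparse-dense multiply. The COO/CSR correspondence can be inspected on the CPU with scipy alone (a small sketch, independent of Hetu):

import numpy as np
import scipy.sparse

coo = scipy.sparse.rand(500, 7000, density=1e-3, format='coo', dtype=np.float32)
csr = coo.tocsr()                # the same matrix in CSR form
print(csr.indptr.shape)          # (501,)  -> one row offset per row, plus one
print(csr.indices.shape)         # (nnz,)  -> column index of each nonzero
print(csr.data.shape)            # (nnz,)  -> value of each nonzero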
Example #3
def test_sparse_array_dense_vector_multiply():
    # Assumes module-level imports of numpy (np), scipy.sparse, and Hetu's ndarray / gpu_op.
    density = 1e-3
    ctx = ndarray.gpu(0)
    # Case 1: z = A @ y with a 500 x 70000 sparse matrix (trans=False).
    x = scipy.sparse.rand(500, 70000, density=density, format='coo', dtype=np.float32)
    y = np.random.uniform(0, 10, size=(70000, 1)).astype(np.float32)
    mat_x = ndarray.sparse_array(x.data, (x.row, x.col), shape=[500, 70000], ctx=ctx)
    arr_y = ndarray.array(y, ctx=ctx)
    arr_z = ndarray.empty((500, 1), ctx=ctx)
    trans = False
    gpu_op.CuSparse_Csrmv(mat_x, trans, arr_y, arr_z)
    z = arr_z.asnumpy()
    np.testing.assert_allclose(x.dot(y), z, rtol=1e-5)

    # Case 2: z = A^T @ y with a 70000 x 500 sparse matrix (trans=True).
    x = scipy.sparse.rand(70000, 500, density=density, format='coo', dtype=np.float32)
    y = np.random.uniform(0, 10, size=(70000, 1)).astype(np.float32)
    mat_x = ndarray.sparse_array(x.data, (x.row, x.col), shape=[70000, 500], ctx=ctx)
    arr_y = ndarray.array(y, ctx=ctx)
    arr_z = ndarray.empty((500, 1), ctx=ctx)
    trans = True
    gpu_op.CuSparse_Csrmv(mat_x, trans, arr_y, arr_z)
    z = arr_z.asnumpy()
    np.testing.assert_allclose(x.transpose().dot(y), z, rtol=1e-5)
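In the transposed case the stored matrix is 70000 x 500, so trans=True applies its transpose (500 x 70000) to y (70000 x 1), which is why arr_z is again (500, 1). The reference shapes can be checked on the CPU with scipy (a small sketch, independent of Hetu):

import numpy as np
import scipy.sparse

x = scipy.sparse.rand(70000, 500, density=1e-3, format='coo', dtype=np.float32)
y = np.random.uniform(0, 10, size=(70000, 1)).astype(np.float32)
z = x.transpose().dot(y)   # (500, 70000) @ (70000, 1)
print(z.shape)             # (500, 1)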
Example #4
def test(args):
    comm, device_id = ad.mpi_nccl_init()
    rank = comm.localRank.value
    size = comm.nRanks.value

    dataset_info = {
        'Reddit': [232965, 602, 41],
        'Proteins': [132534, 602, 8],
        'Arch': [1644228, 602, 10],
        'Products': [2449029, 100, 47]
    }

    node_count, num_features, num_classes = dataset_info[args.dataset]

    hidden_layer_size = 128
    if num_features < 128:
        hidden_layer_size = 64

    replication = args.replication

    node_Count_Self = row_num(node_count, rank // replication,
                              size // replication)
    node_Count_All = node_count

    _, _, row_groups, col_groups = get_proc_groups(size, replication)

    executor_ctx = ndarray.gpu(device_id)

    if size > 1:
        adj_part, data_part, row_part, col_part, input_part, label_part = load_data(
            args, size, replication, rank)
    else:
        adj_part, data_part, row_part, col_part, input_part, label_part = load_data_whole(
            args)

    adj_matrix = ndarray.sparse_array(data_part, (row_part, col_part),
                                      shape=adj_part.shape,
                                      ctx=executor_ctx)

    # train:val:test = 6:2:2
    # Our optimization of the distributed GNN algorithm does NOT affect correctness!
    # Due to the limitation of the current slice_op, the data here is split into contiguous ranges.
    # A contiguous split is unfriendly to reordered graph data in which nodes are already clustered:
    # training on some node clusters and testing on others may give poor test accuracy.
    # The better way is to split the data randomly (see the commented sketch below).
    train_split, test_split = 0.6, 0.8
    train_node = int(train_split * node_Count_Self)
    test_node = int(test_split * node_Count_Self)
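    # A random split could look like the following (hypothetical sketch; the slice_op used
    # below still requires contiguous ranges, so it is not applied in this example):
    #   perm = np.random.permutation(node_Count_Self)
    #   train_idx = perm[:train_node]
    #   test_idx = perm[test_node:]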

    A = ad.Variable(name="A", trainable=False)
    H = ad.Variable(name="H")
    np.random.seed(123)
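    # Glorot/Xavier-uniform initialization: bound = sqrt(6 / (fan_in + fan_out)).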
    bounds = np.sqrt(6.0 / (num_features + hidden_layer_size))
    W1_val = np.random.uniform(low=-bounds,
                               high=bounds,
                               size=[num_features,
                                     hidden_layer_size]).astype(np.float32)
    W1 = ad.Variable(name="W1", value=W1_val)
    bounds = np.sqrt(6.0 / (num_classes + hidden_layer_size))
    np.random.seed(123)
    W2_val = np.random.uniform(low=-bounds,
                               high=bounds,
                               size=[hidden_layer_size,
                                     num_classes]).astype(np.float32)

    W2 = ad.Variable(name="W2", value=W2_val)
    y_ = ad.Variable(name="y_")

    z = ad.distgcn_15d_op(A, H, W1, node_Count_Self, node_Count_All, size,
                          replication, device_id, comm,
                          [row_groups, col_groups], True)
    H1 = ad.relu_op(z)
    y = ad.distgcn_15d_op(A, H1, W2, node_Count_Self, node_Count_All, size,
                          replication, device_id, comm,
                          [row_groups, col_groups], True)

    y_train = ad.slice_op(y, (0, 0), (train_node, num_classes))
    label_train = ad.slice_op(y_, (0, 0), (train_node, num_classes))

    y_test = ad.slice_op(y, (test_node, 0),
                         (node_Count_Self - test_node, num_classes))
    label_test = ad.slice_op(y_, (test_node, 0),
                             (node_Count_Self - test_node, num_classes))

    loss = ad.softmaxcrossentropy_op(y_train, label_train)
    loss_test = ad.softmaxcrossentropy_op(y_test, label_test)
    opt = optimizer.AdamOptimizer()
    train_op = opt.minimize(loss)

    executor = ad.Executor([loss, y, loss_test, train_op], ctx=executor_ctx)

    feed_dict = {
        A: adj_matrix,
        H: ndarray.array(input_part, ctx=executor_ctx),
        y_: ndarray.array(convert_to_one_hot(label_part, max_val=num_classes),
                          ctx=executor_ctx),
    }

    epoch_num = 100
    epoch_all, epoch_0 = 0, 0

    for i in range(epoch_num):
        epoch_start_time = time.time()
        results = executor.run(feed_dict=feed_dict)
        loss = results[0].asnumpy().sum()
        y_out = results[1]
        loss_test = results[2].asnumpy().sum()
        epoch_end_time = time.time()
        epoch_time = epoch_end_time - epoch_start_time
        epoch_all += epoch_time
        if i == 0:
            epoch_0 = epoch_time

        print("[Epoch: %d, Rank: %d] Epoch time: %.3f, Total time: %.3f" %
              (i, rank, epoch_time, epoch_all))

        y_out_np = y_out.asnumpy().argmax(axis=1)
        y_out_train, y_predict = y_out_np[:train_node], y_out_np[test_node:]
        label_train, label_test = label_part[:train_node], label_part[test_node:]
        train_acc = ndarray.array(np.array([(y_out_train == label_train).sum()
                                            ]),
                                  ctx=executor_ctx)
        test_acc = ndarray.array(np.array([(y_predict == label_test).sum()]),
                                 ctx=executor_ctx)
        train_loss = ndarray.array(np.array([loss]), ctx=executor_ctx)
        test_loss = ndarray.array(np.array([loss_test]), ctx=executor_ctx)

        if replication > 1:
            col_groups[rank % replication].dlarrayNcclAllReduce(
                test_acc, test_acc, ncclDataType_t.ncclFloat32,
                ncclRedOp_t.ncclSum)
            col_groups[rank % replication].dlarrayNcclAllReduce(
                test_loss, test_loss, ncclDataType_t.ncclFloat32,
                ncclRedOp_t.ncclSum)
            col_groups[rank % replication].dlarrayNcclAllReduce(
                train_acc, train_acc, ncclDataType_t.ncclFloat32,
                ncclRedOp_t.ncclSum)
            col_groups[rank % replication].dlarrayNcclAllReduce(
                train_loss, train_loss, ncclDataType_t.ncclFloat32,
                ncclRedOp_t.ncclSum)
        else:
            comm.dlarrayNcclAllReduce(test_acc, test_acc,
                                      ncclDataType_t.ncclFloat32,
                                      ncclRedOp_t.ncclSum)
            comm.dlarrayNcclAllReduce(test_loss, test_loss,
                                      ncclDataType_t.ncclFloat32,
                                      ncclRedOp_t.ncclSum)
            comm.dlarrayNcclAllReduce(train_acc, train_acc,
                                      ncclDataType_t.ncclFloat32,
                                      ncclRedOp_t.ncclSum)
            comm.dlarrayNcclAllReduce(train_loss, train_loss,
                                      ncclDataType_t.ncclFloat32,
                                      ncclRedOp_t.ncclSum)

        test_acc = float(
            test_acc.asnumpy()[0]) / (node_count - test_split * node_count)
        test_loss = test_loss.asnumpy()[0] / (node_count -
                                              test_split * node_count)
        train_acc = float(train_acc.asnumpy()[0]) / (train_split * node_count)
        train_loss = train_loss.asnumpy()[0] / (train_split * node_count)

        if rank == 0:
            print("[Epoch: %d] Train Loss: %.3f, Train Accuracy: %.3f, Test Loss: %.3f, Test Accuracy: %.3f"\
            %(i,train_loss, train_acc, test_loss, test_acc))

    avg_epoch_time = (epoch_all - epoch_0) / (epoch_num - 1)
    results = ndarray.array(np.array([epoch_all, avg_epoch_time]),
                            ctx=executor_ctx)
    comm.dlarrayNcclAllReduce(results,
                              results,
                              ncclDataType_t.ncclFloat32,
                              reduceop=ncclRedOp_t.ncclSum)
    results = results.asnumpy() / size

    if rank == 0:
        print("\nAverage Total Time: %.3f, Average Epoch Time: %.3f" %
              (results[0], results[1]))
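The helper convert_to_one_hot is not shown in this excerpt. Judging only from how it is called above (an integer label vector plus max_val=num_classes), a plausible stand-in would be the following sketch (hypothetical, not the project's actual implementation):

import numpy as np

def convert_to_one_hot_sketch(labels, max_val):
    # Hypothetical stand-in: map an integer label vector to an (n, max_val) one-hot float matrix.
    labels = np.asarray(labels, dtype=np.int64).reshape(-1)
    one_hot = np.zeros((labels.shape[0], max_val), dtype=np.float32)
    one_hot[np.arange(labels.shape[0]), labels] = 1.0
    return one_hot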
Example #5
def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)
    feed_dict = {}
    nparts = 4
    graph.add_self_loop()
    norm = graph.gcn_norm(True)
    graphs, edge_list, reindexed_edges = graph.part_graph(nparts)
    x_val = np.concatenate(list(map(lambda g: g.x, graphs)))
    y_concat = np.concatenate(list(map(lambda g: g.y, graphs)))
    y_val = convert_to_one_hot(
        y_concat, max_val=graph.num_classes)  # shape=(n, num_classes)
    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    feed_dict[x_] = ndarray.array(x_val, ctx=ctx)
    feed_dict[y_] = ndarray.array(y_val, ctx=ctx)
    gcn1 = PCGCN(graph.num_features, 16, npart=nparts)
    gcn2 = PCGCN(16, graph.num_classes, npart=nparts)
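    # mp_val[i][j] holds the message-passing block whose columns are nodes of partition i
    # (sources) and whose rows are nodes of partition j (destinations).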
    mp_val = [[None for j in range(nparts)] for i in range(nparts)]
    use_sparse = [True for g in graphs]
    for i in range(nparts):
        for j in range(nparts):
            if i == j:
                edges = graphs[i].edge_index
            else:
                edges = pick_edges(reindexed_edges, edge_list[i][j])

            if i == j and not use_sparse[i]:
                mp_val[i][j] = sparse.csr_matrix(
                    (norm[edge_list[i][j]], (edges[1], edges[0])),
                    shape=(graphs[j].num_nodes,
                           graphs[i].num_nodes)).toarray()
            else:
                mp_val[i][j] = ndarray.sparse_array(
                    values=norm[edge_list[i][j]],
                    indices=(edges[1], edges[0]),
                    shape=(graphs[j].num_nodes, graphs[i].num_nodes),
                    ctx=ctx)
            feed_dict[gcn1.mp[i][j]] = mp_val[i][j]
            feed_dict[gcn2.mp[i][j]] = mp_val[i][j]

    subgraph_size = list(map(lambda g: g.num_nodes, graphs))
    x = gcn1(x_, subgraph_size=subgraph_size, use_sparse=use_sparse)
    x = ad.relu_op(x)
    y = gcn2(x, subgraph_size=subgraph_size, use_sparse=use_sparse)
    # y_train = ad.slice_op(y, (0, 0), (train_split, graph.num_classes))

    # loss = ad.softmaxcrossentropy_op(y_train, y_)
    loss = ad.softmaxcrossentropy_op(y, y_)
    opt = optimizer.AdamOptimizer(0.01)
    train_op = opt.minimize(loss)
    executor = ad.Executor([loss, y, train_op], ctx=ctx)
    losses = []
    for i in range(num_epoch):
        loss_val, y_predicted, _ = executor.run(feed_dict=feed_dict)

        y_predicted = y_predicted.asnumpy().argmax(axis=1)
        # Accuracy is measured over all nodes here, since this example trains on the full graph.
        acc = (y_predicted == y_concat).sum()
        losses.append(loss_val.asnumpy()[0])
        if i == 0:
            # Start timing after the first epoch so warm-up is excluded from the average.
            start_time = time.time()
        print("Train loss :", loss_val.asnumpy().mean())
        print("Accuracy:", acc / len(y_predicted))
    # Average over the num_epoch - 1 timed epochs (the original hard-coded 199 assumed 200 epochs).
    print("Hetu time:", (time.time() - start_time) / (num_epoch - 1))
    return losses
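pick_edges is also not shown in this excerpt. From its use above (the returned edges[0] / edges[1] feed the sparse indices, and the same edge ids index norm), a plausible stand-in might be the following sketch (hypothetical, written only from that usage):

import numpy as np

def pick_edges_sketch(edges, edge_ids):
    # Hypothetical stand-in: select the subset of edges addressed by edge_ids.
    # `edges` is assumed to hold the source and destination index arrays of all edges.
    src, dst = np.asarray(edges[0]), np.asarray(edges[1])
    ids = np.asarray(edge_ids)
    return src[ids], dst[ids]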