예제 #1
0
파일: data.py 프로젝트: yaniv256/spektral
def numpy_to_batch(X_list, A_list, E_list=None):
    """
    Converts a batch of graphs stored in lists (X, A, and optionally E) to the
    [batch mode](https://danielegrattarola.github.io/spektral/data/#batch-mode)
    by zero-padding all X, A and E matrices to have the same node dimensions
    (`N_max`).

    Each entry i of the lists should be associated to the same graph, i.e.,
    `X_list[i].shape[0] == A_list[i].shape[0] == E_list[i].shape[0]`.

    `N_max` is taken from the last dimension of the matrices in `A_list`.

    Note that every sparse matrix found in `A_list` (i.e., every element that
    exposes a `toarray()` method) will be converted to a dense np.array, which
    can be expensive. Sparse and dense matrices may be mixed in the same list.

    :param X_list: a list of np.arrays of shape `(N, F)`;
    :param A_list: a list of np.arrays or sparse matrices of shape `(N, N)`;
    :param E_list: a list of np.arrays of shape `(N, N, S)`;
    :return:
        -  `X_out`: a rank 3 array of shape `(batch, N_max, F)`;
        -  `A_out`: a rank 3 array of shape `(batch, N_max, N_max)`;
        -  `E_out`: (only if `E_list` is given) a rank 4 array of shape
        `(batch, N_max, N_max, S)`;
    """
    N_max = max(a.shape[-1] for a in A_list)
    X_out = pad_jagged_array(X_list, (N_max, -1))

    # Densify element by element: checking only the first matrix would break
    # on lists that mix sparse and dense adjacency matrices.
    A_dense = [a.toarray() if hasattr(a, 'toarray') else a for a in A_list]
    A_out = pad_jagged_array(A_dense, (N_max, N_max))

    if E_list is None:
        return X_out, A_out

    E_out = pad_jagged_array(E_list, (N_max, N_max, -1))
    return X_out, A_out, E_out
예제 #2
0
def to_batch(x_list=None, a_list=None, e_list=None, mask=False):
    """
    Converts lists of node features, adjacency matrices and edge features to
    [batch mode](https://graphneural.network/data-modes/#batch-mode),
    by zero-padding all tensors to have the same node dimension `n_max`.

    Either the node features or the adjacency matrices must be provided as input.
    `n_max` is computed from `x_list` when it is given, otherwise from `a_list`.

    The i-th element of each list must be associated with the i-th graph.

    If `a_list` contains sparse matrices (elements exposing `toarray()`), they
    will be converted to dense np.arrays.

    The edge attributes of a graph can be represented as

    - a dense array of shape `(n_nodes, n_nodes, n_edge_features)`;
    - a sparse edge list of shape `(n_edges, n_edge_features)`;

    and they will always be returned as dense arrays. The representation is
    detected from the number of dimensions of the first element of `e_list`.
    Converting sparse edge lists requires `a_list`, because the non-zero
    entries of each adjacency matrix determine where the edge attributes are
    placed. The input lists are never modified.

    :param x_list: a list of np.arrays of shape `(n_nodes, n_node_features)`
    -- note that `n_nodes` can change between graphs;
    :param a_list: a list of np.arrays or scipy.sparse matrices of shape
    `(n_nodes, n_nodes)`;
    :param e_list: a list of np.arrays of shape
    `(n_nodes, n_nodes, n_edge_features)` or `(n_edges, n_edge_features)`;
    :param mask: bool, if True, node attributes will be extended with a binary mask that
    indicates valid nodes (the last feature of each node will be 1 if the node is valid
    and 0 otherwise). Use this flag in conjunction with layers.base.GraphMasking to
    start the propagation of masks in a model.

    :return: only if the corresponding list is given as input:

        -  `x`: np.array of shape `(batch, n_max, n_node_features)`;
        -  `a`: np.array of shape `(batch, n_max, n_max)`;
        -  `e`: np.array of shape `(batch, n_max, n_max, n_edge_features)`;

    :raises ValueError: if both `x_list` and `a_list` are None, or if
    `e_list` holds sparse edge lists and `a_list` is None.
    """
    if a_list is None and x_list is None:
        raise ValueError("Need at least x_list or a_list")

    n_max = max(
        g.shape[0] for g in (x_list if x_list is not None else a_list)
    )

    # Node features
    x_out = None
    if x_list is not None:
        if mask:
            # Append a column of ones to every real node; padding added
            # afterwards leaves that column at 0 for the padded rows.
            x_list = [
                np.concatenate((x, np.ones((x.shape[0], 1))), -1)
                for x in x_list
            ]
        x_out = pad_jagged_array(x_list, (n_max, -1))

    # Adjacency matrix
    a_out = None
    if a_list is not None:
        # Convert sparse to dense, element by element so that mixed lists
        # of sparse and dense matrices are supported.
        a_list = [a.toarray() if hasattr(a, "toarray") else a for a in a_list]
        a_out = pad_jagged_array(a_list, (n_max, n_max))

    # Edge attributes
    e_out = None
    if e_list is not None:
        if e_list[0].ndim == 2:  # Sparse to dense
            if a_list is None:
                raise ValueError(
                    "a_list is required to convert sparse edge lists to dense"
                )
            # Work on a shallow copy so the caller's list is left untouched.
            e_list = list(e_list)
            for i, a in enumerate(a_list):
                e = e_list[i]
                e_new = np.zeros(a.shape + e.shape[-1:])
                # Scatter the edge attributes onto the non-zero entries of a.
                e_new[np.nonzero(a)] = e
                e_list[i] = e_new
        e_out = pad_jagged_array(e_list, (n_max, n_max, -1))

    return tuple(out for out in [x_out, a_out, e_out] if out is not None)
예제 #3
0
def collate_labels_batch(y_list, node_level=False):
    """
    Collates a list of per-graph labels into a single array.

    :param y_list: a list with one label entry per graph;
    :param node_level: bool, if True the entries of `y_list` are padded with
    `pad_jagged_array` to the largest first dimension found in the list;
    :return: the output of `pad_jagged_array` if `node_level` is True,
    otherwise `np.array(y_list)`.
    """
    if not node_level:
        return np.array(y_list)

    largest = max(labels.shape[0] for labels in y_list)
    return pad_jagged_array(y_list, (largest, -1))
예제 #4
0
################################################################################
# LOAD DATA
################################################################################
# Load the dataset identified by `dataset_name`.
dataset_name = 'ogbg-molesol'
dataset = GraphPropPredDataset(name=dataset_name)
# Number of tasks reported by the dataset (presumably the size of the model
# output -- it is not used within this excerpt).
n_out = dataset.num_tasks
# Largest `num_nodes` over all graphs; used below as the common node dimension.
N = max(g[0]['num_nodes'] for g in dataset)

# Index sets of the train / validation / test splits.
idx = dataset.get_idx_split()
tr_idx, va_idx, te_idx = idx["train"], idx["valid"], idx["test"]

# Convert the dataset to per-graph arrays; the third returned value is
# discarded (presumably the edge features -- TODO confirm).
X, A, _, y = ogb.dataset_to_numpy(dataset, dtype='f8')
# Each adjacency matrix exposes toarray() (presumably scipy.sparse): densify.
A = [a.toarray() for a in A]
# Size of the last axis of the first node-feature matrix.
F = X[0].shape[-1]
# Bring every graph to a common (N, F) / (N, N) shape so that the whole
# dataset can be indexed as a single array (padding is done by
# pad_jagged_array, presumably with zeros -- TODO confirm).
X = pad_jagged_array(X, (N, F))
A = pad_jagged_array(A, (N, N))
# Select each split with its index set.
X_tr, A_tr, y_tr = X[tr_idx], A[tr_idx], y[tr_idx]
X_va, A_va, y_va = X[va_idx], A[va_idx], y[va_idx]
X_te, A_te, y_te = X[te_idx], A[te_idx], y[te_idx]

################################################################################
# BUILD MODEL
################################################################################
# Model inputs: per-graph node features (N, F) and adjacency matrix (N, N).
X_in = Input(shape=(N, F))
A_in = Input(shape=(N, N))

# GraphConv -> MinCutPool -> GraphConv -> global sum pooling.
X_1 = GraphConv(32, activation='relu')([X_in, A_in])
# MinCutPool yields new node features and a new adjacency matrix; N // 2 is
# presumably the number of nodes kept after pooling -- TODO confirm.
X_1, A_1 = MinCutPool(N // 2)([X_1, A_in])
X_2 = GraphConv(32, activation='relu')([X_1, A_1])
X_3 = GlobalSumPool()(X_2)
예제 #5
0
def to_batch(x_list=None, a_list=None, e_list=None):
    """
    Converts lists of node features, adjacency matrices and (optionally) edge
    features to [batch mode](https://danielegrattarola.github.io/spektral/data/#batch-mode),
    by zero-padding all tensors to have the same node dimension `n_max`.

    Either the node features or the adjacency matrices must be provided as input.
    `n_max` is computed from `x_list` when it is given, otherwise from `a_list`.

    The i-th element of each list must be associated with the i-th graph.

    If `a_list` contains sparse matrices (elements exposing `toarray()`), they
    will be converted to dense np.arrays, which can be expensive.

    The edge attributes of a graph can be represented as

    - a dense array of shape `(n_nodes, n_nodes, n_edge_features)`;
    - a sparse edge list of shape `(n_edges, n_edge_features)`;

    and they will always be returned as dense arrays. The representation is
    detected from the number of dimensions of the first element of `e_list`.
    Converting sparse edge lists requires `a_list`, because the non-zero
    entries of each adjacency matrix determine where the edge attributes are
    placed. The input lists are never modified.

    :param x_list: a list of np.arrays of shape `(n_nodes, n_node_features)`
    -- note that `n_nodes` can change between graphs;
    :param a_list: a list of np.arrays or scipy.sparse matrices of shape
    `(n_nodes, n_nodes)`;
    :param e_list: a list of np.arrays of shape
    `(n_nodes, n_nodes, n_edge_features)` or `(n_edges, n_edge_features)`;
    :return: only if the corresponding list is given as input:

        -  `x`: np.array of shape `(batch, n_max, n_node_features)`;
        -  `a`: np.array of shape `(batch, n_max, n_max)`;
        -  `e`: np.array of shape `(batch, n_max, n_max, n_edge_features)`;

    :raises ValueError: if both `x_list` and `a_list` are None, or if
    `e_list` holds sparse edge lists and `a_list` is None.
    """
    if a_list is None and x_list is None:
        raise ValueError('Need at least x_list or a_list')

    n_max = max(
        g.shape[0] for g in (x_list if x_list is not None else a_list)
    )

    # Node features
    x_out = None
    if x_list is not None:
        x_out = pad_jagged_array(x_list, (n_max, -1))

    # Adjacency matrix
    a_out = None
    if a_list is not None:
        # Convert sparse to dense, element by element so that mixed lists
        # of sparse and dense matrices are supported.
        a_list = [a.toarray() if hasattr(a, 'toarray') else a for a in a_list]
        a_out = pad_jagged_array(a_list, (n_max, n_max))

    # Edge attributes
    e_out = None
    if e_list is not None:
        if e_list[0].ndim == 2:  # Sparse to dense
            if a_list is None:
                raise ValueError(
                    'a_list is required to convert sparse edge lists to dense'
                )
            # Work on a shallow copy so the caller's list is left untouched.
            e_list = list(e_list)
            for i, a in enumerate(a_list):
                e = e_list[i]
                e_new = np.zeros(a.shape + e.shape[-1:])
                # Scatter the edge attributes onto the non-zero entries of a.
                e_new[np.nonzero(a)] = e
                e_list[i] = e_new
        e_out = pad_jagged_array(e_list, (n_max, n_max, -1))

    return tuple(out for out in [x_out, a_out, e_out] if out is not None)