Example 1
def generate_graph_matrices(graphs, auto_pad=False):
    A, X, E = nx_to_numpy(graphs, nf_keys=['atomic_num'],
                          ef_keys=['bond_type'], auto_pad=auto_pad,
                          self_loops=True)

    uniq_X = np.unique([v for x in X for v in np.unique(x)])
    X = [label_to_one_hot(x, uniq_X) for x in X]
    uniq_E = np.unique([v for x in E for v in np.unique(x)])
    E = [label_to_one_hot(x, uniq_E) for x in E]
    
    return A, X, E
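
For reference, label_to_one_hot maps each categorical value to a one-hot vector over a given label set. A minimal NumPy stand-in (a hypothetical sketch, not Spektral's actual implementation) behaves roughly like this:

import numpy as np

def label_to_one_hot_sketch(x, labels):
    # Map every entry of x to its index in the sorted `labels` array,
    # then expand the indices to one-hot rows.
    labels = np.asarray(labels)
    x = np.asarray(x)
    idx = np.searchsorted(labels, x.ravel())
    out = np.zeros((idx.size, labels.size))
    out[np.arange(idx.size), idx] = 1
    return out.reshape(x.shape + (labels.size,))

# E.g., atomic numbers encoded over the label set [1, 6, 8]:
print(label_to_one_hot_sketch([6, 1, 8], [1, 6, 8]))
# [[0. 1. 0.]
#  [1. 0. 0.]
#  [0. 0. 1.]]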
Example 2
    def read(self):
        f = np.load(osp.join(self.path, "dblp.npz"))

        x = sp.csr_matrix(
            (f["attr_data"], f["attr_indices"], f["attr_indptr"]), f["attr_shape"]
        ).toarray()
        x[x > 0] = 1  # binarize the node features

        if self.normalize_x:
            print("Pre-processing node features")
            x = _preprocess_features(x)

        a = sp.csr_matrix(
            (f["adj_data"], f["adj_indices"], f["adj_indptr"]), f["adj_shape"]
        )  # .tocoo()

        y = f["labels"]
        y = label_to_one_hot(y, np.unique(y))

        return [
            Graph(
                x=x.astype(self.dtype),
                a=a.astype(self.dtype),
                y=y.astype(self.dtype),
            )
        ]
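
The reader above rebuilds sparse matrices from their raw CSR arrays. A self-contained toy illustration of that reconstruction (toy data, not the DBLP arrays):

import numpy as np
import scipy.sparse as sp

# data/indices/indptr below describe the 2x2 matrix [[1, 0], [0, 2]]:
# row 0 holds data[0:1] at columns indices[0:1], row 1 holds data[1:2].
data = np.array([1, 2])
indices = np.array([0, 1])
indptr = np.array([0, 1, 2])
m = sp.csr_matrix((data, indices, indptr), shape=(2, 2))
assert (m.toarray() == np.array([[1, 0], [0, 2]])).all()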
Example 3
    def __call__(self, graph):
        if self.labels is not None:
            graph.y = label_to_one_hot(graph.y, self.labels)
        else:
            graph.y = one_hot(graph.y, self.depth)

        return graph
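
The two branches differ only in how the label set is specified: label_to_one_hot takes an explicit list of (possibly non-consecutive) label values, while one_hot encodes integers directly against a fixed depth. A toy illustration of the distinction, in plain NumPy:

import numpy as np

# Arbitrary label values need an explicit label list...
y_arbitrary = np.array([2, 7, 7])
labels = np.unique(y_arbitrary)         # -> [2, 7]
# ...whereas consecutive integers 0..depth-1 can be encoded directly:
y_consecutive = np.array([0, 1, 1])
depth = 2
encoded = np.eye(depth)[y_consecutive]  # [[1, 0], [0, 1], [0, 1]]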
Example 4
def atom_to_feature(atom):
    atomic_num = label_to_one_hot(atom["atomic_num"], ATOM_TYPES)
    coords = atom["coords"]
    charge = atom["charge"]
    iso = atom["iso"]

    return np.concatenate((atomic_num, coords, [charge, iso]), -1)
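
The resulting feature vector is the one-hot atomic number followed by the 3D coordinates, the charge, and the isotope flag. With hypothetical values (ATOM_TYPES and the atom fields below are placeholders, not from the library):

import numpy as np

# A carbon atom, assuming ATOM_TYPES = [1, 6, 7, 8, 9] (hypothetical):
atomic_num = np.array([0, 1, 0, 0, 0])   # one-hot over ATOM_TYPES
coords = np.array([0.0, 1.2, -0.3])
charge, iso = 0, 0
feature = np.concatenate((atomic_num, coords, [charge, iso]), -1)
# feature.shape == (10,): 5 (one-hot) + 3 (coords) + 2 (charge, iso)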
Example 5
def mol_to_adj(mol):
    row, col, edge_attr = [], [], []
    for bond in mol["bonds"]:
        start, end = bond["start_atom"], bond["end_atom"]
        # Add each bond in both directions so the adjacency is symmetric
        row += [start, end]
        col += [end, start]
        edge_attr += [bond["type"]] * 2

    a = sp.csr_matrix((np.ones_like(row), (row, col)))
    edge_attr = np.array([label_to_one_hot(e, BOND_TYPES) for e in edge_attr])
    return a, edge_attr
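
A minimal usage sketch, assuming the imports of the surrounding module (numpy as np, scipy.sparse as sp, label_to_one_hot) and a hypothetical water-like mol dict whose bond type 1 appears in BOND_TYPES:

mol = {
    "bonds": [
        {"start_atom": 0, "end_atom": 1, "type": 1},  # O-H
        {"start_atom": 0, "end_atom": 2, "type": 1},  # O-H
    ]
}
a, edge_attr = mol_to_adj(mol)
# a is a symmetric 3x3 CSR adjacency matrix with 4 non-zeros;
# edge_attr has shape (4, len(BOND_TYPES)), one one-hot row per directed edge.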
Example 6
def generate_graph_matrices(graphs, auto_pad=False):
    """
    Generate A, X, E matrix from smiles
    :param graphs: list of networkx graphs
    :param auto_pad: bool. whether to pad the node matrix to have the same length
    :return A, X, E
    """

    A, X, E = nx_to_numpy(graphs,
                          nf_keys=['atomic_num'],
                          ef_keys=['bond_type'],
                          auto_pad=auto_pad,
                          self_loops=True)

    uniq_X = np.unique([v for x in X for v in np.unique(x)])
    X = [label_to_one_hot(x, uniq_X) for x in X]
    uniq_E = np.unique([v for x in E for v in np.unique(x)])
    E = [label_to_one_hot(x, uniq_E) for x in E]

    return A, X, E
Example 7
def mol_to_adj(mol):
    row, col, edge_features = [], [], []
    for bond in mol["bonds"]:
        start, end = bond["start_atom"], bond["end_atom"]
        row += [start, end]
        col += [end, start]
        edge_features += [bond["type"]] * 2

    a, e = sparse.edge_index_to_matrix(
        edge_index=np.array((row, col)).T,
        edge_weight=np.ones_like(row),
        edge_features=label_to_one_hot(edge_features, BOND_TYPES),
    )

    return a, e
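
This is the same bidirectional edge-list construction as mol_to_adj in Example 5, with the CSR assembly and the one-hot encoding of the edge features delegated to the sparse.edge_index_to_matrix helper instead of being done by hand.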
Example 8
    def read(self):
        f = np.load(osp.join(self.path, "adj_full.npz"))
        a = sp.csr_matrix((f["data"], f["indices"], f["indptr"]), f["shape"])

        x = np.load(osp.join(self.path, "feats.npy"))

        if self.normalize_x:
            print("Pre-processing node features")
            x = _preprocess_features(x)

        y = np.zeros(x.shape[0])
        with open(osp.join(self.path, "class_map.json")) as f:
            class_map = json.load(f)
            for key, item in class_map.items():
                y[int(key)] = item

        y = label_to_one_hot(y, np.unique(y))

        with open(osp.join(self.path, "role.json")) as f:
            role = json.load(f)

        self.train_mask = np.zeros(x.shape[0], dtype=bool)
        self.train_mask[np.array(role["tr"])] = 1

        self.val_mask = np.zeros(x.shape[0], dtype=bool)
        self.val_mask[np.array(role["va"])] = 1

        self.test_mask = np.zeros(x.shape[0], dtype=bool)
        self.test_mask[np.array(role["te"])] = 1

        return [
            Graph(
                x=x.astype(self.dtype),
                a=a.astype(self.dtype),
                y=y.astype(self.dtype),
            )
        ]
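
The train/val/test masks built from role.json follow a standard index-list-to-boolean-mask conversion. A toy illustration:

import numpy as np

n = 5
train_idx = [0, 3]                  # indices as listed in role["tr"]
mask = np.zeros(n, dtype=bool)
mask[np.array(train_idx)] = True
# mask -> [ True, False, False,  True, False]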
Example 9
# LOAD DATA
################################################################################
A, X, E, y = qm9.load_data(return_type='numpy',
                           nf_keys='atomic_num',
                           ef_keys='type',
                           self_loops=True,
                           amount=1000)  # Set to None to train on whole dataset
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing: entries equal to 0 come from the zero-padding, so they are
# dropped from the label sets before one-hot encoding
X_uniq = np.unique(X)
X_uniq = X_uniq[X_uniq != 0]
E_uniq = np.unique(E)
E_uniq = E_uniq[E_uniq != 0]

X = label_to_one_hot(X, X_uniq)
E = label_to_one_hot(E, E_uniq)

# Parameters
N = X.shape[-2]       # Number of nodes in the graphs
F = X[0].shape[-1]    # Dimension of node features
S = E[0].shape[-1]    # Dimension of edge features
n_out = y.shape[-1]   # Dimension of the target

# Train/test split
A_train, A_test, \
X_train, X_test, \
E_train, E_test, \
y_train, y_test = train_test_split(A, X, E, y, test_size=0.1, random_state=0)

################################################################################
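
train_test_split accepts any number of equally-sized arrays and splits them consistently, which is what keeps A, X, E, and y aligned above. A self-contained toy example:

import numpy as np
from sklearn.model_selection import train_test_split

a = np.arange(10)
b = np.arange(10) * 2
a_tr, a_te, b_tr, b_te = train_test_split(a, b, test_size=0.2, random_state=0)
# Corresponding entries of a and b end up in the same split.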
Example 10
from spektral.layers import GraphConv, GlobalAvgPool
from spektral.utils import Batch, batch_iterator
from spektral.utils import label_to_one_hot

# Load data
A, X, _, y = qm9.load_data(return_type='numpy',
                           nf_keys='atomic_num',
                           ef_keys='type',
                           self_loops=True,
                           auto_pad=False,
                           amount=1000)
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing
uniq_X = np.unique([v for x in X for v in np.unique(x)])
X = [label_to_one_hot(x, uniq_X) for x in X]
y = StandardScaler().fit_transform(y).reshape(-1, y.shape[-1])

# Parameters
F = X[0].shape[-1]    # Dimension of node features
n_out = y.shape[-1]   # Dimension of the target
learning_rate = 1e-3  # Learning rate
epochs = 25           # Number of training epochs
batch_size = 64       # Batch size

# Train/test split
A_train, A_test, \
X_train, X_test, \
y_train, y_test = train_test_split(A, X, y, test_size=0.1)

# Model definition
Example 11
def generate_data(classes=0, n_samples_in_class=1000, n_nodes=7, support_low=0.,
                  support_high=10., drift_amount=1.0, one_hot_labels=True,
                  support=None, seed=None, return_type='numpy'):
    """
    Generates a dataset of Delaunay triangulations as described by
    [Zambon et al. (2017)](https://arxiv.org/abs/1706.06941).

    Node attributes are the 2D coordinates of the points.
    Two nodes are connected if they share an edge in the Delaunay triangulation.
    Labels represent the class of the graph (0 to 20). Each class index i
    represents the "difficulty" of the binary classification problem 0 vs. i:
    the higher the class index, the more similar the class is to class 0.

    :param classes: indices of the classes to load (integer, or list of integers
    between 0 and 20);
    :param n_samples_in_class: number of generated samples per class;
    :param n_nodes: number of nodes in a graph;
    :param support_low: lower bound of the uniform distribution from which the 
    support is generated;
    :param support_high: upper bound of the uniform distribution from which the 
    support is generated;
    :param drift_amount: coefficient to control the amount of change between 
    classes;
    :param one_hot_labels: one-hot encode dataset labels;
    :param support: custom support to use instead of generating it randomly; 
    :param seed: random numpy seed;
    :param return_type: `'numpy'` or `'networkx'`, data format to return;
    :return:
    - if `return_type='numpy'`, the adjacency matrix, node features, and
    an array containing labels;
    - if `return_type='networkx'`, a list of graphs in Networkx format, and an
    array containing labels;
    """
    if return_type not in RETURN_TYPES:
        raise ValueError('Possible return_type: {}'.format(RETURN_TYPES))

    if isinstance(classes, int):
        classes = [classes]

    if max(classes) > 20 or min(classes) < 0:
        raise ValueError('Class indices must be between 0 and 20')

    r_classes = list(reversed(classes))
    if r_classes[-1] == 0:
        r_classes.insert(0, r_classes.pop(-1))

    # Support points
    np.random.seed(seed)
    if support is None:
        support = np.random.uniform(support_low, support_high, (1, n_nodes, 2))
    else:
        try:
            assert support.shape == (1, n_nodes, 2)
        except AssertionError:
            print('The given support doesn\'t have shape (1, n_nodes, 2) as '
                  'expected. Attempting to reshape.')
            support = support.reshape(1, n_nodes, 2)

    # Compute node features
    node_features = []
    # Other node features
    for idx, i in enumerate(r_classes):
        if i == 0:
            concept_0 = np.repeat(support, n_samples_in_class, 0)
            noise_0 = np.random.normal(0, 1, (n_samples_in_class, n_nodes, 2))
            class_0 = concept_0 + noise_0
            node_features.append(class_0)
        else:
            radius = 10. * ((2./3.) ** (drift_amount * (i - 1)))
            phase = np.random.uniform(0, 2 * np.pi, (n_nodes, 1))
            perturb_i_x = radius * np.cos(phase)
            perturb_i_y = radius * np.sin(phase)
            perturb_i = np.concatenate((perturb_i_x, perturb_i_y), axis=-1)
            support_i = support + perturb_i
            concept_i = np.repeat(support_i, n_samples_in_class, 0)
            noise_i = np.random.normal(0, 1, (n_samples_in_class, n_nodes, 2))
            class_i = concept_i + noise_i
            node_features.append(class_i)
    node_features = np.array(node_features).reshape((-1, n_nodes, 2))

    # Compute adjacency matrices
    adjacency = []
    for nf in node_features:
        adj = _compute_adj(nf)
        adjacency.append(adj)
    adjacency = np.array(adjacency)

    # Compute labels
    labels = np.repeat(classes, n_samples_in_class)
    if one_hot_labels:
        labels = label_to_one_hot(labels, labels=classes)

    if return_type == 'numpy':
        return adjacency, node_features, labels
    elif return_type == 'networkx':
        graphs = numpy_to_nx(adjacency, node_features=node_features, nf_name='coords')
        return graphs, labels
    else:
        raise NotImplementedError
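
A minimal usage sketch (the shapes follow from the code above, with the default n_nodes=7):

a, x, y = generate_data(classes=[0, 5], n_samples_in_class=100, seed=0)
# a: (200, 7, 7) adjacency matrices
# x: (200, 7, 2) node coordinates
# y: (200, 2) one-hot labels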
Example 12
from sklearn.preprocessing import StandardScaler

from spektral.datasets import qm9
from spektral.layers import EdgeConditionedConv, GlobalAttentionPool
from spektral.utils import init_logging, label_to_one_hot

# Load data
adj, nf, ef, y = qm9.load_data(return_type='numpy',
                               nf_keys='atomic_num',
                               ef_keys='type',
                               self_loops=True)
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing
uniq_nf = np.unique(nf)
nf = label_to_one_hot(nf, uniq_nf)
uniq_ef = np.unique(ef)
ef = label_to_one_hot(ef, uniq_ef)
y = StandardScaler().fit_transform(y).reshape(-1, y.shape[-1])

# Parameters
N = nf.shape[-2]  # Number of nodes in the graphs
F = nf.shape[-1]  # Node features dimensionality
S = ef.shape[-1]  # Edge features dimensionality
n_out = y.shape[-1]  # Dimensionality of the target
learning_rate = 1e-3  # Learning rate for SGD
epochs = 25  # Number of training epochs
batch_size = 64  # Batch size
es_patience = 5  # Patience for early stopping
log_dir = init_logging()  # Create log directory and file
Example 13
A, X, E, y = qm9.load_data(
    return_type='numpy',
    nf_keys='atomic_num',
    ef_keys='type',
    self_loops=False,
    auto_pad=False,
    amount=1000)  # Set to None to train on whole dataset
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing
X_uniq = np.unique([v for x in X for v in np.unique(x)])
E_uniq = np.unique([v for e in E for v in np.unique(e)])
X_uniq = X_uniq[X_uniq != 0]
E_uniq = E_uniq[E_uniq != 0]

X = [label_to_one_hot(x, labels=X_uniq) for x in X]
E = [label_to_one_hot(e, labels=E_uniq) for e in E]

# Parameters
F = X[0].shape[-1]  # Dimension of node features
S = E[0].shape[-1]  # Dimension of edge features
n_out = y.shape[-1]  # Dimension of the target

# Train/test split
A_train, A_test, \
X_train, X_test, \
E_train, E_test, \
y_train, y_test = train_test_split(A, X, E, y, test_size=0.1, random_state=0)

################################################################################
# BUILD MODEL
Example 14
np.random.seed(0)
SW_KEY = 'dense_1_sample_weights:0'  # Keras automatically creates a placeholder for sample weights, which must be fed

# Load data
A, X, E, y = qm9.load_data(
    return_type='numpy',
    nf_keys='atomic_num',
    ef_keys='type',
    self_loops=True,
    auto_pad=False,
    amount=1000)  # Set to None to train on whole dataset
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing
uniq_X = np.unique([v for x in X for v in np.unique(x)])
X = [label_to_one_hot(x, uniq_X) for x in X]
uniq_E = np.unique([v for e in E for v in np.unique(e)])
uniq_E = uniq_E[uniq_E != 0]
E = [label_to_one_hot(e, uniq_E) for e in E]

# Parameters
F = X[0].shape[-1]  # Dimension of node features
S = E[0].shape[-1]  # Dimension of edge features
n_out = y.shape[-1]  # Dimension of the target
learning_rate = 1e-3  # Learning rate
epochs = 25  # Number of training epochs
batch_size = 32  # Batch size

# Train/test split
A_train, A_test, \
X_train, X_test, \
Example 15
from spektral.datasets import qm9
from spektral.layers import EdgeConditionedConv, GlobalAvgPool
from spektral.utils import label_to_one_hot

# Load data
A, X, E, y = qm9.load_data(return_type='numpy',
                           nf_keys='atomic_num',
                           ef_keys='type',
                           self_loops=True,
                           amount=1000)  # Set to None to train on whole dataset
y = y[['cv']].values  # Heat capacity at 298.15K

# Preprocessing
uniq_X = np.unique(X)
uniq_X = uniq_X[uniq_X != 0]
X = label_to_one_hot(X, uniq_X)
uniq_E = np.unique(E)
uniq_E = uniq_E[uniq_E != 0]
E = label_to_one_hot(E, uniq_E)

# Parameters
N = X.shape[-2]           # Number of nodes in the graphs
F = X.shape[-1]           # Node features dimensionality
S = E.shape[-1]           # Edge features dimensionality
n_out = y.shape[-1]       # Dimensionality of the target
learning_rate = 1e-3      # Learning rate for SGD
epochs = 25               # Number of training epochs
batch_size = 32           # Batch size
es_patience = 5           # Patience for early stopping

# Train/test split