Code Example #1
import tensorflow as tf

from ogb.utils import features


def _one_hot_bonds(bonds: tf.Tensor) -> tf.Tensor:
    """One-hot encode each categorical bond feature and concatenate the results."""
    vocab_sizes = features.get_bond_feature_dims()
    one_hots = []
    for i in range(bonds.shape[1]):
        one_hots.append(
            tf.one_hot(bonds[:, i], vocab_sizes[i], dtype=tf.float32))
    return tf.concat(one_hots, axis=-1)
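A minimal usage sketch (not part of the original source; the dummy tensor and the shape noted in the comment are illustrative only), assuming TensorFlow 2.x:

dummy_bonds = tf.zeros(
    (4, len(features.get_bond_feature_dims())), dtype=tf.int32)
encoded = _one_hot_bonds(dummy_bonds)
# One column per category of every bond feature, concatenated along the last axis,
# so the result has shape (4, sum(features.get_bond_feature_dims())).
print(encoded.shape)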
Code Example #2
    def get_tf_dataset(self):
        atom_feature_dims = get_atom_feature_dims()
        bond_feature_dims = get_bond_feature_dims()

        np.random.seed(23)
        data = {
            'nodes':
            np.random.randint(size=(self.n_examples, self.nodes_per_graph,
                                    len(atom_feature_dims)),
                              low=np.zeros_like(atom_feature_dims),
                              high=atom_feature_dims,
                              dtype=np.int32),
            'edges':
            np.random.randint(size=(self.n_examples, self.edges_per_graph,
                                    len(bond_feature_dims)),
                              low=np.zeros_like(bond_feature_dims),
                              high=bond_feature_dims,
                              dtype=np.int32),
            'edge_idx':
            np.stack([
                self.get_random_edge_idx() for _ in range(self.n_examples)
            ]).astype(np.int32),
            # binary ground truth
            'ground_truth':
            np.random.uniform(low=0, high=2,
                              size=(self.n_examples, 1)).astype(np.int32)
        }
        ds = tf.data.Dataset.from_tensor_slices(data)
        ds = ds.take(self.n_examples).cache().shuffle(32).repeat().batch(
            self.batch_size, drop_remainder=True)
        ds = ds.map(lambda b: self.batch_to_outputs(b))
        ds = ds.prefetch(1024)

        return ds
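A detail worth noting in the snippet above: np.random.randint accepts array-valued low and high arguments, so every feature column is drawn from its own vocabulary range. A small standalone illustration (not from the original source):

import numpy as np
from ogb.utils.features import get_bond_feature_dims

bond_feature_dims = get_bond_feature_dims()
# high is an array, so column i is sampled from [0, bond_feature_dims[i]).
fake_edges = np.random.randint(low=np.zeros_like(bond_feature_dims),
                               high=bond_feature_dims,
                               size=(8, len(bond_feature_dims)),
                               dtype=np.int32)
assert (fake_edges.max(axis=0) < np.array(bond_feature_dims)).all()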
Code Example #3
    def __init__(self, emb_dim):
        super().__init__()

        self.emb_dim = emb_dim
        self.bond_embedding_list = []
        for dim in get_bond_feature_dims():
            self.bond_embedding_list.append(
                tf.keras.layers.Embedding(dim, emb_dim))
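    # The snippet above ends at __init__. A companion call method is sketched
    # below purely as an assumption (it is not shown in the original excerpt);
    # it mirrors the summation pattern used by the PyTorch bond encoders later
    # in this list.
    def call(self, bonds):
        embedded = 0
        for i, embedding_layer in enumerate(self.bond_embedding_list):
            embedded += embedding_layer(bonds[:, i])
        return embedded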
Code Example #4
def _sample_one_hot_bonds(bonds: tf.Tensor) -> tf.Tensor:
    vocab_sizes = features.get_bond_feature_dims()
    one_hots = []
    num_bonds = tf.shape(bonds)[0]
    for i in range(bonds.shape[1]):
        sampled_category = _sample_uniform_categorical(num_bonds,
                                                       vocab_sizes[i])
        one_hots.append(
            tf.one_hot(sampled_category, vocab_sizes[i], dtype=tf.float32))
    return tf.concat(one_hots, axis=-1)
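_sample_uniform_categorical is referenced above but not shown in this excerpt. A plausible implementation is sketched below purely as an assumption (the actual helper may differ), reusing the tensorflow-as-tf import from Code Example #1:

def _sample_uniform_categorical(num_samples: tf.Tensor, vocab_size: int) -> tf.Tensor:
    # Draw one uniformly random category in [0, vocab_size) per bond (assumed behaviour).
    return tf.random.uniform(shape=[num_samples],
                             minval=0,
                             maxval=vocab_size,
                             dtype=tf.int32)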
Code Example #5
    def __init__(self, emb_dim):
        super(ExampleEdgeEncoder, self).__init__()

        self.bond_embedding_list = torch.nn.ModuleList()
        full_bond_feature_dims = get_bond_feature_dims()

        for i, dim in enumerate(full_bond_feature_dims):
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.bond_embedding_list.append(emb)
Code Example #6
    def __init__(self, emb_dim):
        super().__init__()

        from ogb.utils.features import get_bond_feature_dims

        self.bond_embedding_list = torch.nn.ModuleList()

        for i, dim in enumerate(get_bond_feature_dims()):
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.bond_embedding_list.append(emb)
Code Example #7
    def __call__(self, x):
        bond_feature = get_bond_feature_dims()
        bond_input = L.split(x, num_or_sections=len(bond_feature), dim=-1)
        outputs = None
        count = 0
        for _x, _bond_input_dim in zip(bond_input, bond_feature):
            count += 1
            emb = L.embedding(_x,
                              size=(_bond_input_dim, self.emb_dim),
                              param_attr=F.ParamAttr(name=self.name +
                                                     '_bond_feat_%s' % count))
            if outputs is None:
                outputs = emb
            else:
                outputs = outputs + emb
        return outputs
Code Example #8
import torch
from ogb.utils.features import get_atom_feature_dims, get_bond_feature_dims

full_atom_feature_dims = get_atom_feature_dims()
full_bond_feature_dims = get_bond_feature_dims()


class AtomEncoder(torch.nn.Module):
    def __init__(self, emb_dim):
        super(AtomEncoder, self).__init__()

        self.atom_embedding_list = torch.nn.ModuleList()

        for i, dim in enumerate(full_atom_feature_dims):
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.atom_embedding_list.append(emb)

    def forward(self, x):
        x_embedding = 0
        for i in range(x.shape[1]):
            x_embedding += self.atom_embedding_list[i](x[:, i])

        return x_embedding


class BondEncoder(torch.nn.Module):
    def __init__(self, emb_dim):
        super(BondEncoder, self).__init__()

        self.bond_embedding_list = torch.nn.ModuleList()

        # One embedding table per categorical bond feature, mirroring AtomEncoder above.
        for i, dim in enumerate(full_bond_feature_dims):
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.bond_embedding_list.append(emb)

    def forward(self, edge_attr):
        bond_embedding = 0
        for i in range(edge_attr.shape[1]):
            bond_embedding += self.bond_embedding_list[i](edge_attr[:, i])

        return bond_embedding
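A brief usage sketch of the two encoders (illustrative only, not part of the original file; the dummy zero tensors simply exercise both forward passes):

if __name__ == "__main__":
    atom_encoder = AtomEncoder(emb_dim=100)
    bond_encoder = BondEncoder(emb_dim=100)

    # Dummy integer features: one row per atom/bond, one column per feature.
    atoms = torch.zeros(5, len(full_atom_feature_dims), dtype=torch.long)
    bonds = torch.zeros(7, len(full_bond_feature_dims), dtype=torch.long)

    print(atom_encoder(atoms).shape)  # torch.Size([5, 100])
    print(bond_encoder(bonds).shape)  # torch.Size([7, 100])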
Code Example #9
import torch
import torch.nn as nn
from typing import Union

from phc.quaternion.algebra import QTensor
from phc.quaternion.activations import get_functional_activation
from phc.quaternion.layers import quaternion_dropout, QLinear, RealTransformer
from phc.quaternion.norm import QuaternionNorm

from ogb.utils.features import get_atom_feature_dims, get_bond_feature_dims

ATOM_FEAT_DIMS = get_atom_feature_dims()
BOND_FEAT_DIMS = get_bond_feature_dims()
""" Quaternion Downstream Feed-Forward Network"""


class QuaternionDownstreamNet(nn.Module):
    """  A quaternion Feed-Forward Network which predicts a real-valued vector of dimension `out_features`. """
    def __init__(self,
                 in_features: int,
                 hidden_layers: list,
                 out_features: int,
                 activation: str,
                 bias: bool,
                 norm: str,
                 init: str,
                 dropout: Union[float, list],
                 same_dropout: bool = False,
                 real_trafo: str = "linear") -> None:

        super(QuaternionDownstreamNet, self).__init__()
Code Example #10
# Copyright (c) 2021 Graphcore Ltd. All rights reserved.

import numpy as np
import tensorflow as tf

from absl import flags
from ogb.utils.features import get_atom_feature_dims, get_bond_feature_dims

NODE_FEATURE_DIMS = len(get_atom_feature_dims())
EDGE_FEATURE_DIMS = len(get_bond_feature_dims())
FLAGS = flags.FLAGS


def np_batch_generator(n_nodes,
                       n_edges,
                       batch_size,
                       data_subset,
                       epochs=1,
                       shuffle=True):
    """
    generates the batches in numpy

    shape of batch is going to be:
    nodes:     [max_nodes, n_node_feat]
    edge_idx:  [max_edges, 2]
    edge_feat: [max_edges, n_edge_feat]
    labels:    [max_graphs, 1]

    :param n_nodes: the number of nodes per graph (on average)
    :param n_edges: the number of edges per graph (on average)
    :param batch_size: desired batch size (1 is reserved for the dummy graph)
Code Example #11
import unittest
import torch
import itertools

from ogb.utils.features import get_atom_feature_dims, get_bond_feature_dims
ATOM_FEATS, BOND_FEATS = get_atom_feature_dims(), get_bond_feature_dims()

from phc.quaternion.layers import QLinear
from phc.quaternion.encoder import IntegerEncoder, QuaternionEncoder

# quaternion undirectional models
from phc.quaternion.undirectional.models import QuaternionSkipConnectAdd as UQ_SC_ADD
from phc.quaternion.undirectional.models import QuaternionSkipConnectConcat as UQ_SC_CAT


def weights_flag(name):
    """ Select only the weight matrices of QLinear and nn.Embedding """
    return "W_" in name or "weight" in name and "b_" not in name and "bias" not in name and "bn" not in name


def check_qlinear(in_features: int,
                  out_features: int,
                  init: str,
                  norm_tol: float = 5.0,
                  nruns: int = 10) -> bool:
    weights = []
    module = QLinear(in_features, out_features, bias=False, init=init)
    for i in range(nruns):
        weights.append(module.W.stack(dim=0))
        module.reset_parameters()
Code Example #12
def main():
    args = get_parser()
    # get some argparse arguments that are parsed as a bool string
    naive_encoder = not str2bool(args.full_encoder)
    pin_memory = str2bool(args.pin_memory)
    use_bias = str2bool(args.bias)
    downstream_bn = str(args.d_bn)
    same_dropout = str2bool(args.same_dropout)
    mlp_mp = str2bool(args.mlp_mp)

    phm_dim = args.phm_dim
    learn_phm = str2bool(args.learn_phm)

    base_dir = "pcba/"
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    if base_dir not in args.save_dir:
        args.save_dir = os.path.join(base_dir, args.save_dir)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    set_logging(save_dir=args.save_dir)
    logging.info(f"Creating log directory at {args.save_dir}.")
    with open(os.path.join(args.save_dir, "params.json"), 'w') as fp:
        json.dump(args.__dict__, fp)

    mp_layers = [int(item) for item in args.mp_units.split(',')]
    downstream_layers = [int(item) for item in args.d_units.split(',')]
    mp_dropout = [float(item) for item in args.dropout_mpnn.split(',')]
    dn_dropout = [float(item) for item in args.dropout_dn.split(',')]
    logging.info(
        f'Initialising model with {mp_layers} hidden units with dropout {mp_dropout} '
        f'and downstream units: {downstream_layers} with dropout {dn_dropout}.'
    )

    if args.pooling == "globalsum":
        logging.info("Using GlobalSum Pooling")
    else:
        logging.info("Using SoftAttention Pooling")

    logging.info(
        f"Using Adam optimizer with weight_decay ({args.weightdecay}) and regularization "
        f"norm ({args.regularization})")
    logging.info(
        f"Weight init: {args.w_init} \n Contribution init: {args.c_init}")

    # data
    dname = "ogbg-molpcba"
    transform = RemoveIsolatedNodes()
    # pre-transform doesn't work for some reason.
    dataset = PygGraphPropPredDataset(
        name=dname,
        root="dataset")  #, pre_transform=transform, transform=None)
    evaluator = Evaluator(name=dname)
    split_idx = dataset.get_idx_split()
    train_data = dataset[split_idx["train"]]
    valid_data = dataset[split_idx["valid"]]
    test_data = dataset[split_idx["test"]]

    if PRE_TRAFO:
        # pre-transform in memory to avoid recomputing the transform during training
        logging.info(
            "Pre-transforming graphs to avoid recomputation during batching.")
        train_data_list = []
        valid_data_list = []
        test_data_list = []
        for data in train_data:
            train_data_list.append(transform(data))
        for data in valid_data:
            valid_data_list.append(transform(data))
        for data in test_data:
            test_data_list.append(transform(data))

        logging.info("finised. Initiliasing dataloaders")

        train_loader = DataLoader(train_data_list,
                                  batch_size=args.batch_size,
                                  drop_last=False,
                                  shuffle=True,
                                  num_workers=args.nworkers,
                                  pin_memory=pin_memory)
        valid_loader = DataLoader(valid_data_list,
                                  batch_size=args.batch_size,
                                  drop_last=False,
                                  shuffle=False,
                                  num_workers=args.nworkers,
                                  pin_memory=pin_memory)
        test_loader = DataLoader(test_data_list,
                                 batch_size=args.batch_size,
                                 drop_last=False,
                                 shuffle=False,
                                 num_workers=args.nworkers,
                                 pin_memory=pin_memory)
    else:
        train_loader = DataLoader(train_data,
                                  batch_size=args.batch_size,
                                  drop_last=False,
                                  shuffle=True,
                                  num_workers=args.nworkers,
                                  pin_memory=pin_memory)
        valid_loader = DataLoader(valid_data,
                                  batch_size=args.batch_size,
                                  drop_last=False,
                                  shuffle=False,
                                  num_workers=args.nworkers,
                                  pin_memory=pin_memory)
        test_loader = DataLoader(test_data,
                                 batch_size=args.batch_size,
                                 drop_last=False,
                                 shuffle=False,
                                 num_workers=args.nworkers,
                                 pin_memory=pin_memory)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    FULL_ATOM_FEATURE_DIMS = get_atom_feature_dims()
    FULL_BOND_FEATURE_DIMS = get_bond_feature_dims()

    # for hypercomplex model
    unique_phm = str2bool(args.unique_phm)
    if unique_phm:
        phm_rule = get_multiplication_matrices(phm_dim=args.phm_dim,
                                               type="phm")
        phm_rule = torch.nn.ParameterList(
            [torch.nn.Parameter(a, requires_grad=learn_phm) for a in phm_rule])
    else:
        phm_rule = None

    if args.aggr_msg == "pna" or args.aggr_node == "pna":
        # if PNA is used
        # Compute in-degree histogram over training data.
        deg = torch.zeros(6, dtype=torch.long)
        for data in dataset[split_idx['train']]:
            d = degree(data.edge_index[1],
                       num_nodes=data.num_nodes,
                       dtype=torch.long)
            deg += torch.bincount(d, minlength=deg.numel())
    else:
        deg = None

    aggr_kwargs = {
        "aggregators": ['mean', 'min', 'max', 'std'],
        "scalers": ['identity', 'amplification', 'attenuation'],
        "deg": deg,
        "post_layers": 1,
        "msg_scalers":
        str2bool(args.msg_scale
                 ),  # this key is for directional messagepassing layers.
        "initial_beta": 1.0,  # Softmax
        "learn_beta": True
    }

    if "quaternion" in args.type:
        if args.aggr_msg == "pna" or args.aggr_node == "pna":
            logging.info("PNA not implemented for quaternion models.")
            raise NotImplementedError

    if args.type == "undirectional-quaternion-sc-add":
        logging.info(
            "Using Quaternion Undirectional MPNN with Skip Connection through Addition"
        )
        model = UQ_SC_ADD(atom_input_dims=FULL_ATOM_FEATURE_DIMS,
                          atom_encoded_dim=args.input_embed_dim,
                          bond_input_dims=FULL_BOND_FEATURE_DIMS,
                          naive_encoder=naive_encoder,
                          mp_layers=mp_layers,
                          dropout_mpnn=mp_dropout,
                          init=args.w_init,
                          same_dropout=same_dropout,
                          norm_mp=args.mp_norm,
                          add_self_loops=True,
                          msg_aggr=args.aggr_msg,
                          node_aggr=args.aggr_node,
                          mlp=mlp_mp,
                          pooling=args.pooling,
                          activation=args.activation,
                          real_trafo=args.real_trafo,
                          downstream_layers=downstream_layers,
                          target_dim=dataset.num_tasks,
                          dropout_dn=dn_dropout,
                          norm_dn=downstream_bn,
                          msg_encoder=args.msg_encoder,
                          **aggr_kwargs)
    elif args.type == "undirectional-quaternion-sc-cat":
        logging.info(
            "Using Quaternion Undirectional MPNN with Skip Connection through Concatenation"
        )
        model = UQ_SC_CAT(atom_input_dims=FULL_ATOM_FEATURE_DIMS,
                          atom_encoded_dim=args.input_embed_dim,
                          bond_input_dims=FULL_BOND_FEATURE_DIMS,
                          naive_encoder=naive_encoder,
                          mp_layers=mp_layers,
                          dropout_mpnn=mp_dropout,
                          init=args.w_init,
                          same_dropout=same_dropout,
                          norm_mp=args.mp_norm,
                          add_self_loops=True,
                          msg_aggr=args.aggr_msg,
                          node_aggr=args.aggr_node,
                          mlp=mlp_mp,
                          pooling=args.pooling,
                          activation=args.activation,
                          real_trafo=args.real_trafo,
                          downstream_layers=downstream_layers,
                          target_dim=dataset.num_tasks,
                          dropout_dn=dn_dropout,
                          norm_dn=downstream_bn,
                          msg_encoder=args.msg_encoder,
                          **aggr_kwargs)
    elif args.type == "undirectional-phm-sc-add":
        logging.info(
            "Using PHM Undirectional MPNN with Skip Connection through Addition"
        )
        model = UPH_SC_ADD(phm_dim=phm_dim,
                           learn_phm=learn_phm,
                           phm_rule=phm_rule,
                           atom_input_dims=FULL_ATOM_FEATURE_DIMS,
                           atom_encoded_dim=args.input_embed_dim,
                           bond_input_dims=FULL_BOND_FEATURE_DIMS,
                           naive_encoder=naive_encoder,
                           mp_layers=mp_layers,
                           dropout_mpnn=mp_dropout,
                           w_init=args.w_init,
                           c_init=args.c_init,
                           same_dropout=same_dropout,
                           norm_mp=args.mp_norm,
                           add_self_loops=True,
                           msg_aggr=args.aggr_msg,
                           node_aggr=args.aggr_node,
                           mlp=mlp_mp,
                           pooling=args.pooling,
                           activation=args.activation,
                           real_trafo=args.real_trafo,
                           downstream_layers=downstream_layers,
                           target_dim=dataset.num_tasks,
                           dropout_dn=dn_dropout,
                           norm_dn=downstream_bn,
                           msg_encoder=args.msg_encoder,
                           sc_type=args.sc_type,
                           **aggr_kwargs)

    elif args.type == "undirectional-phm-sc-cat":
        logging.info(
            "Using PHM Undirectional MPNN with Skip Connection through Concatenation"
        )
        model = UPH_SC_CAT(phm_dim=phm_dim,
                           learn_phm=learn_phm,
                           phm_rule=phm_rule,
                           atom_input_dims=FULL_ATOM_FEATURE_DIMS,
                           atom_encoded_dim=args.input_embed_dim,
                           bond_input_dims=FULL_BOND_FEATURE_DIMS,
                           naive_encoder=naive_encoder,
                           mp_layers=mp_layers,
                           dropout_mpnn=mp_dropout,
                           w_init=args.w_init,
                           c_init=args.c_init,
                           same_dropout=same_dropout,
                           norm_mp=args.mp_norm,
                           add_self_loops=True,
                           msg_aggr=args.aggr_msg,
                           node_aggr=args.aggr_node,
                           mlp=mlp_mp,
                           pooling=args.pooling,
                           activation=args.activation,
                           real_trafo=args.real_trafo,
                           downstream_layers=downstream_layers,
                           target_dim=dataset.num_tasks,
                           dropout_dn=dn_dropout,
                           norm_dn=downstream_bn,
                           msg_encoder=args.msg_encoder,
                           **aggr_kwargs)

    else:
        raise ModuleNotFoundError

    logging.info(
        f"Model consists of {model.get_number_of_params_()} trainable parameters"
    )
    # do runs
    test_best_epoch_metrics_arr = []
    test_last_epoch_metrics_arr = []
    val_metrics_arr = []

    for i in range(1, args.n_runs + 1):
        ogb_bestEpoch_test_metrics, ogb_lastEpoch_test_metric, ogb_val_metrics = do_run(
            i, model, args, transform, train_loader, valid_loader, test_loader,
            device, evaluator)

        test_best_epoch_metrics_arr.append(ogb_bestEpoch_test_metrics)
        test_last_epoch_metrics_arr.append(ogb_lastEpoch_test_metric)
        val_metrics_arr.append(ogb_val_metrics)

    logging.info(f"Performance of model across {args.n_runs} runs:")
    test_bestEpoch_perf = torch.tensor(test_best_epoch_metrics_arr)
    test_lastEpoch_perf = torch.tensor(test_last_epoch_metrics_arr)
    valid_perf = torch.tensor(val_metrics_arr)
    logging.info('===========================')
    logging.info(
        f'Final Test (best val-epoch) '
        f'"{evaluator.eval_metric}": {test_bestEpoch_perf.mean():.4f} ± {test_bestEpoch_perf.std():.4f}'
    )
    logging.info(
        f'Final Test (last-epoch) '
        f'"{evaluator.eval_metric}": {test_lastEpoch_perf.mean():.4f} ± {test_lastEpoch_perf.std():.4f}'
    )
    logging.info(
        f'Final (best) Valid "{evaluator.eval_metric}": {valid_perf.mean():.4f} ± {valid_perf.std():.4f}'
    )
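str2bool is called throughout main() but is not shown in this excerpt. A common implementation of such a helper is sketched here only as an assumption (the project's own version may differ):

def str2bool(value: str) -> bool:
    # Treat typical truthy strings as True and everything else as False (assumed helper).
    return str(value).lower() in ("yes", "true", "t", "1")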
Code Example #13
    def __init__(self, encoder_name, d_in_features, d_in_encoder,
                 d_out_encoder, **kwargs):

        super(DiscreteEmbedding, self).__init__()

        #-------------- various different embedding layers
        kwargs['init'] = None if 'init' not in kwargs else kwargs['init']

        self.encoder_name = encoder_name
        # d_in_features: input feature size (e.g. if already one hot encoded),
        # d_in_encoder: number of unique values that will be encoded (size of embedding vocabulary)

        #-------------- fill embedding with zeros
        if encoder_name == 'zero_encoder':
            self.encoder = zero_encoder(d_out_encoder)
            d_out = d_out_encoder

        #-------------- linear projection
        elif encoder_name == 'linear':
            self.encoder = nn.Linear(d_in_features, d_out_encoder, bias=True)
            d_out = d_out_encoder

        #-------------- mlp
        elif encoder_name == 'mlp':
            self.encoder = mlp(d_in_features, d_out_encoder, d_out_encoder,
                               kwargs['seed'], kwargs['activation_mlp'],
                               kwargs['bn_mlp'])
            d_out = d_out_encoder

        #-------------- multi hot encoding of categorical data
        elif encoder_name == 'one_hot_encoder':
            self.encoder = one_hot_encoder(d_in_encoder)
            d_out = sum(d_in_encoder)

        #-------------- embedding of categorical data (linear projection without bias of one hot encodings)
        elif encoder_name == 'embedding':
            self.encoder = multi_embedding(d_in_encoder, d_out_encoder,
                                           kwargs['aggr'], kwargs['init'])
            if kwargs['aggr'] == 'concat':
                d_out = len(d_in_encoder) * d_out_encoder
            else:
                d_out = d_out_encoder

        #-------------- for ogb: multi hot encoding of node features
        elif encoder_name == 'atom_one_hot_encoder':
            full_atom_feature_dims = (get_atom_feature_dims()
                                      if kwargs['features_scope'] == 'full'
                                      else get_atom_feature_dims()[:2])
            self.encoder = one_hot_encoder(full_atom_feature_dims)
            d_out = sum(full_atom_feature_dims)

        #-------------- for ogb: multi hot encoding of edge features
        elif encoder_name == 'bond_one_hot_encoder':
            full_bond_feature_dims = (get_bond_feature_dims()
                                      if kwargs['features_scope'] == 'full'
                                      else get_bond_feature_dims()[:2])
            self.encoder = one_hot_encoder(full_bond_feature_dims)
            d_out = sum(full_bond_feature_dims)

        #-------------- for ogb: embedding of node features
        elif encoder_name == 'atom_encoder':
            self.encoder = AtomEncoder(d_out_encoder)
            d_out = d_out_encoder

        #-------------- for ogb: embedding of edge features
        elif encoder_name == 'bond_encoder':
            self.encoder = BondEncoder(emb_dim=d_out_encoder)
            d_out = d_out_encoder

        #-------------- no embedding, use as is
        elif encoder_name == 'None':
            self.encoder = None
            d_out = d_in_features

        else:
            raise NotImplementedError(
                'Encoder {} is not currently supported.'.format(encoder_name))

        self.d_out = d_out

        return