Beispiel #1
0
 def create_loss(self):
     """Build the loss object used by this component.

     Returns
     -------
     A freshly constructed ``L2Loss`` instance; a new object is created on
     every call.
     """
     l2_loss = L2Loss()
     return l2_loss
Beispiel #2
0
  def __init__(self,
               n_tasks: int,
               graph_attention_layers: list = None,
               n_attention_heads: int = 8,
               agg_modes: list = None,
               activation=F.elu,
               residual: bool = True,
               dropout: float = 0.,
               alpha: float = 0.2,
               predictor_hidden_feats: int = 128,
               predictor_dropout: float = 0.,
               mode: str = 'regression',
               number_atom_features: int = 30,
               n_classes: int = 2,
               self_loop: bool = True,
               **kwargs):
    """Create a ``GAT`` network and register it, with its loss, on the base model.

    Parameters
    ----------
    n_tasks: int
      Number of tasks.
    graph_attention_layers: list of int
      Per-head channel width of every GAT layer; entry ``i`` applies to the
      i-th layer. When given together with ``agg_modes`` the two lists should
      have the same length. If not specified, the default value will be [8, 8].
    n_attention_heads: int
      Number of attention heads in each GAT layer.
    agg_modes: list of str
      How the multi-head results of each GAT layer are combined: 'flatten'
      concatenates the heads, 'mean' averages them. Entry ``i`` applies to the
      i-th layer. When given together with ``graph_attention_layers`` the two
      lists should have the same length. If not specified, intermediate layers
      flatten their heads and the last layer averages them.
    activation: activation function or None
      Activation applied to the aggregated multi-head result of each GAT
      layer. Defaults to ELU.
    residual: bool
      Whether each GAT layer uses a residual connection. Default to True.
    dropout: float
      Dropout probability inside each GAT layer. Default to 0.
    alpha: float
      Negative-side slope of the LeakyReLU. Default to 0.2.
    predictor_hidden_feats: int
      Hidden size of the output MLP predictor. Default to 128.
    predictor_dropout: float
      Dropout probability of the output MLP predictor. Default to 0.
    mode: str
      The model type, 'classification' or 'regression'. Default to
      'regression'. Any value other than 'regression' selects the
      classification loss and output types in this constructor.
    number_atom_features: int
      Length of the initial atom feature vectors. Default to 30.
    n_classes: int
      Number of classes to predict per task (only used when ``mode`` is
      'classification'). Default to 2.
    self_loop: bool
      Whether to add self loops for the nodes, i.e. edges from nodes to
      themselves. When input graphs have isolated nodes, self loops allow
      preserving their original features in message passing. Default to True.
      The flag is stored as ``self._self_loop``.
    kwargs
      This can include any keyword argument of TorchModel.
    """
    # ``self_loop`` is not forwarded to ``GAT``; it is only stored on the
    # wrapper at the end of this method.
    gat_options = dict(
        n_tasks=n_tasks,
        graph_attention_layers=graph_attention_layers,
        n_attention_heads=n_attention_heads,
        agg_modes=agg_modes,
        activation=activation,
        residual=residual,
        dropout=dropout,
        alpha=alpha,
        predictor_hidden_feats=predictor_hidden_feats,
        predictor_dropout=predictor_dropout,
        mode=mode,
        number_atom_features=number_atom_features,
        n_classes=n_classes)
    model = GAT(**gat_options)

    # Everything that is not 'regression' takes the classification settings.
    is_regression = mode == 'regression'
    loss: Loss = L2Loss() if is_regression else SparseSoftmaxCrossEntropy()
    output_types = ['prediction'] if is_regression else ['prediction', 'loss']

    super(GATModel, self).__init__(
        model, loss=loss, output_types=output_types, **kwargs)

    self._self_loop = self_loop
Beispiel #3
0
    def __init__(self,
                 dist_kernel: str = 'softmax',
                 n_encoders=8,
                 lambda_attention: float = 0.33,
                 lambda_distance: float = 0.33,
                 h: int = 16,
                 sa_hsize: int = 1024,
                 sa_dropout_p: float = 0.0,
                 output_bias: bool = True,
                 d_input: int = 1024,
                 d_hidden: int = 1024,
                 d_output: int = 1024,
                 activation: str = 'leakyrelu',
                 n_layers: int = 1,
                 ff_dropout_p: float = 0.0,
                 encoder_hsize: int = 1024,
                 encoder_dropout_p: float = 0.0,
                 embed_input_hsize: int = 36,
                 embed_dropout_p: float = 0.0,
                 gen_aggregation_type: str = 'mean',
                 gen_dropout_p: float = 0.0,
                 gen_n_layers: int = 1,
                 gen_attn_hidden: int = 128,
                 gen_attn_out: int = 4,
                 gen_d_output: int = 1,
                 **kwargs):
        """The wrapper class for the Molecular Attention Transformer.

        Since we are using a custom data class as input (MATEncoding), we
        have overridden the default_generator function from DiskDataset and
        customized it to work with a batch of MATEncoding classes.

        This constructor builds a ``MAT`` network from the arguments below and
        registers it on the base model with an ``L2Loss`` and a single
        'prediction' output.

        Parameters
        ----------
        dist_kernel: str
            Kernel activation for the self-attention layer: 'softmax' for
            softmax or 'exp' for exponential.
        n_encoders: int
            Number of encoder layers in the encoder block.
        lambda_attention: float
            Constant multiplied with the attention matrix in the
            self-attention layer.
        lambda_distance: float
            Constant multiplied with the distance matrix in the
            self-attention layer.
        h: int
            Number of attention heads for the self-attention layer.
        sa_hsize: int
            Size of dense layer in the self-attention layer.
        sa_dropout_p: float
            Dropout probability for the self-attention layer.
        output_bias: bool
            If True, dense layers will use bias vectors in the self-attention
            layer.
        d_input: int
            Size of input layer in the feed-forward layer.
        d_hidden: int
            Size of hidden layer in the feed-forward layer. Will also be used
            as d_output for the MATEmbedding layer.
        d_output: int
            Size of output layer in the feed-forward layer.
        activation: str
            Activation function to be used in the feed-forward layer. Can
            choose between 'relu' for ReLU, 'leakyrelu' for LeakyReLU, 'prelu'
            for PReLU, 'tanh' for TanH, 'selu' for SELU, 'elu' for ELU and
            'linear' for linear activation.
        n_layers: int
            Number of layers in the feed-forward layer.
        ff_dropout_p: float
            Dropout probability in the feed-forward layer.
        encoder_hsize: int
            Size of Dense layer for the encoder itself.
        encoder_dropout_p: float
            Dropout probability for connections in the encoder layer.
        embed_input_hsize: int
            Size of input layer for the MATEmbedding layer.
        embed_dropout_p: float
            Dropout probability for the MATEmbedding layer.
        gen_aggregation_type: str
            Type of aggregation to be used. Can be 'grover', 'mean' or
            'contextual'.
        gen_dropout_p: float
            Dropout probability for the MATGenerator layer.
        gen_n_layers: int
            Number of layers in MATGenerator.
        gen_attn_hidden: int
            Size of hidden attention layer in the MATGenerator layer.
        gen_attn_out: int
            Size of output attention layer in the MATGenerator layer.
        gen_d_output: int
            Size of output layer in the MATGenerator layer.
        kwargs
            Remaining keyword arguments are passed unchanged to the base
            class constructor.
        """
        # Every named hyper-parameter is handed to MAT under its own name.
        mat_options = dict(
            dist_kernel=dist_kernel,
            n_encoders=n_encoders,
            lambda_attention=lambda_attention,
            lambda_distance=lambda_distance,
            h=h,
            sa_hsize=sa_hsize,
            sa_dropout_p=sa_dropout_p,
            output_bias=output_bias,
            d_input=d_input,
            d_hidden=d_hidden,
            d_output=d_output,
            activation=activation,
            n_layers=n_layers,
            ff_dropout_p=ff_dropout_p,
            encoder_hsize=encoder_hsize,
            encoder_dropout_p=encoder_dropout_p,
            embed_input_hsize=embed_input_hsize,
            embed_dropout_p=embed_dropout_p,
            gen_aggregation_type=gen_aggregation_type,
            gen_dropout_p=gen_dropout_p,
            gen_n_layers=gen_n_layers,
            gen_attn_hidden=gen_attn_hidden,
            gen_attn_out=gen_attn_out,
            gen_d_output=gen_d_output,
        )
        model = MAT(**mat_options)

        regression_loss = L2Loss()
        super(MATModel, self).__init__(model,
                                       loss=regression_loss,
                                       output_types=['prediction'],
                                       **kwargs)
Beispiel #4
0
  def __init__(self,
               frag1_num_atoms=70,
               frag2_num_atoms=634,
               complex_num_atoms=701,
               max_num_neighbors=12,
               batch_size=24,
               atom_types=[
                   6, 7., 8., 9., 11., 12., 15., 16., 17., 20., 25., 30., 35.,
                   53., -1.
               ],
               radial=[[
                   1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0,
                   7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5, 11.0, 11.5, 12.0
               ], [0.0, 4.0, 8.0], [0.4]],
               layer_sizes=[32, 32, 16],
               learning_rate=0.001,
               **kwargs):
    """Build the atomic-convolution Keras graph and register it on the base model.

    Three groups of inputs (first fragment, second fragment, complex) are each
    passed through their own ``AtomicConvolution`` layer; the three results
    and the three atomic-number inputs are then combined by
    ``AtomicConvScore``. The model is trained with ``L2Loss``.

    Params
    ------
    frag1_num_atoms: int
      Number of atoms in first fragment
    frag2_num_atoms: int
      Number of atoms in second fragment
    complex_num_atoms: int
      Number of atoms in the complex
    max_num_neighbors: int
      Maximum number of neighbors possible for an atom. Recall neighbors
      are spatial neighbors.
    batch_size: int
      Batch size; stored on the instance and forwarded to the base class.
    atom_types: list
      List of atoms recognized by model. Atoms are indicated by their
      nuclear numbers.
    radial: list
      A list of lists of values. The Cartesian product of these lists is
      handed to every ``AtomicConvolution`` layer as ``radial_params``.
    layer_sizes: list
      Passed to ``AtomicConvScore`` together with ``atom_types``
      (presumably the widths of its dense layers; confirm against
      ``AtomicConvScore``).
    learning_rate: float
      Learning rate for the model. Forwarded to the base class constructor.
    kwargs
      Remaining keyword arguments are passed unchanged to the base class
      constructor.
    """
    # TODO: Turning off queue for now. Safe to re-activate?
    self.complex_num_atoms = complex_num_atoms
    self.frag1_num_atoms = frag1_num_atoms
    self.frag2_num_atoms = frag2_num_atoms
    self.max_num_neighbors = max_num_neighbors
    self.batch_size = batch_size
    self.atom_types = atom_types

    # Every combination of one value from each list in ``radial``.
    rp = list(itertools.product(*radial))

    # Per group: coordinates, neighbor list, neighbor atomic numbers and
    # per-atom atomic numbers.
    frag1_X = Input(shape=(frag1_num_atoms, 3))
    frag1_nbrs = Input(shape=(frag1_num_atoms, max_num_neighbors))
    frag1_nbrs_z = Input(shape=(frag1_num_atoms, max_num_neighbors))
    frag1_z = Input(shape=(frag1_num_atoms,))

    frag2_X = Input(shape=(frag2_num_atoms, 3))
    frag2_nbrs = Input(shape=(frag2_num_atoms, max_num_neighbors))
    frag2_nbrs_z = Input(shape=(frag2_num_atoms, max_num_neighbors))
    frag2_z = Input(shape=(frag2_num_atoms,))

    complex_X = Input(shape=(complex_num_atoms, 3))
    complex_nbrs = Input(shape=(complex_num_atoms, max_num_neighbors))
    complex_nbrs_z = Input(shape=(complex_num_atoms, max_num_neighbors))
    complex_z = Input(shape=(complex_num_atoms,))

    frag1_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([frag1_X, frag1_nbrs, frag1_nbrs_z])

    frag2_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([frag2_X, frag2_nbrs, frag2_nbrs_z])

    complex_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([complex_X, complex_nbrs, complex_nbrs_z])

    score = AtomicConvScore(self.atom_types, layer_sizes)(
        [frag1_conv, frag2_conv, complex_conv, frag1_z, frag2_z, complex_z])

    model = tf.keras.Model(
        inputs=[
            frag1_X, frag1_nbrs, frag1_nbrs_z, frag1_z, frag2_X, frag2_nbrs,
            frag2_nbrs_z, frag2_z, complex_X, complex_nbrs, complex_nbrs_z,
            complex_z
        ],
        outputs=score)
    # ``learning_rate`` is a named parameter, so it never reaches ``kwargs``;
    # it has to be forwarded explicitly or the caller's value is dropped.
    super(AtomicConvModel, self).__init__(
        model,
        L2Loss(),
        batch_size=batch_size,
        learning_rate=learning_rate,
        **kwargs)
Beispiel #5
0
    def __init__(self,
                 n_tasks: int,
                 node_out_feats: int = 64,
                 edge_hidden_feats: int = 128,
                 num_step_message_passing: int = 3,
                 num_step_set2set: int = 6,
                 num_layer_set2set: int = 3,
                 mode: str = 'regression',
                 number_atom_features: int = 30,
                 number_bond_features: int = 11,
                 n_classes: int = 2,
                 self_loop: bool = False,
                 **kwargs):
        """Create an ``MPNN`` network and register it, with its loss, on the base model.

        Parameters
        ----------
        n_tasks: int
            Number of tasks.
        node_out_feats: int
            The length of the final node representation vectors. Default to 64.
        edge_hidden_feats: int
            The length of the hidden edge representation vectors. Default to
            128.
        num_step_message_passing: int
            The number of rounds of message passing. Default to 3.
        num_step_set2set: int
            The number of set2set steps. Default to 6.
        num_layer_set2set: int
            The number of set2set layers. Default to 3.
        mode: str
            The model type, 'classification' or 'regression'. Default to
            'regression'. Any value other than 'regression' selects the
            classification loss and output types in this constructor.
        number_atom_features: int
            The length of the initial atom feature vectors. Default to 30.
        number_bond_features: int
            The length of the initial bond feature vectors. Default to 11.
        n_classes: int
            The number of classes to predict per task (only used when
            ``mode`` is 'classification'). Default to 2.
        self_loop: bool
            Whether to add self loops for the nodes, i.e. edges from nodes to
            themselves. Generally, an MPNNModel does not require self loops.
            Default to False. The flag is stored as ``self._self_loop``.
        kwargs
            This can include any keyword argument of TorchModel.
        """
        # ``self_loop`` is not forwarded to ``MPNN``; it is only stored on
        # the wrapper at the end of this method.
        mpnn_options = dict(
            n_tasks=n_tasks,
            node_out_feats=node_out_feats,
            edge_hidden_feats=edge_hidden_feats,
            num_step_message_passing=num_step_message_passing,
            num_step_set2set=num_step_set2set,
            num_layer_set2set=num_layer_set2set,
            mode=mode,
            number_atom_features=number_atom_features,
            number_bond_features=number_bond_features,
            n_classes=n_classes,
        )
        model = MPNN(**mpnn_options)

        # Everything that is not 'regression' takes the classification
        # settings.
        is_regression = mode == 'regression'
        loss: Loss = (L2Loss()
                      if is_regression else SparseSoftmaxCrossEntropy())
        output_types = (['prediction']
                        if is_regression else ['prediction', 'loss'])

        super(MPNNModel, self).__init__(model,
                                        loss=loss,
                                        output_types=output_types,
                                        **kwargs)

        self._self_loop = self_loop
Beispiel #6
0
    def __init__(
            self,
            n_tasks: int,
            frag1_num_atoms: int = 70,
            frag2_num_atoms: int = 634,
            complex_num_atoms: int = 701,
            max_num_neighbors: int = 12,
            batch_size: int = 24,
            atom_types: Sequence[float] = [
                6, 7., 8., 9., 11., 12., 15., 16., 17., 20., 25., 30., 35.,
                53., -1.
            ],
            radial: Sequence[Sequence[float]] = [[
                1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0,
                7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5, 11.0, 11.5, 12.0
            ], [0.0, 4.0, 8.0], [0.4]],
            layer_sizes=[100],
            weight_init_stddevs: OneOrMany[float] = 0.02,
            bias_init_consts: OneOrMany[float] = 1.0,
            weight_decay_penalty: float = 0.0,
            weight_decay_penalty_type: str = "l2",
            dropouts: OneOrMany[float] = 0.5,
            activation_fns: OneOrMany[ActivationFn] = tf.nn.relu,
            residual: bool = False,
            learning_rate=0.001,
            **kwargs) -> None:
        """Build the atomic-convolution Keras graph and register it on the base model.

        Three groups of inputs (first fragment, second fragment, complex) are
        each passed through their own ``AtomicConvolution`` layer. The three
        results are flattened, concatenated, and fed through a stack of dense
        layers that ends in an ``(n_tasks, 1)`` output. The model is trained
        with ``L2Loss``.

        Parameters
        ----------
        n_tasks: int
            number of tasks
        frag1_num_atoms: int
            Number of atoms in first fragment
        frag2_num_atoms: int
            Number of atoms in second fragment
        complex_num_atoms: int
            Number of atoms in the complex
        max_num_neighbors: int
            Maximum number of neighbors possible for an atom. Recall
            neighbors are spatial neighbors.
        batch_size: int
            Batch size; stored on the instance and forwarded to the base
            class.
        atom_types: list
            List of atoms recognized by model. Atoms are indicated by their
            nuclear numbers.
        radial: list
            Radial parameters used in the atomic convolution transformation.
            The Cartesian product of the inner lists is handed to every
            ``AtomicConvolution`` layer as ``radial_params``.
        layer_sizes: list
            the size of each dense layer in the network.  The length of
            this list determines the number of layers.
        weight_init_stddevs: list or float
            the standard deviation of the distribution to use for weight
            initialization of each layer.  The length of this list should
            equal len(layer_sizes).  Alternatively this may be a single
            value instead of a list, in which case the same value is used
            for every layer.
        bias_init_consts: list or float
            the value to initialize the biases in each layer to.  The
            length of this list should equal len(layer_sizes).
            Alternatively this may be a single value instead of a list, in
            which case the same value is used for every layer.
        weight_decay_penalty: float
            the magnitude of the weight decay penalty to use; 0.0 disables
            the kernel regularizer.
        weight_decay_penalty_type: str
            the type of penalty to use for weight decay, either 'l1' or
            'l2'. Any value other than 'l1' selects the l2 regularizer.
        dropouts: list or float
            the dropout probability to use for each layer.  The length of
            this list should equal len(layer_sizes).  Alternatively this may
            be a single value instead of a list, in which case the same
            value is used for every layer. A dropout layer is only added
            when the rate is greater than 0.
        activation_fns: list or object
            the Tensorflow activation function to apply to each layer.  The
            length of this list should equal len(layer_sizes).
            Alternatively this may be a single value instead of a list, in
            which case the same value is used for every layer.
        residual: bool
            if True, the model will be composed of pre-activation residual
            blocks instead of a simple stack of dense layers. A skip
            connection is only added where a layer's input and output widths
            are equal.
        learning_rate: float
            Learning rate for the model. Forwarded to the base class
            constructor.
        kwargs
            Remaining keyword arguments are passed unchanged to the base
            class constructor.
        """

        self.complex_num_atoms = complex_num_atoms
        self.frag1_num_atoms = frag1_num_atoms
        self.frag2_num_atoms = frag2_num_atoms
        self.max_num_neighbors = max_num_neighbors
        self.batch_size = batch_size
        self.atom_types = atom_types

        # Every combination of one value from each list in ``radial``.
        rp = list(itertools.product(*radial))

        # Per group: coordinates, neighbor list, neighbor atomic numbers and
        # per-atom atomic numbers. The ``*_z`` inputs are part of the model's
        # input list but do not feed into the output computed below.
        frag1_X = Input(shape=(frag1_num_atoms, 3))
        frag1_nbrs = Input(shape=(frag1_num_atoms, max_num_neighbors))
        frag1_nbrs_z = Input(shape=(frag1_num_atoms, max_num_neighbors))
        frag1_z = Input(shape=(frag1_num_atoms, ))

        frag2_X = Input(shape=(frag2_num_atoms, 3))
        frag2_nbrs = Input(shape=(frag2_num_atoms, max_num_neighbors))
        frag2_nbrs_z = Input(shape=(frag2_num_atoms, max_num_neighbors))
        frag2_z = Input(shape=(frag2_num_atoms, ))

        complex_X = Input(shape=(complex_num_atoms, 3))
        complex_nbrs = Input(shape=(complex_num_atoms, max_num_neighbors))
        complex_nbrs_z = Input(shape=(complex_num_atoms, max_num_neighbors))
        complex_z = Input(shape=(complex_num_atoms, ))

        self._frag1_conv = AtomicConvolution(
            atom_types=self.atom_types, radial_params=rp,
            boxsize=None)([frag1_X, frag1_nbrs, frag1_nbrs_z])
        flattened1 = Flatten()(self._frag1_conv)

        self._frag2_conv = AtomicConvolution(
            atom_types=self.atom_types, radial_params=rp,
            boxsize=None)([frag2_X, frag2_nbrs, frag2_nbrs_z])
        flattened2 = Flatten()(self._frag2_conv)

        self._complex_conv = AtomicConvolution(
            atom_types=self.atom_types, radial_params=rp,
            boxsize=None)([complex_X, complex_nbrs, complex_nbrs_z])
        flattened3 = Flatten()(self._complex_conv)

        concat = Concatenate()([flattened1, flattened2, flattened3])

        # Broadcast scalar per-layer settings to one entry per dense layer.
        n_layers = len(layer_sizes)
        if not isinstance(weight_init_stddevs, SequenceCollection):
            weight_init_stddevs = [weight_init_stddevs] * n_layers
        if not isinstance(bias_init_consts, SequenceCollection):
            bias_init_consts = [bias_init_consts] * n_layers
        if not isinstance(dropouts, SequenceCollection):
            dropouts = [dropouts] * n_layers
        if not isinstance(activation_fns, SequenceCollection):
            activation_fns = [activation_fns] * n_layers
        if weight_decay_penalty != 0.0:
            if weight_decay_penalty_type == 'l1':
                regularizer = tf.keras.regularizers.l1(weight_decay_penalty)
            else:
                regularizer = tf.keras.regularizers.l2(weight_decay_penalty)
        else:
            regularizer = None

        prev_layer = concat
        # Width of the concatenated feature vector. The last axis is the
        # feature axis; axis 0 is the batch axis, which would never equal a
        # layer size and so would disable the residual check on the first
        # dense layer.
        prev_size = concat.shape[-1]
        next_activation = None

        # Add the dense layers

        for size, weight_stddev, bias_const, dropout, activation_fn in zip(
                layer_sizes, weight_init_stddevs, bias_init_consts, dropouts,
                activation_fns):
            layer = prev_layer
            if next_activation is not None:
                layer = Activation(next_activation)(layer)
            layer = Dense(
                size,
                kernel_initializer=tf.keras.initializers.TruncatedNormal(
                    stddev=weight_stddev),
                bias_initializer=tf.constant_initializer(value=bias_const),
                kernel_regularizer=regularizer)(layer)
            if dropout > 0.0:
                layer = Dropout(rate=dropout)(layer)
            # Skip connection only when input and output widths agree.
            if residual and prev_size == size:
                prev_layer = Lambda(lambda x: x[0] + x[1])([prev_layer, layer])
            else:
                prev_layer = layer
            prev_size = size
            next_activation = activation_fn
            # NOTE(review): the activation is applied here and then again at
            # the top of the next iteration via ``next_activation``. That is
            # harmless for idempotent functions such as ReLU but applies
            # other activations twice between layers -- confirm intent.
            if next_activation is not None:
                prev_layer = Activation(activation_fn)(prev_layer)
        self.neural_fingerprint = prev_layer

        # The output layer reuses the last hidden layer's initializer
        # settings.
        output = Reshape((n_tasks, 1))(Dense(
            n_tasks,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=weight_init_stddevs[-1]),
            bias_initializer=tf.constant_initializer(
                value=bias_init_consts[-1]))(prev_layer))

        model = tf.keras.Model(inputs=[
            frag1_X, frag1_nbrs, frag1_nbrs_z, frag1_z, frag2_X, frag2_nbrs,
            frag2_nbrs_z, frag2_z, complex_X, complex_nbrs, complex_nbrs_z,
            complex_z
        ],
                               outputs=output)
        # ``learning_rate`` is a named parameter, so it never reaches
        # ``kwargs``; it has to be forwarded explicitly or the caller's value
        # is dropped.
        super(AtomicConvModel, self).__init__(model,
                                              L2Loss(),
                                              batch_size=batch_size,
                                              learning_rate=learning_rate,
                                              **kwargs)
Beispiel #7
0
  def __init__(self,
               n_tasks: int,
               graph_conv_layers: list = None,
               activation=None,
               residual: bool = True,
               batchnorm: bool = False,
               dropout: float = 0.,
               predictor_hidden_feats: int = 128,
               predictor_dropout: float = 0.,
               mode: str = 'regression',
               number_atom_features=75,
               n_classes: int = 2,
               nfeat_name: str = 'x',
               self_loop: bool = True,
               **kwargs):
    """Create a ``GCN`` network and register it, with its loss, on the base model.

    Parameters
    ----------
    n_tasks: int
      Number of tasks.
    graph_conv_layers: list of int
      Channel width of every GCN layer; entry ``i`` applies to the i-th
      layer. If not specified, the default value will be [64, 64].
    activation: callable
      Activation applied to the output of each GCN layer. By default, no
      activation function will be applied.
    residual: bool
      Whether each GCN layer uses a residual connection. Default to True.
    batchnorm: bool
      Whether to apply batch normalization to the output of each GCN layer.
      Default to False.
    dropout: float
      Dropout probability for the output of each GCN layer. Default to 0.
    predictor_hidden_feats: int
      Hidden size of the output MLP predictor. Default to 128.
    predictor_dropout: float
      Dropout probability of the output MLP predictor. Default to 0.
    mode: str
      The model type, 'classification' or 'regression'. Any value other than
      'regression' selects the classification loss and output types in this
      constructor.
    number_atom_features: int
      Length of the initial atom feature vectors. Default to 75.
    n_classes: int
      Number of classes to predict per task (only used when ``mode`` is
      'classification').
    nfeat_name: str
      For an input graph ``g``, the model assumes that it stores node
      features in ``g.ndata[nfeat_name]`` and will retrieve input node
      features from that.
    self_loop: bool
      Whether to add self loops for the nodes, i.e. edges from nodes to
      themselves. Default to True. The flag is stored as ``self._self_loop``.
    kwargs
      This can include any keyword argument of TorchModel.
    """
    # ``self_loop`` is not forwarded to ``GCN``; it is only stored on the
    # wrapper at the end of this method.
    gcn_options = dict(
        graph_conv_layers=graph_conv_layers,
        activation=activation,
        residual=residual,
        batchnorm=batchnorm,
        dropout=dropout,
        predictor_hidden_feats=predictor_hidden_feats,
        predictor_dropout=predictor_dropout,
        n_tasks=n_tasks,
        mode=mode,
        number_atom_features=number_atom_features,
        n_classes=n_classes,
        nfeat_name=nfeat_name)
    model = GCN(**gcn_options)

    # Everything that is not 'regression' takes the classification settings.
    is_regression = mode == 'regression'
    loss: Loss = L2Loss() if is_regression else SparseSoftmaxCrossEntropy()
    output_types = ['prediction'] if is_regression else ['prediction', 'loss']

    super(GCNModel, self).__init__(
        model, loss=loss, output_types=output_types, **kwargs)

    self._self_loop = self_loop