Example #1
# Shared imports assumed for the examples below; the MAT layers are assumed to
# live in deepchem.models.torch_models.layers.
import deepchem as dc
import torch
from deepchem.models.torch_models import layers as torch_layers


def test_mat_embedding():
    """Test invoking MATEmbedding."""
    torch.manual_seed(0)
    input_ar = torch.tensor([1., 2., 3.])
    # MATEmbedding(d_input=3, d_output=1, dropout_p=0.0)
    layer = torch_layers.MATEmbedding(3, 1, 0.0)
    result = layer(input_ar).detach()
    output_ar = torch.tensor([-1.2353])
    assert torch.allclose(result, output_ar, rtol=1e-4)
Example #2
def test_mat_encoder_layer():
    """Test invoking MATEncoderLayer."""
    input_smile = "CC"
    feat = dc.feat.MATFeaturizer()
    out = feat.featurize(input_smile)
    # Add a batch dimension to the featurizer outputs: node features,
    # adjacency matrix and distance matrix.
    node = torch.tensor(out[0].node_features).float().unsqueeze(0)
    adj = torch.tensor(out[0].adjacency_matrix).float().unsqueeze(0)
    dist = torch.tensor(out[0].distance_matrix).float().unsqueeze(0)
    # Mask marking real atoms (rows with non-zero node features).
    mask = torch.sum(torch.abs(node), dim=-1) != 0
    layer = torch_layers.MATEncoderLayer()
    op = torch_layers.MATEmbedding()(node)
    output = layer(op, mask, adj, dist)
    assert (output.shape == (1, 3, 1024))
Example #3
def test_multi_headed_mat_attention():
    """Test invoking MultiHeadedMATAttention."""
    feat = dc.feat.MATFeaturizer()
    input_smile = "CC"
    out = feat.featurize(input_smile)
    node = torch.tensor(out[0].node_features).float().unsqueeze(0)
    adj = torch.tensor(out[0].adjacency_matrix).float().unsqueeze(0)
    dist = torch.tensor(out[0].distance_matrix).float().unsqueeze(0)
    mask = torch.sum(torch.abs(node), dim=-1) != 0
    layer = torch_layers.MultiHeadedMATAttention(dist_kernel='softmax',
                                                 lambda_attention=0.33,
                                                 lambda_distance=0.33,
                                                 h=16,
                                                 hsize=1024,
                                                 dropout_p=0.0)
    op = torch_layers.MATEmbedding()(node)
    output = layer(op, op, op, mask, adj, dist)
    assert (output.shape == (1, 3, 1024))
Example #4
    def __init__(self,
                 dist_kernel: str = 'softmax',
                 n_encoders: int = 8,
                 lambda_attention: float = 0.33,
                 lambda_distance: float = 0.33,
                 h: int = 16,
                 sa_hsize: int = 1024,
                 sa_dropout_p: float = 0.0,
                 output_bias: bool = True,
                 d_input: int = 1024,
                 d_hidden: int = 1024,
                 d_output: int = 1024,
                 activation: str = 'leakyrelu',
                 n_layers: int = 1,
                 ff_dropout_p: float = 0.0,
                 encoder_hsize: int = 1024,
                 encoder_dropout_p: float = 0.0,
                 embed_input_hsize: int = 36,
                 embed_dropout_p: float = 0.0,
                 gen_aggregation_type: str = 'mean',
                 gen_dropout_p: float = 0.0,
                 gen_n_layers: int = 1,
                 gen_attn_hidden: int = 128,
                 gen_attn_out: int = 4,
                 gen_d_output: int = 1,
                 **kwargs):
        '''
    Initialization for the internal MAT class.

    Parameters
    ----------
    dist_kernel: str
        Kernel activation used in the self-attention layer; either 'softmax' for softmax or 'exp' for exponential.
    n_encoders: int
        Number of encoder layers in the encoder block.
    lambda_attention: float
        Constant to be multiplied with the attention matrix in the self-attention layer.
    lambda_distance: float
        Constant to be multiplied with the distance matrix in the self-attention layer.
    h: int
        Number of attention heads for the self-attention layer.
    sa_hsize: int
        Size of dense layer in the self-attention layer.
    sa_dropout_p: float
        Dropout probability for the self-attention layer.
    output_bias: bool
        If True, dense layers will use bias vectors in the self-attention layer.
    d_input: int
        Size of input layer in the feed-forward layer.
    d_hidden: int
        Size of hidden layer in the feed-forward layer. Will also be used as d_output for the MATEmbedding layer.
    d_output: int
        Size of output layer in the feed-forward layer.
    activation: str
        Activation function to be used in the feed-forward layer.
        Can choose between 'relu' for ReLU, 'leakyrelu' for LeakyReLU, 'prelu' for PReLU,
        'tanh' for TanH, 'selu' for SELU, 'elu' for ELU and 'linear' for linear activation.
    n_layers: int
        Number of layers in the feed-forward layer.
    ff_dropout_p: float
        Dropout probability for the feed-forward layer.
    encoder_hsize: int
        Size of the dense layer for the encoder itself.
    encoder_dropout_p: float
        Dropout probability for connections in the encoder layer.
    embed_input_hsize: int
        Size of input layer for the MATEmbedding layer.
    embed_dropout_p: float
        Dropout probability for the MATEmbedding layer.
    gen_aggregation_type: str
        Type of aggregation to be used. Can be 'grover', 'mean' or 'contextual'.
    gen_dropout_p: float
        Dropout probability for the MATGenerator layer.
    gen_n_layers: int
        Number of layers in MATGenerator.
    gen_attn_hidden: int
        Size of hidden attention layer in the MATGenerator layer.
    gen_attn_out: int
        Size of output attention layer in the MATGenerator layer.
    gen_d_output: int
        Size of output layer in the MATGenerator layer.
    '''

        super(MAT, self).__init__()

        self.embedding = layers.MATEmbedding(d_input=embed_input_hsize,
                                             d_output=d_hidden,
                                             dropout_p=embed_dropout_p)

        self.encoder = nn.ModuleList([
            layers.MATEncoderLayer(dist_kernel=dist_kernel,
                                   lambda_attention=lambda_attention,
                                   lambda_distance=lambda_distance,
                                   h=h,
                                   sa_hsize=sa_hsize,
                                   sa_dropout_p=sa_dropout_p,
                                   output_bias=output_bias,
                                   d_input=d_input,
                                   d_hidden=d_hidden,
                                   d_output=d_output,
                                   activation=activation,
                                   n_layers=n_layers,
                                   ff_dropout_p=ff_dropout_p,
                                   encoder_hsize=encoder_hsize,
                                   encoder_dropout_p=encoder_dropout_p)
            for _ in range(n_encoders)
        ])

        self.generator = layers.MATGenerator(
            hsize=d_input,
            aggregation_type=gen_aggregation_type,
            d_output=gen_d_output,
            n_layers=gen_n_layers,
            dropout_p=gen_dropout_p,
            attn_hidden=gen_attn_hidden,
            attn_out=gen_attn_out)
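
A minimal usage sketch for the constructor above. The import path is assumed, and the values shown are either the documented defaults or explicit overrides; none of this is taken from the original tests:

from deepchem.models.torch_models.mat import MAT  # import path assumed

mat = MAT(n_encoders=2,              # override the default of 8 encoder layers
          gen_aggregation_type='mean',
          gen_d_output=1)            # single scalar readout from MATGenerator

# __init__ wires up three sub-modules:
#   mat.embedding : MATEmbedding mapping embed_input_hsize=36 features to d_hidden=1024
#   mat.encoder   : nn.ModuleList holding n_encoders MATEncoderLayer blocks
#   mat.generator : MATGenerator producing a gen_d_output-sized readout
assert len(mat.encoder) == 2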