Example #1
class BeplerContactPredictor(Model):
    def __init__(self,
                 input_name: str = 'encoder_output',
                 output_name: str = 'contact_prob'):
        super().__init__()
        self._input_name = input_name
        self._output_name = output_name

        def concat_pairs(tensor):
            # Broadcast (B, L, 1, d) against (B, 1, L, d) to build a
            # feature vector for every residue pair (i, j).
            input_mul = tensor[:, :, None] * tensor[:, None, :]
            input_sub = tf.abs(tensor[:, :, None] - tensor[:, None, :])
            output = tf.concat((input_mul, input_sub), -1)
            return output

        self.get_pairwise_feature_vector = Lambda(concat_pairs)

        self.predict_contact_map = Stack()
        self.predict_contact_map.add(
            Conv2D(32, 1, use_bias=True, padding='same', activation='relu'))
        self.predict_contact_map.add(
            Conv2D(1, 7, use_bias=True, padding='same', activation='linear'))

    def call(self, inputs):
        encoder_output = inputs[self._input_name]
        tf.add_to_collection('checkpoints', encoder_output)

        z = self.get_pairwise_feature_vector(encoder_output)
        sequence_mask = rk.utils.convert_sequence_length_to_sequence_mask(
            encoder_output, inputs['protein_length'])
        # Outer product of the 1-D masks: True where residues i and j both exist.
        mask_2d = sequence_mask[:, None, :] & sequence_mask[:, :, None]

        prediction = self.predict_contact_map(z, mask=mask_2d)
        inputs[self._output_name] = prediction
        return inputs
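
The broadcast pattern in concat_pairs is worth seeing in isolation: indexing with None inserts two new length axes, so a single multiply forms every residue-pair interaction at once. A minimal sketch with plain TensorFlow (toy shapes chosen for illustration):

import tensorflow as tf

# Toy batch: 2 sequences, length 5, feature dim 8.
x = tf.random.normal((2, 5, 8))

# Broadcasting (batch, L, 1, d) against (batch, 1, L, d) yields one
# feature vector per residue pair (i, j).
pair_mul = x[:, :, None] * x[:, None, :]          # (2, 5, 5, 8)
pair_sub = tf.abs(x[:, :, None] - x[:, None, :])  # (2, 5, 5, 8)
pairs = tf.concat((pair_mul, pair_sub), -1)       # (2, 5, 5, 16)

print(pairs.shape)  # (2, 5, 5, 16)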
Example #2
class ResidueResidueContactPredictor(Model):
    def __init__(self,
                 input_name: str = 'encoder_output',
                 output_name: str = 'sequence_logits'):
        super().__init__()
        self._input_name = input_name
        self._output_name = output_name

        self.get_pairwise_feature_vector = Stack()
        self.get_pairwise_feature_vector.add(Dense(64, activation='linear'))

        def concat_pairs(tensor):
            # Tile along two new length axes so that position (i, j) holds
            # the concatenation of residue i's and residue j's features.
            seqlen = tf.shape(tensor)[1]
            input_left = tf.tile(tensor[:, :, None], (1, 1, seqlen, 1))
            input_right = tf.tile(tensor[:, None, :], (1, seqlen, 1, 1))
            output = tf.concat((input_left, input_right), -1)
            return output

        self.get_pairwise_feature_vector.add(Lambda(concat_pairs))

        self.predict_contact_map = Stack()
        self.predict_contact_map.add(PaddedConv(2, 64, 1, dropout=0.1))
        for layer in range(30):
            self.predict_contact_map.add(
                ResidualBlock(2,
                              64,
                              3,
                              dropout=0.1,
                              add_checkpoint=layer % 5 == 0))
        self.predict_contact_map.add(Dense(1, activation='linear'))

    def call(self, inputs):
        encoder_output = inputs[self._input_name]
        tf.add_to_collection('checkpoints', encoder_output)

        z = self.get_pairwise_feature_vector(encoder_output)
        self.pairwise_z = z
        sequence_mask = rk.utils.convert_sequence_length_to_sequence_mask(
            encoder_output, inputs['protein_length'])
        mask_2d = sequence_mask[:, None, :] & sequence_mask[:, :, None]

        prediction = self.predict_contact_map(z, mask=mask_2d)
        prediction = (prediction +
                      tf.transpose(prediction, (0, 2, 1, 3))) / 2  # symmetrize
        inputs[self._output_name] = prediction
        return inputs
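
Two details of call generalize beyond this model: the outer-product mask marks exactly the (i, j) pairs where both residues are real, and averaging a prediction with its transpose forces the contact map to be symmetric. A self-contained sketch of both, using tf.sequence_mask in place of the rinokeras utility (toy shapes):

import tensorflow as tf

lengths = tf.constant([3, 5])                  # two sequences, padded to length 5
mask = tf.sequence_mask(lengths, maxlen=5)     # (2, 5) boolean
mask_2d = mask[:, None, :] & mask[:, :, None]  # (2, 5, 5): True iff i and j both valid

pred = tf.random.normal((2, 5, 5, 1))
sym = (pred + tf.transpose(pred, (0, 2, 1, 3))) / 2  # symmetrize the two length axes

# sym[b, i, j] == sym[b, j, i] for every pair
tf.debugging.assert_near(sym, tf.transpose(sym, (0, 2, 1, 3)))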
Example #3
    def __init__(self, n_symbols, latent_size=32, max_seq_len=10000):

        self.latent_size = latent_size
        self.max_seq_len = max_seq_len

        super().__init__(n_symbols)

        self.input_embedding = Embedding(n_symbols, 128)

        enc = Stack()
        enc.add(
            Conv1D(filters=32,
                   kernel_size=7,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))
        enc.add(
            Conv1D(filters=64,
                   kernel_size=5,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))
        enc.add(
            Conv1D(filters=128,
                   kernel_size=3,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))

        self.enc_mu = Stack()
        self.enc_mu.add(enc)
        self.enc_mu.add(Flatten())
        self.enc_mu.add(Dense(latent_size))

        self.enc_std = Stack()
        self.enc_std.add(enc)
        self.enc_std.add(Flatten())
        self.enc_std.add(Dense(latent_size, activation='softplus'))

        self.dec = Stack()
        self.dec.add(Dense(1000))
        self.dec.add(Reshape((100, 10)))
        self.dec.add(
            Conv1DTranspose(filters=128,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))
        self.dec.add(
            Conv1DTranspose(filters=64,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))
        self.dec.add(
            Conv1DTranspose(filters=32,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))
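
The enc_mu / enc_std pair is the standard VAE encoder head: the softplus activation keeps the standard deviation positive, and sampling is done with the reparameterization trick so gradients flow through both heads. A minimal sketch of that sampling step (stand-in tensors, toy latent size):

import tensorflow as tf

latent_size = 32
z_mu = tf.zeros((4, latent_size))                    # stand-in encoder mean
z_std = tf.nn.softplus(tf.zeros((4, latent_size)))   # softplus keeps std positive

# Reparameterization trick: sample eps ~ N(0, I), then shift and scale it.
eps = tf.random.normal(tf.shape(z_std))
z = z_mu + eps * z_std                               # differentiable w.r.t. mu and std
print(z.shape)  # (4, 32)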
Example #4
    def __init__(self,
                 n_classes: int,
                 input_name: str = 'encoder_output',
                 output_name: str = 'logits'):
        super().__init__()
        self._input_name = input_name
        self._output_name = output_name

        def max_pool_30(x):
            # k-max pooling: keep the 30 largest activations along the
            # last axis (sequence positions, after the Permute below).
            maxpool, _ = tf.nn.top_k(x, 30)
            return maxpool

        conv6 = Stack(
            [DeepSFConv(10, 6) for _ in range(10)] +
            [Permute([2, 1]), Lambda(max_pool_30),
             Flatten()])
        conv10 = Stack(
            [DeepSFConv(10, 10) for _ in range(10)] +
            [Permute([2, 1]), Lambda(max_pool_30),
             Flatten()])

        output_model = Stack()

        # Make conv layers
        output_model.add(Concatenate(-1))
        output_model.add(
            Dense(500,
                  activation='relu',
                  kernel_initializer='he_normal',
                  kernel_constraint=tf.keras.constraints.max_norm(3)))
        output_model.add(Dropout(0.2))
        output_model.add(Dense(n_classes, kernel_initializer='he_normal'))

        self.conv6 = conv6
        self.conv10 = conv10
        self.output_model = output_model
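
max_pool_30 combined with the preceding Permute([2, 1]) is k-max pooling: the permute moves the length axis last, and tf.nn.top_k then keeps the 30 largest activations per filter regardless of sequence length, giving Flatten a fixed-size input. A toy demonstration:

import tensorflow as tf

x = tf.random.normal((2, 100, 10))   # (batch, length, filters)
x = tf.transpose(x, (0, 2, 1))       # Permute([2, 1]): filters first, length last
topk, _ = tf.nn.top_k(x, 30)         # 30 largest values per filter
print(topk.shape)                    # (2, 10, 30) -- independent of length 100
flat = tf.reshape(topk, (2, -1))     # Flatten -> (2, 300)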
Example #5
    def __init__(self,
                 n_symbols: int,
                 n_layers: int = 35,
                 filters: int = 256,
                 kernel_size: int = 9,
                 layer_norm: bool = True,
                 activation: str = 'elu',
                 dilation_rate: int = 2,
                 dropout: Optional[float] = 0.1) -> None:
        super().__init__(n_symbols)
        self.n_symbols = n_symbols
        self.n_layers = n_layers
        self.filters = filters
        self.kernel_size = kernel_size
        self.layer_norm = layer_norm
        self.activation = activation
        self.dilation_rate = dilation_rate
        self.dropout = dropout

        print(self)

        input_embedding = Stack()
        input_embedding.add(Embedding(n_symbols, 128))
        input_embedding.add(Lambda(lambda x: x * np.sqrt(filters)))
        input_embedding.add(PositionEmbedding())

        encoder = Stack()
        encoder.add(input_embedding)
        encoder.add(PaddedConv(1, filters, kernel_size, 1, activation, dropout))
        encoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                                  dilation_rate=1, dropout=dropout))
        for layer in range(n_layers - 1):
            encoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                                      dilation_rate=dilation_rate, dropout=dropout,
                                      add_checkpoint=layer % 5 == 0))

        self.encoder = encoder
        
        self.z_mu = PaddedConv(1, 4, kernel_size, 1, 'linear', 0.0)
        self.z_var = PaddedConv(1, 4, kernel_size, 1, 'linear', 0.0)
        
        decoder = Stack()
        decoder.add(PaddedConv(1, filters, kernel_size, 1, activation, dropout))
        decoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                                  dilation_rate=1, dropout=dropout))
        for layer in range(n_layers - 1):
            decoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                                      dilation_rate=dilation_rate, dropout=dropout,
                                      add_checkpoint=layer % 5 == 0))

        self.decoder = decoder
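
One reason this encoder can afford 35 layers is receptive-field growth: a dilated conv adds roughly dilation_rate * (kernel_size - 1) positions of context per layer. A back-of-the-envelope sketch, under the simplifying assumption that each ResidualBlock contributes one conv at the stated kernel size and dilation (the real rinokeras block may stack more):

# Rough receptive-field estimate for a stack of dilated 1-D convs.
def receptive_field(n_layers, kernel_size=9, dilation_rate=2):
    rf = 1
    for _ in range(n_layers):
        rf += dilation_rate * (kernel_size - 1)
    return rf

print(receptive_field(35))  # 561 positions of context, vs 9 for a single conv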
Example #6
    def __init__(self, n_symbols, length=3000):
        super().__init__(n_symbols)
        self._length = length
        
        encoder = Stack()
        encoder.add(Embedding(n_symbols, 128, input_length=self._length))
        # Six identical conv -> batch-norm -> pool blocks halve the
        # sequence length at each step (3000 -> 46 after six poolings).
        for _ in range(6):
            encoder.add(Conv1D(256, 5, strides=1, padding='same',
                               dilation_rate=1, activation='relu'))
            encoder.add(BatchNormalization())
            encoder.add(MaxPooling1D(2, 2))
        encoder.add(Flatten())
        encoder.add(Dense(1000))

        decoder = Stack()
        decoder.add(Dense(47 * 256, input_shape=(1000,), activation='relu'))
        decoder.add(Reshape((47, 256)))
        # Five upsample -> conv -> batch-norm blocks, then a final
        # upsample -> conv without batch norm.
        for _ in range(5):
            decoder.add(UpSampling1D(2))
            decoder.add(Conv1D(256, 5, strides=1, padding='same',
                               dilation_rate=1, activation='relu'))
            decoder.add(BatchNormalization())
        decoder.add(UpSampling1D(2))
        decoder.add(Conv1D(256, 5, strides=1, padding='same',
                           dilation_rate=1, activation='relu'))
        decoder.add(Cropping1D((0, 8)))  # 47 * 2**6 = 3008 -> crop 8 -> 3000

        self.encoder = encoder
        self.decoder = decoder
Example #7
    def __init__(self,
                 n_symbols,
                 n_layers=5,
                 length=3000,
                 latent_size=1000,
                 n_filters=256,
                 kernel_size=5,
                 pooling_type='average',
                 dropout=0):
        super().__init__(n_symbols)
        self._n_layers = n_layers
        self._length = length
        self._latent_size = latent_size
        self._kernel_size = kernel_size
        self._n_filters = n_filters
        pool = AveragePooling1D if pooling_type == 'average' else MaxPooling1D

        input_embedding = Stack()
        input_embedding.add(
            Embedding(n_symbols, 128, input_length=self._length))
        input_embedding.add(Lambda(lambda x: x * np.sqrt(n_filters)))
        input_embedding.add(PositionEmbedding())
        input_embedding.add(
            PaddedConv(1,
                       n_filters,
                       kernel_size,
                       1,
                       activation='relu',
                       dropout=dropout))

        encoder = Stack()
        encoder.add(input_embedding)
        for _ in range(6):
            for _ in range(n_layers):
                encoder.add(
                    ResidualBlock(1,
                                  n_filters,
                                  kernel_size,
                                  activation='relu',
                                  dilation_rate=1,
                                  dropout=dropout))
            encoder.add(pool(2, 2))

        latent = Stack()
        latent.add(Flatten())
        latent.add(Dense(self._latent_size))

        decoder = Stack()
        decoder.add(
            Dense(47 * n_filters,
                  input_shape=(self._latent_size, ),
                  activation='relu'))
        decoder.add(Reshape((47, n_filters)))
        for _ in range(6):
            decoder.add(UpSampling1D(2))
            for _ in range(n_layers):
                decoder.add(
                    ResidualBlock(1,
                                  n_filters,
                                  kernel_size,
                                  activation='relu',
                                  dilation_rate=1,
                                  dropout=dropout))
        decoder.add(Cropping1D((0, 8)))

        self.encoder = encoder
        self.decoder = decoder
        self.latent = latent
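
The Lambda(lambda x: x * np.sqrt(n_filters)) scaling before PositionEmbedding follows the Transformer convention: embeddings are scaled up by sqrt(d) so the roughly unit-scale position signal doesn't drown them out. A sketch of that pattern with a standard sinusoidal encoding (rinokeras's PositionEmbedding may differ in detail):

import numpy as np

def sinusoidal_positions(seq_len, d_model):
    # Standard sinusoidal position encoding (Vaswani et al., 2017).
    pos = np.arange(seq_len)[:, None]
    i = np.arange(d_model)[None, :]
    angles = pos / np.power(10000, (2 * (i // 2)) / d_model)
    return np.where(i % 2 == 0, np.sin(angles), np.cos(angles))

n_filters = 256
embedded = np.random.randn(3000, n_filters) * 0.01   # stand-in token embeddings
x = embedded * np.sqrt(n_filters)                    # rescale before adding positions
x = x + sinusoidal_positions(3000, n_filters)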
Example #8
class MyModel(AbstractTapeModel):
    @hparams.capture
    def __init__(self, n_symbols, latent_size=32, max_seq_len=10000):

        self.latent_size = latent_size
        self.max_seq_len = max_seq_len

        super().__init__(n_symbols)

        self.input_embedding = Embedding(n_symbols, 128)

        enc = Stack()
        enc.add(
            Conv1D(filters=32,
                   kernel_size=7,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))
        enc.add(
            Conv1D(filters=64,
                   kernel_size=5,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))
        enc.add(
            Conv1D(filters=128,
                   kernel_size=3,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))

        self.enc_mu = Stack()
        self.enc_mu.add(enc)
        self.enc_mu.add(Flatten())
        self.enc_mu.add(Dense(latent_size))

        self.enc_std = Stack()
        self.enc_std.add(enc)
        self.enc_std.add(Flatten())
        self.enc_std.add(Dense(latent_size, activation='softplus'))

        self.dec = Stack()
        self.dec.add(Dense(1000))
        self.dec.add(Reshape((100, 10)))
        self.dec.add(
            Conv1DTranspose(filters=128,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))
        self.dec.add(
            Conv1DTranspose(filters=64,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))
        self.dec.add(
            Conv1DTranspose(filters=32,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))

    def call(self, inputs):
        sequence = inputs['primary']

        embedded = self.input_embedding(sequence)
        pad_embedded = pad_up_to(embedded, (-1, self.max_seq_len, -1), 0)
        pad_embedded.set_shape((None, self.max_seq_len, 128))

        z_mu = self.enc_mu(pad_embedded)
        z_std = self.enc_std(pad_embedded)
        z = z_mu + K.random_normal(K.shape(z_std)) * z_std  # reparameterization trick

        encoder_output = self.dec(z)

        inputs['encoder_output'] = encoder_output
        return inputs

    def get_optimal_batch_sizes(self):
        bucket_sizes = np.array(
            [100, 200, 300, 400, 600, 900, 1000, 1300, 2000, 3000])
        # One batch size per bucket plus one overflow entry for sequences
        # longer than the last boundary (hence 11 entries for 10 boundaries).
        batch_sizes = np.array([4, 4, 4, 4, 3, 3, 3, 2, 1, 0.5, 0])

        batch_sizes = np.asarray(batch_sizes * self._get_gpu_memory(),
                                 np.int32)
        batch_sizes[batch_sizes <= 0] = 1
        return bucket_sizes, batch_sizes
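
The batch-size array in get_optimal_batch_sizes is one entry longer than the bucket boundaries because sequences longer than the last boundary fall into an overflow bucket. A sketch of how such a lookup typically resolves, using np.searchsorted (the framework's actual bucketing may differ):

import numpy as np

bucket_sizes = np.array([100, 200, 300, 400, 600, 900, 1000, 1300, 2000, 3000])
batch_sizes = np.array([4, 4, 4, 4, 3, 3, 3, 2, 1, 1, 1])  # one per bucket + overflow

def batch_size_for(seq_len):
    # searchsorted returns the index of the first boundary >= seq_len,
    # i.e. the bucket the sequence falls into (len(bucket_sizes) = overflow).
    return batch_sizes[np.searchsorted(bucket_sizes, seq_len)]

print(batch_size_for(150), batch_size_for(2500), batch_size_for(5000))  # 4 1 1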