Example 1
class BeplerContactPredictor(Model):
    def __init__(self,
                 input_name: str = 'encoder_output',
                 output_name: str = 'contact_prob'):
        super().__init__()
        self._input_name = input_name
        self._output_name = output_name

        def concat_pairs(tensor):
            # Broadcast (batch, L, dim) against itself to get, for every pair
            # (i, j), the elementwise product and absolute difference of the
            # two residue embeddings: (batch, L, L, 2 * dim).
            input_mul = tensor[:, :, None] * tensor[:, None, :]
            input_sub = tf.abs(tensor[:, :, None] - tensor[:, None, :])
            return tf.concat((input_mul, input_sub), -1)

        self.get_pairwise_feature_vector = Lambda(concat_pairs)

        self.predict_contact_map = Stack()
        self.predict_contact_map.add(
            Conv2D(32, 1, use_bias=True, padding='same', activation='relu'))
        self.predict_contact_map.add(
            Conv2D(1, 7, use_bias=True, padding='same', activation='linear'))

    def call(self, inputs):
        encoder_output = inputs[self._input_name]
        # Register the encoder output for gradient checkpointing.
        tf.add_to_collection('checkpoints', encoder_output)

        z = self.get_pairwise_feature_vector(encoder_output)
        sequence_mask = rk.utils.convert_sequence_length_to_sequence_mask(
            encoder_output, inputs['protein_length'])
        mask_2d = sequence_mask[:, None, :] & sequence_mask[:, :, None]

        prediction = self.predict_contact_map(z, mask=mask_2d)
        inputs[self._output_name] = prediction
        return inputs
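For reference, the broadcast arithmetic in concat_pairs can be checked standalone (a sketch assuming TensorFlow 2 eager execution; the snippet above is TF1-style):

import tensorflow as tf

x = tf.random.normal((2, 5, 8))              # (batch, length, dim)
mul = x[:, :, None] * x[:, None, :]          # (2, 5, 5, 8) via broadcasting
sub = tf.abs(x[:, :, None] - x[:, None, :])  # (2, 5, 5, 8)
pairs = tf.concat((mul, sub), -1)            # (2, 5, 5, 16)
assert pairs.shape == (2, 5, 5, 16)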
Example 2
 def __init__(self,
              n_classes: int,
              input_name: str = 'encoder_output',
              output_name: str = 'sequence_logits',
              use_conv: bool = True) -> None:
     super().__init__()
     self._input_name = input_name
     self._output_name = output_name
     if use_conv:
         self.predict_class = Stack([
             LayerNorm(),
             Conv1D(128,
                    5,
                    activation='relu',
                    padding='same',
                    use_bias=True),
             Conv1D(n_classes,
                    3,
                    activation=None,
                    padding='same',
                    use_bias=True)
         ])
     else:
         self.predict_class = Stack([
             LayerNorm(),
             Dense(512, activation='relu'),
             Dense(n_classes, activation=None)
         ])
Example 3
    def __init__(self,
                 input_name: str = 'encoder_output',
                 output_name: str = 'sequence_logits'):
        super().__init__()
        self._input_name = input_name
        self._output_name = output_name

        self.get_pairwise_feature_vector = Stack()
        self.get_pairwise_feature_vector.add(Dense(64, activation='linear'))

        def concat_pairs(tensor):
            seqlen = tf.shape(tensor)[1]
            input_left = tf.tile(tensor[:, :, None], (1, 1, seqlen, 1))
            input_right = tf.tile(tensor[:, None, :], (1, seqlen, 1, 1))
            output = tf.concat((input_left, input_right), -1)
            return output

        self.get_pairwise_feature_vector.add(Lambda(concat_pairs))

        self.predict_contact_map = Stack()
        self.predict_contact_map.add(PaddedConv(2, 64, 1, dropout=0.1))
        for layer in range(30):
            self.predict_contact_map.add(
                ResidualBlock(2,
                              64,
                              3,
                              dropout=0.1,
                              add_checkpoint=layer % 5 == 0))
        self.predict_contact_map.add(Dense(1, activation='linear'))
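The tile-based concat_pairs here builds, for every residue pair (i, j), the concatenation [z_i; z_j]; a standalone shape check (a sketch, assuming TF2 eager execution):

import tensorflow as tf

z = tf.random.normal((2, 5, 64))
left = tf.tile(z[:, :, None], (1, 1, 5, 1))   # repeat z_i across j
right = tf.tile(z[:, None, :], (1, 5, 1, 1))  # repeat z_j across i
pairs = tf.concat((left, right), -1)          # (2, 5, 5, 128)
assert pairs.shape == (2, 5, 5, 128)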
Example 4
 def __init__(self,
              input_name: str = 'encoder_output',
              output_name: str = 'cls_vector'):
     super().__init__()
     self._input_name = input_name
     self._output_name = output_name
     self.compute_attention = Stack([
         LayerNorm(),
         Dense(1, activation='linear'),
         Dropout(0.1)
     ])
     self.attention_mask = ApplyAttentionMask()
Example 5
class ResidueResidueContactPredictor(Model):
    def __init__(self,
                 input_name: str = 'encoder_output',
                 output_name: str = 'sequence_logits'):
        super().__init__()
        self._input_name = input_name
        self._output_name = output_name

        self.get_pairwise_feature_vector = Stack()
        self.get_pairwise_feature_vector.add(Dense(64, activation='linear'))

        def concat_pairs(tensor):
            seqlen = tf.shape(tensor)[1]
            input_left = tf.tile(tensor[:, :, None], (1, 1, seqlen, 1))
            input_right = tf.tile(tensor[:, None, :], (1, seqlen, 1, 1))
            output = tf.concat((input_left, input_right), -1)
            return output

        self.get_pairwise_feature_vector.add(Lambda(concat_pairs))

        self.predict_contact_map = Stack()
        self.predict_contact_map.add(PaddedConv(2, 64, 1, dropout=0.1))
        for layer in range(30):
            self.predict_contact_map.add(
                ResidualBlock(2,
                              64,
                              3,
                              dropout=0.1,
                              add_checkpoint=layer % 5 == 0))
        self.predict_contact_map.add(Dense(1, activation='linear'))

    def call(self, inputs):
        encoder_output = inputs[self._input_name]
        tf.add_to_collection('checkpoints', encoder_output)

        z = self.get_pairwise_feature_vector(encoder_output)
        self.pairwise_z = z
        sequence_mask = rk.utils.convert_sequence_length_to_sequence_mask(
            encoder_output, inputs['protein_length'])
        mask_2d = sequence_mask[:, None, :] & sequence_mask[:, :, None]

        prediction = self.predict_contact_map(z, mask=mask_2d)
        prediction = (prediction +
                      tf.transpose(prediction, (0, 2, 1, 3))) / 2  # symmetrize
        inputs[self._output_name] = prediction
        return inputs
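The symmetrization step averages the raw map with its transpose over the two residue axes; a NumPy sketch confirming the result is symmetric in (i, j):

import numpy as np

p = np.random.rand(1, 4, 4, 1)                   # (batch, L, L, channels)
sym = (p + np.transpose(p, (0, 2, 1, 3))) / 2
assert np.allclose(sym, np.transpose(sym, (0, 2, 1, 3)))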
Example 6
 def __init__(self,
              d_output: int,
              input_name: str = 'cls_vector',
              output_name: str = 'prediction') -> None:
     super().__init__()
     self._d_output = d_output
     self._input_name = input_name
     self._output_name = output_name
     self.predict_vector = Stack([
         LayerNorm(),
         Dense(512, 'relu'),
         Dropout(0.5),
         Dense(d_output)
     ])
Example 7
    def build_task_model(embedding_model: Model, tasks: List[Task],
                         freeze_embedding_weights: bool) -> Model:
        layers = [embedding_model]

        if freeze_embedding_weights:
            layers.append(FreezeWeights())

        for task in tasks:
            layers = task.build_output_model(layers)
        return Stack(layers)
Example 8
    def __init__(self,
                 input_name: str = 'encoder_output',
                 output_name: str = 'contact_prob'):
        super().__init__()
        self._input_name = input_name
        self._output_name = output_name

        def concat_pairs(tensor):
            input_mul = tensor[:, :, None] * tensor[:, None, :]
            input_sub = tf.abs(tensor[:, :, None] - tensor[:, None, :])
            output = tf.concat((input_mul, input_sub), -1)
            return output

        self.get_pairwise_feature_vector = Lambda(concat_pairs)

        self.predict_contact_map = Stack()
        self.predict_contact_map.add(
            Conv2D(32, 1, use_bias=True, padding='same', activation='relu'))
        self.predict_contact_map.add(
            Conv2D(1, 7, use_bias=True, padding='same', activation='linear'))
Example 9
    def __init__(self, input_name: str = 'encoder_output'):
        super().__init__()
        self._input_name = input_name

        self.convs = Stack([
            PaddedConv(1, 32, 129),
            PaddedConv(1, 32, 257)])
        self.bilstm = Stack([
            BidirectionalCudnnLSTM(1024, return_sequences=True),
            BidirectionalCudnnLSTM(1024, return_sequences=True)])

        # Need to predict phi, psi, rsa, disorder, interface, ss3, ss8
        num_outputs = 0
        num_outputs += 1  # phi
        num_outputs += 2  # psi
        num_outputs += 1  # rsa
        num_outputs += 1  # disorder
        num_outputs += 1  # interface
        num_outputs += 3  # ss3
        num_outputs += 8  # ss8

        self.predict_outputs = Dense(num_outputs, activation=None)
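The single Dense head emits all 17 targets in one tensor, so a consumer has to slice them apart. A hedged sketch of one plausible split, following the order in which the counts are summed above (the ordering is an assumption, not shown in the source):

import tensorflow as tf

outputs = tf.zeros((2, 100, 17))  # stand-in for self.predict_outputs(features)
phi, psi, rsa, disorder, interface, ss3, ss8 = tf.split(
    outputs, [1, 2, 1, 1, 1, 3, 8], axis=-1)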
Example 10
    def __init__(self,
                 n_symbols: int,
                 n_units: int = 1024,
                 n_layers: int = 3,
                 dropout: Optional[float] = 0.1) -> None:
        super().__init__(n_symbols)

        if dropout is None:
            dropout = 0

        self.embedding = Embedding(n_symbols, 128)

        self.forward_lstm = Stack([
            LSTM(n_units,
                 return_sequences=True) for _ in range(n_layers)],
            name='forward_lstm')

        self.reverse_lstm = Stack([
            LSTM(n_units,
                 return_sequences=True) for _ in range(n_layers)],
            name='reverse_lstm')

        self.dropout = Dropout(dropout)
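The call method is omitted here; presumably reverse_lstm consumes a time-reversed copy of the input, since plain LSTM layers only run forward. A sketch of that reversal in plain TensorFlow (an assumption about the omitted logic):

import tensorflow as tf

x = tf.random.normal((2, 7, 128))   # embedded sequences
lengths = tf.constant([7, 4])
# Reverse each sequence up to its true length before feeding reverse_lstm.
x_rev = tf.reverse_sequence(x, lengths, seq_axis=1, batch_axis=0)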
Example 11
    def __init__(self,
                 n_symbols: int,
                 dropout: float = 0,
                 use_pfam_alphabet: bool = True):
        super().__init__()

        self._use_pfam_alphabet = use_pfam_alphabet

        if use_pfam_alphabet:
            self.embed = Embedding(n_symbols, n_symbols)
        else:
            n_symbols = 21
            self.embed = Embedding(n_symbols + 1, n_symbols)

        self.dropout = Dropout(dropout)
        self.rnn = Stack([
            LSTM(1024, return_sequences=True, use_bias=True,
                 implementation=2, recurrent_activation='sigmoid'),
            LSTM(1024, return_sequences=True, use_bias=True,
                 implementation=2, recurrent_activation='sigmoid')])

        self.compute_logits = Dense(n_symbols, use_bias=True, activation='linear')
Example 12
    def __init__(self,
                 n_symbols: int,
                 dropout: float = 0,
                 use_pfam_alphabet: bool = True):
        if not use_pfam_alphabet:
            n_symbols = 21

        super().__init__(n_symbols)
        self._use_pfam_alphabet = use_pfam_alphabet

        self.embed = LMEmbed(n_symbols, dropout)
        self.dropout = Dropout(dropout)
        lstm = Stack([
            Bidirectional(
                LSTM(512, return_sequences=True, use_bias=True,
                     recurrent_activation='sigmoid', implementation=2))
            for _ in range(3)])
        self.rnn = lstm
        self.proj = Dense(100, use_bias=True, activation='linear')
        self.random_replace = RandomReplaceMask(0.05, n_symbols)
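RandomReplaceMask(0.05, n_symbols) presumably corrupts 5% of input tokens with random symbols; a sketch of that behavior in plain TensorFlow (an assumption about the helper's semantics):

import tensorflow as tf

tokens = tf.constant([[3, 7, 2, 9, 1]])
replace = tf.random.uniform(tf.shape(tokens)) < 0.05
noise = tf.random.uniform(tf.shape(tokens), 0, 21, dtype=tokens.dtype)
corrupted = tf.where(replace, noise, tokens)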
Example 13
    def __init__(self, n_symbols, length=3000):
        super().__init__(n_symbols)
        self._length = length
        
        encoder = Stack()
        encoder.add(Embedding(n_symbols, 128, input_length=self._length))
        # Six identical conv blocks, each halving the sequence length.
        for _ in range(6):
            encoder.add(Conv1D(256, 5, strides=1, padding='same',
                               dilation_rate=1, activation='relu'))
            encoder.add(BatchNormalization())
            encoder.add(MaxPooling1D(2, 2))
        encoder.add(Flatten())
        encoder.add(Dense(1000))
        
        decoder = Stack()
        decoder.add(Dense(47 * 256, input_shape=(1000,), activation='relu'))
        decoder.add(Reshape((47, 256)))
        # Six upsampling conv blocks mirroring the encoder; no batch norm
        # after the final reconstruction convolution.
        for i in range(6):
            decoder.add(UpSampling1D(2))
            decoder.add(Conv1D(256, 5, strides=1, padding='same',
                               dilation_rate=1, activation='relu'))
            if i < 5:
                decoder.add(BatchNormalization())
        decoder.add(Cropping1D((0, 8)))  # 47 * 2**6 = 3008 -> back to 3000

        self.encoder = encoder
        self.decoder = decoder
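The hard-coded 47 and Cropping1D((0, 8)) follow from the length arithmetic: 47 is the smallest starting width whose six 2x upsamplings cover the 3000-step input, and the 8 excess steps are cropped off. A quick check:

import math

length, pools = 3000, 6
start = math.ceil(length / 2 ** pools)  # 47: smallest width that covers 3000
excess = start * 2 ** pools - length    # 8: removed by Cropping1D((0, 8))
print(start, excess)                    # -> 47 8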
Example 14
    def __init__(self,
                 n_symbols: int,
                 n_layers: int = 35,
                 filters: int = 256,
                 kernel_size: int = 9,
                 layer_norm: bool = True,
                 activation: str = 'elu',
                 dilation_rate: int = 2,
                 dropout: Optional[float] = 0.1) -> None:
        super().__init__(n_symbols)
        self.n_symbols = n_symbols
        self.n_layers = n_layers
        self.filters = filters
        self.kernel_size = kernel_size
        self.layer_norm = layer_norm
        self.activation = activation
        self.dilation_rate = dilation_rate
        self.dropout = dropout

        input_embedding = Stack()
        input_embedding.add(Embedding(n_symbols, 128))
        input_embedding.add(Lambda(lambda x: x * np.sqrt(filters)))
        input_embedding.add(PositionEmbedding())

        encoder = Stack()
        encoder.add(input_embedding)
        encoder.add(PaddedConv(1, filters, kernel_size, 1, activation, dropout))
        encoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                                  dilation_rate=1, dropout=dropout))
        for layer in range(n_layers - 1):
            encoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                                      dilation_rate=dilation_rate, dropout=dropout,
                                      add_checkpoint=layer % 5 == 0))

        self.encoder = encoder
        
        self.z_mu = PaddedConv(1, 4, kernel_size, 1, 'linear', 0.0)
        self.z_var = PaddedConv(1, 4, kernel_size, 1, 'linear', 0.0)
        
        decoder = Stack()
        decoder.add(PaddedConv(1, filters, kernel_size, 1, activation, dropout))
        decoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                                  dilation_rate=1, dropout=dropout))
        for layer in range(n_layers - 1):
            decoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                                      dilation_rate=dilation_rate, dropout=dropout,
                                      add_checkpoint=layer % 5 == 0))

        self.decoder = decoder
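The z_mu/z_var heads suggest a VAE-style latent, but the sampling step is not shown and z_var's parameterization is ambiguous. A reparameterization sketch assuming it predicts a log-variance:

import tensorflow as tf

z_mu = tf.zeros((2, 100, 4))        # stand-ins for the two PaddedConv heads
z_log_var = tf.zeros((2, 100, 4))
eps = tf.random.normal(tf.shape(z_mu))
z = z_mu + tf.exp(0.5 * z_log_var) * eps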
Example 15
class MyModel(AbstractTapeModel):
    @hparams.capture
    def __init__(self, n_symbols, latent_size=32, max_seq_len=10000):
        # Call the parent constructor before setting attributes on the Model.
        super().__init__(n_symbols)

        self.latent_size = latent_size
        self.max_seq_len = max_seq_len

        self.input_embedding = Embedding(n_symbols, 128)

        enc = Stack()
        enc.add(
            Conv1D(filters=32,
                   kernel_size=7,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))
        enc.add(
            Conv1D(filters=64,
                   kernel_size=5,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))
        enc.add(
            Conv1D(filters=128,
                   kernel_size=3,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))

        self.enc_mu = Stack()
        self.enc_mu.add(enc)
        self.enc_mu.add(Flatten())
        self.enc_mu.add(Dense(latent_size))

        self.enc_std = Stack()
        self.enc_std.add(enc)
        self.enc_std.add(Flatten())
        self.enc_std.add(Dense(latent_size, activation='softplus'))

        self.dec = Stack()
        self.dec.add(Dense(1000))
        self.dec.add(Reshape((100, 10)))
        self.dec.add(
            Conv1DTranspose(filters=128,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))
        self.dec.add(
            Conv1DTranspose(filters=64,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))
        self.dec.add(
            Conv1DTranspose(filters=32,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))

    def call(self, inputs):
        sequence = inputs['primary']

        embedded = self.input_embedding(sequence)
        pad_embedded = pad_up_to(embedded, (-1, self.max_seq_len, -1), 0)
        pad_embedded.set_shape((None, self.max_seq_len, 128))

        z_mu = self.enc_mu(pad_embedded)
        z_std = self.enc_std(pad_embedded)
        z = z_mu + K.random_normal(K.shape(z_std)) * z_std

        encoder_output = self.dec(z)

        inputs['encoder_output'] = encoder_output
        return inputs

    def get_optimal_batch_sizes(self):
        bucket_sizes = np.array(
            [100, 200, 300, 400, 600, 900, 1000, 1300, 2000, 3000])
        batch_sizes = np.array([4, 4, 4, 4, 3, 3, 3, 2, 1, 0.5, 0])

        batch_sizes = np.asarray(batch_sizes * self._get_gpu_memory(),
                                 np.int32)
        batch_sizes[batch_sizes <= 0] = 1
        return bucket_sizes, batch_sizes
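batch_sizes carries one entry per length interval (11 entries for 10 boundaries; the last covers sequences longer than 3000), scaled by available GPU memory. A sketch of looking up a bucket, with a hypothetical gpu_memory factor standing in for _get_gpu_memory():

import numpy as np

bucket_sizes = np.array([100, 200, 300, 400, 600, 900, 1000, 1300, 2000, 3000])
batch_sizes = np.array([4, 4, 4, 4, 3, 3, 3, 2, 1, 0.5, 0])

gpu_memory = 2.0                                  # hypothetical scaling factor
scaled = np.asarray(batch_sizes * gpu_memory, np.int32)
scaled[scaled <= 0] = 1

seq_len = 750
bucket = np.searchsorted(bucket_sizes, seq_len)   # 5: between 600 and 900
print(scaled[bucket])                             # -> 6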
Example 16
    def __init__(self, n_symbols, latent_size=32, max_seq_len=10000):
        # Call the parent constructor before setting attributes on the Model.
        super().__init__(n_symbols)

        self.latent_size = latent_size
        self.max_seq_len = max_seq_len

        self.input_embedding = Embedding(n_symbols, 128)

        enc = Stack()
        enc.add(
            Conv1D(filters=32,
                   kernel_size=7,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))
        enc.add(
            Conv1D(filters=64,
                   kernel_size=5,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))
        enc.add(
            Conv1D(filters=128,
                   kernel_size=3,
                   strides=1,
                   dilation_rate=2,
                   activation='relu'))

        self.enc_mu = Stack()
        self.enc_mu.add(enc)
        self.enc_mu.add(Flatten())
        self.enc_mu.add(Dense(latent_size))

        self.enc_std = Stack()
        self.enc_std.add(enc)
        self.enc_std.add(Flatten())
        self.enc_std.add(Dense(latent_size, activation='softplus'))

        self.dec = Stack()
        self.dec.add(Dense(1000))
        self.dec.add(Reshape((100, 10)))
        self.dec.add(
            Conv1DTranspose(filters=128,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))
        self.dec.add(
            Conv1DTranspose(filters=64,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))
        self.dec.add(
            Conv1DTranspose(filters=32,
                            kernel_size=3,
                            strides=1,
                            dilation_rate=2,
                            activation='relu'))
Example 17
 def build_output_model(
         self, layers: List[tf.keras.Model]) -> List[tf.keras.Model]:
     ssa = SoftSymmetricAlignment(Stack(layers))
     return [ssa, OrdinalRegression(5)]
Example 18
    def __init__(self,
                 n_symbols,
                 n_layers=5,
                 length=3000,
                 latent_size=1000,
                 n_filters=256,
                 kernel_size=5,
                 pooling_type='average',
                 dropout=0):
        super().__init__(n_symbols)
        self._n_layers = n_layers
        self._length = length
        self._latent_size = latent_size
        self._kernel_size = kernel_size
        self._n_filters = n_filters
        pool = AveragePooling1D if pooling_type == 'average' else MaxPooling1D

        input_embedding = Stack()
        input_embedding.add(
            Embedding(n_symbols, 128, input_length=self._length))
        input_embedding.add(Lambda(lambda x: x * np.sqrt(n_filters)))
        input_embedding.add(PositionEmbedding())
        input_embedding.add(
            PaddedConv(1,
                       n_filters,
                       kernel_size,
                       1,
                       activation='relu',
                       dropout=dropout))

        encoder = Stack()
        encoder.add(input_embedding)
        for _ in range(6):
            for _ in range(n_layers):
                encoder.add(
                    ResidualBlock(1,
                                  n_filters,
                                  kernel_size,
                                  activation='relu',
                                  dilation_rate=1,
                                  dropout=dropout))
            encoder.add(pool(2, 2))

        latent = Stack()
        latent.add(Flatten())
        latent.add(Dense(self._latent_size))

        decoder = Stack()
        decoder.add(
            Dense(47 * n_filters,
                  input_shape=(self._latent_size, ),
                  activation='relu'))
        decoder.add(Reshape((47, n_filters)))
        for _ in range(6):
            decoder.add(UpSampling1D(2))
            for _ in range(n_layers):
                decoder.add(
                    ResidualBlock(1,
                                  n_filters,
                                  kernel_size,
                                  activation='relu',
                                  dilation_rate=1,
                                  dropout=dropout))
        decoder.add(Cropping1D((0, 8)))

        self.encoder = encoder
        self.decoder = decoder
        self.latent = latent
Example 19
    def __init__(self,
                 n_classes: int,
                 input_name: str = 'encoder_output',
                 output_name: str = 'logits'):
        super().__init__()
        self._input_name = input_name
        self._output_name = output_name

        def max_pool_30(x):
            maxpool, _ = tf.nn.top_k(x, 30)
            return maxpool

        conv6 = Stack(
            [DeepSFConv(10, 6) for _ in range(10)] +
            [Permute([2, 1]), Lambda(max_pool_30),
             Flatten()])
        conv10 = Stack(
            [DeepSFConv(10, 10) for _ in range(10)] +
            [Permute([2, 1]), Lambda(max_pool_30),
             Flatten()])

        output_model = Stack()

        # Dense head over the concatenated conv6/conv10 features
        output_model.add(Concatenate(-1))
        output_model.add(
            Dense(500,
                  activation='relu',
                  kernel_initializer='he_normal',
                  kernel_constraint=tf.keras.constraints.max_norm(3)))
        output_model.add(Dropout(0.2))
        output_model.add(Dense(n_classes, kernel_initializer='he_normal'))

        self.conv6 = conv6
        self.conv10 = conv10
        self.output_model = output_model
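Lambda(max_pool_30) keeps the 30 largest activations per channel once Permute has moved channels to the second axis; a standalone check:

import tensorflow as tf

x = tf.random.normal((2, 10, 50))   # (batch, channels, positions) after Permute
maxpool, _ = tf.nn.top_k(x, 30)     # 30 largest activations per channel
assert maxpool.shape == (2, 10, 30)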