class BeplerContactPredictor(Model):

    def __init__(self,
                 input_name: str = 'encoder_output',
                 output_name: str = 'contact_prob'):
        super().__init__()
        self._input_name = input_name
        self._output_name = output_name

        def concat_pairs(tensor):
            input_mul = tensor[:, :, None] * tensor[:, None, :]
            input_sub = tf.abs(tensor[:, :, None] - tensor[:, None, :])
            output = tf.concat((input_mul, input_sub), -1)
            return output

        self.get_pairwise_feature_vector = Lambda(concat_pairs)

        self.predict_contact_map = Stack()
        self.predict_contact_map.add(
            Conv2D(32, 1, use_bias=True, padding='same', activation='relu'))
        self.predict_contact_map.add(
            Conv2D(1, 7, use_bias=True, padding='same', activation='linear'))

    def call(self, inputs):
        encoder_output = inputs[self._input_name]
        tf.add_to_collection('checkpoints', encoder_output)
        z = self.get_pairwise_feature_vector(encoder_output)
        sequence_mask = rk.utils.convert_sequence_length_to_sequence_mask(
            encoder_output, inputs['protein_length'])
        mask_2d = sequence_mask[:, None, :] & sequence_mask[:, :, None]
        prediction = self.predict_contact_map(z, mask=mask_2d)
        inputs[self._output_name] = prediction
        return inputs
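# Illustrative shape check (not from the source): concat_pairs broadcasts a
# (batch, length, d) tensor against itself, so the elementwise product and
# absolute difference each have shape (batch, length, length, d), and the
# concatenation doubles the channel dimension.
import tensorflow as tf

x = tf.random.normal((2, 5, 8))                # (batch, length, d)
mul = x[:, :, None] * x[:, None, :]            # (2, 5, 5, 8)
sub = tf.abs(x[:, :, None] - x[:, None, :])    # (2, 5, 5, 8)
pairs = tf.concat((mul, sub), -1)              # (2, 5, 5, 16)
assert pairs.shape == (2, 5, 5, 16)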
def __init__(self,
             n_classes: int,
             input_name: str = 'encoder_output',
             output_name: str = 'sequence_logits',
             use_conv: bool = True) -> None:
    super().__init__()
    self._input_name = input_name
    self._output_name = output_name
    if use_conv:
        self.predict_class = Stack([
            LayerNorm(),
            Conv1D(128, 5, activation='relu', padding='same', use_bias=True),
            Conv1D(n_classes, 3, activation=None, padding='same',
                   use_bias=True)])
    else:
        self.predict_class = Stack([
            LayerNorm(),
            Dense(512, activation='relu'),
            Dense(n_classes, activation=None)])
def __init__(self,
             input_name: str = 'encoder_output',
             output_name: str = 'cls_vector'):
    super().__init__()
    self._input_name = input_name
    self._output_name = output_name
    self.compute_attention = Stack([
        LayerNorm(),
        Dense(1, activation='linear'),
        Dropout(0.1)])
    self.attention_mask = ApplyAttentionMask()
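# Sketch (assumption, not the source's call method): a Dense(1) score head
# plus a sequence mask typically yields an attention-pooled 'cls_vector'
# roughly like this:
import tensorflow as tf

hidden = tf.random.normal((2, 7, 16))                        # (batch, length, d)
scores = tf.squeeze(tf.keras.layers.Dense(1)(hidden), -1)    # (2, 7)
mask = tf.sequence_mask([5, 7], maxlen=7)                    # valid positions
scores = tf.where(mask, scores, tf.fill(tf.shape(scores), -1e9))
weights = tf.nn.softmax(scores, axis=-1)
cls_vector = tf.einsum('bl,bld->bd', weights, hidden)        # (2, 16)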
class ResidueResidueContactPredictor(Model):

    def __init__(self,
                 input_name: str = 'encoder_output',
                 output_name: str = 'sequence_logits'):
        super().__init__()
        self._input_name = input_name
        self._output_name = output_name

        self.get_pairwise_feature_vector = Stack()
        self.get_pairwise_feature_vector.add(Dense(64, activation='linear'))

        def concat_pairs(tensor):
            seqlen = tf.shape(tensor)[1]
            input_left = tf.tile(tensor[:, :, None], (1, 1, seqlen, 1))
            input_right = tf.tile(tensor[:, None, :], (1, seqlen, 1, 1))
            output = tf.concat((input_left, input_right), -1)
            return output

        self.get_pairwise_feature_vector.add(Lambda(concat_pairs))

        self.predict_contact_map = Stack()
        self.predict_contact_map.add(PaddedConv(2, 64, 1, dropout=0.1))
        for layer in range(30):
            self.predict_contact_map.add(
                ResidualBlock(2, 64, 3, dropout=0.1,
                              add_checkpoint=layer % 5 == 0))
        self.predict_contact_map.add(Dense(1, activation='linear'))

    def call(self, inputs):
        encoder_output = inputs[self._input_name]
        tf.add_to_collection('checkpoints', encoder_output)
        z = self.get_pairwise_feature_vector(encoder_output)
        self.pairwise_z = z
        sequence_mask = rk.utils.convert_sequence_length_to_sequence_mask(
            encoder_output, inputs['protein_length'])
        mask_2d = sequence_mask[:, None, :] & sequence_mask[:, :, None]
        prediction = self.predict_contact_map(z, mask=mask_2d)
        # symmetrize
        prediction = (prediction + tf.transpose(prediction, (0, 2, 1, 3))) / 2
        inputs[self._output_name] = prediction
        return inputs
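# Illustrative check (not from the source): averaging the predicted map with
# its transpose over the two sequence axes guarantees P(i, j) == P(j, i),
# which is what the symmetrize step above relies on.
import tensorflow as tf

logits = tf.random.normal((1, 4, 4, 1))        # (batch, L, L, channels)
sym = (logits + tf.transpose(logits, (0, 2, 1, 3))) / 2
asym = sym - tf.transpose(sym, (0, 2, 1, 3))
assert float(tf.reduce_max(tf.abs(asym))) == 0.0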
def __init__(self,
             d_output: int,
             input_name: str = 'cls_vector',
             output_name: str = 'prediction') -> None:
    super().__init__()
    self._d_output = d_output
    self._input_name = input_name
    self._output_name = output_name
    self.predict_vector = Stack([
        LayerNorm(),
        Dense(512, 'relu'),
        Dropout(0.5),
        Dense(d_output)])
def build_task_model(embedding_model: Model,
                     tasks: List[Task],
                     freeze_embedding_weights: bool) -> Model:
    layers = [embedding_model]
    if freeze_embedding_weights:
        layers.append(FreezeWeights())
    for task in tasks:
        layers = task.build_output_model(layers)
    return Stack(layers)
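# Hypothetical usage (names below are illustrative, not from the source):
# the embedding model runs first, FreezeWeights optionally stops gradients
# into it, and each task appends its own output head to the layer list.
#
#   embedding = SomeEncoder(n_symbols=30)    # produces 'encoder_output'
#   tasks = [SomeTask()]                     # each extends the layer list
#   model = build_task_model(embedding, tasks, freeze_embedding_weights=True)
#   outputs = model(batch)                   # batch: the shared input dict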
def __init__(self, input_name: str = 'encoder_output'):
    super().__init__()
    self._input_name = input_name
    self.convs = Stack([
        PaddedConv(1, 32, 129),
        PaddedConv(1, 32, 257)])
    self.bilstm = Stack([
        BidirectionalCudnnLSTM(1024, return_sequences=True),
        BidirectionalCudnnLSTM(1024, return_sequences=True)])

    # Need to predict phi, psi, rsa, disorder, interface, ss3, ss8
    num_outputs = 0
    num_outputs += 1  # phi
    num_outputs += 2  # psi
    num_outputs += 1  # rsa
    num_outputs += 1  # disorder
    num_outputs += 1  # interface
    num_outputs += 3  # ss3
    num_outputs += 8  # ss8
    self.predict_outputs = Dense(num_outputs, activation=None)
def __init__(self,
             n_symbols: int,
             n_units: int = 1024,
             n_layers: int = 3,
             dropout: Optional[float] = 0.1) -> None:
    super().__init__(n_symbols)
    if dropout is None:
        dropout = 0
    self.embedding = Embedding(n_symbols, 128)
    self.forward_lstm = Stack(
        [LSTM(n_units, return_sequences=True) for _ in range(n_layers)],
        name='forward_lstm')
    self.reverse_lstm = Stack(
        [LSTM(n_units, return_sequences=True) for _ in range(n_layers)],
        name='reverse_lstm')
    self.dropout = Dropout(dropout)
def __init__(self,
             n_symbols: int,
             dropout: float = 0,
             use_pfam_alphabet: bool = True):
    super().__init__()
    self._use_pfam_alphabet = use_pfam_alphabet

    if use_pfam_alphabet:
        self.embed = Embedding(n_symbols, n_symbols)
    else:
        n_symbols = 21
        self.embed = Embedding(n_symbols + 1, n_symbols)

    self.dropout = Dropout(dropout)
    self.rnn = Stack([
        LSTM(1024, return_sequences=True, use_bias=True,
             implementation=2, recurrent_activation='sigmoid'),
        LSTM(1024, return_sequences=True, use_bias=True,
             implementation=2, recurrent_activation='sigmoid')])
    self.compute_logits = Dense(n_symbols, use_bias=True, activation='linear')
def __init__(self,
             n_symbols: int,
             dropout: float = 0,
             use_pfam_alphabet: bool = True):
    if not use_pfam_alphabet:
        n_symbols = 21
    super().__init__(n_symbols)
    self._use_pfam_alphabet = use_pfam_alphabet

    self.embed = LMEmbed(n_symbols, dropout)
    self.dropout = Dropout(dropout)
    self.rnn = Stack([
        Bidirectional(
            LSTM(512, return_sequences=True, use_bias=True,
                 recurrent_activation='sigmoid', implementation=2))
        for _ in range(3)])
    self.proj = Dense(100, use_bias=True, activation='linear')
    self.random_replace = RandomReplaceMask(0.05, n_symbols)
def __init__(self, n_symbols, length=3000):
    super().__init__(n_symbols)
    self._length = length

    encoder = Stack()
    encoder.add(Embedding(n_symbols, 128, input_length=self._length))
    for _ in range(6):
        encoder.add(Conv1D(256, 5, strides=1, padding='same',
                           dilation_rate=1, activation='relu'))
        encoder.add(BatchNormalization())
        encoder.add(MaxPooling1D(2, 2))
    encoder.add(Flatten())
    encoder.add(Dense(1000))

    decoder = Stack()
    decoder.add(Dense(47 * 256, input_shape=(1000,), activation='relu'))
    decoder.add(Reshape((47, 256)))
    for _ in range(5):
        decoder.add(UpSampling1D(2))
        decoder.add(Conv1D(256, 5, strides=1, padding='same',
                           dilation_rate=1, activation='relu'))
        decoder.add(BatchNormalization())
    decoder.add(UpSampling1D(2))
    decoder.add(Conv1D(256, 5, strides=1, padding='same',
                       dilation_rate=1, activation='relu'))
    decoder.add(Cropping1D((0, 8)))

    self.encoder = encoder
    self.decoder = decoder
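# Shape arithmetic sketch (illustrative): six stride-2 'valid' pools reduce
# the length-3000 input to 46, which the Flatten/Dense pair bridges to the
# decoder; the decoder starts from length 47 because 47 * 2**6 = 3008, and
# Cropping1D((0, 8)) trims the tail back to 3000.
enc_len = 3000
for _ in range(6):
    enc_len //= 2              # MaxPooling1D(2, 2), padding='valid'
assert enc_len == 46
dec_len = 47 * 2 ** 6          # six UpSampling1D(2) steps from length 47
assert dec_len - 8 == 3000     # Cropping1D((0, 8))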
def __init__(self,
             n_symbols: int,
             n_layers: int = 35,
             filters: int = 256,
             kernel_size: int = 9,
             layer_norm: bool = True,
             activation: str = 'elu',
             dilation_rate: int = 2,
             dropout: Optional[float] = 0.1) -> None:
    super().__init__(n_symbols)
    self.n_symbols = n_symbols
    self.n_layers = n_layers
    self.filters = filters
    self.kernel_size = kernel_size
    self.layer_norm = layer_norm
    self.activation = activation
    self.dilation_rate = dilation_rate
    self.dropout = dropout
    print(self)

    input_embedding = Stack()
    input_embedding.add(Embedding(n_symbols, 128))
    input_embedding.add(Lambda(lambda x: x * np.sqrt(filters)))
    input_embedding.add(PositionEmbedding())

    encoder = Stack()
    encoder.add(input_embedding)
    encoder.add(PaddedConv(1, filters, kernel_size, 1, activation, dropout))
    encoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                              dilation_rate=1, dropout=dropout))
    for layer in range(n_layers - 1):
        encoder.add(ResidualBlock(1, filters, kernel_size,
                                  activation=activation,
                                  dilation_rate=dilation_rate,
                                  dropout=dropout,
                                  add_checkpoint=layer % 5 == 0))
    self.encoder = encoder

    self.z_mu = PaddedConv(1, 4, kernel_size, 1, 'linear', 0.0)
    self.z_var = PaddedConv(1, 4, kernel_size, 1, 'linear', 0.0)

    decoder = Stack()
    decoder.add(PaddedConv(1, filters, kernel_size, 1, activation, dropout))
    decoder.add(ResidualBlock(1, filters, kernel_size, activation=activation,
                              dilation_rate=1, dropout=dropout))
    for layer in range(n_layers - 1):
        decoder.add(ResidualBlock(1, filters, kernel_size,
                                  activation=activation,
                                  dilation_rate=dilation_rate,
                                  dropout=dropout,
                                  add_checkpoint=layer % 5 == 0))
    self.decoder = decoder
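# Reparameterization sketch (illustrative, not the source's call method):
# with z_mu and z_var producing per-position Gaussian parameters, sampling
# typically uses z = mu + sigma * eps, eps ~ N(0, 1). This assumes z_var
# outputs a log-variance; the source may parameterize it differently.
import tensorflow as tf

mu = tf.random.normal((2, 10, 4))        # per-position mean
log_var = tf.random.normal((2, 10, 4))   # per-position log-variance
eps = tf.random.normal(tf.shape(mu))
z = mu + tf.exp(0.5 * log_var) * eps     # (2, 10, 4)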
class MyModel(AbstractTapeModel):

    @hparams.capture
    def __init__(self, n_symbols, latent_size=32, max_seq_len=10000):
        self.latent_size = latent_size
        self.max_seq_len = max_seq_len
        super().__init__(n_symbols)

        self.input_embedding = Embedding(n_symbols, 128)

        enc = Stack()
        enc.add(Conv1D(filters=32, kernel_size=7, strides=1,
                       dilation_rate=2, activation='relu'))
        enc.add(Conv1D(filters=64, kernel_size=5, strides=1,
                       dilation_rate=2, activation='relu'))
        enc.add(Conv1D(filters=128, kernel_size=3, strides=1,
                       dilation_rate=2, activation='relu'))

        self.enc_mu = Stack()
        self.enc_mu.add(enc)
        self.enc_mu.add(Flatten())
        self.enc_mu.add(Dense(latent_size))

        self.enc_std = Stack()
        self.enc_std.add(enc)
        self.enc_std.add(Flatten())
        self.enc_std.add(Dense(latent_size, activation='softplus'))

        self.dec = Stack()
        self.dec.add(Dense(1000))
        self.dec.add(Reshape((100, 10)))
        self.dec.add(Conv1DTranspose(filters=128, kernel_size=3, strides=1,
                                     dilation_rate=2, activation='relu'))
        self.dec.add(Conv1DTranspose(filters=64, kernel_size=3, strides=1,
                                     dilation_rate=2, activation='relu'))
        self.dec.add(Conv1DTranspose(filters=32, kernel_size=3, strides=1,
                                     dilation_rate=2, activation='relu'))

    def call(self, inputs):
        sequence = inputs['primary']
        embedded = self.input_embedding(sequence)
        pad_embedded = pad_up_to(embedded, (-1, self.max_seq_len, -1), 0)
        pad_embedded.set_shape((None, self.max_seq_len, 128))
        z_mu = self.enc_mu(pad_embedded)
        z_std = self.enc_std(pad_embedded)
        z = z_mu + K.random_normal(K.shape(z_std)) * z_std
        encoder_output = self.dec(z)
        inputs['encoder_output'] = encoder_output
        return inputs

    def get_optimal_batch_sizes(self):
        bucket_sizes = np.array(
            [100, 200, 300, 400, 600, 900, 1000, 1300, 2000, 3000])
        batch_sizes = np.array([4, 4, 4, 4, 3, 3, 3, 2, 1, 0.5, 0])
        batch_sizes = np.asarray(
            batch_sizes * self._get_gpu_memory(), np.int32)
        batch_sizes[batch_sizes <= 0] = 1
        return bucket_sizes, batch_sizes
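# Bucketing convention check (illustrative): tf.data-style bucketing takes
# N boundaries and N + 1 batch sizes, the extra final entry covering
# sequences longer than the last boundary; scaling by GPU memory and
# clamping to a minimum of 1 mirrors get_optimal_batch_sizes above.
import numpy as np

bucket_sizes = np.array([100, 200, 300, 400, 600, 900, 1000, 1300, 2000, 3000])
batch_sizes = np.array([4, 4, 4, 4, 3, 3, 3, 2, 1, 0.5, 0])
assert len(batch_sizes) == len(bucket_sizes) + 1
gpu_memory = 8  # hypothetical return value of _get_gpu_memory()
scaled = np.asarray(batch_sizes * gpu_memory, np.int32)
scaled[scaled <= 0] = 1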
def build_output_model(
        self, layers: List[tf.keras.Model]) -> List[tf.keras.Model]:
    ssa = SoftSymmetricAlignment(Stack(layers))
    return [ssa, OrdinalRegression(5)]
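# Sketch of soft symmetric alignment (after Bepler & Berger, 2019), which the
# SSA layer above presumably implements; names and the exact formulation here
# are assumptions, not the source API. Pairwise L1 distances between two
# embedded sequences are pooled under a soft alignment c = a + b - a * b,
# where a and b are row- and column-wise softmaxes of the negative distances.
import tensorflow as tf

def ssa_similarity(z1, z2):
    # z1: (len1, d), z2: (len2, d) embedding matrices for two sequences
    dist = tf.reduce_sum(tf.abs(z1[:, None, :] - z2[None, :, :]), -1)
    a = tf.nn.softmax(-dist, axis=1)    # align each z1 position to z2
    b = tf.nn.softmax(-dist, axis=0)    # align each z2 position to z1
    c = a + b - a * b
    return -tf.reduce_sum(c * dist) / tf.reduce_sum(c)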
def __init__(self,
             n_symbols,
             n_layers=5,
             length=3000,
             latent_size=1000,
             n_filters=256,
             kernel_size=5,
             pooling_type='average',
             dropout=0):
    super().__init__(n_symbols)
    self._n_layers = n_layers
    self._length = length
    self._latent_size = latent_size
    self._kernel_size = kernel_size
    self._n_filters = n_filters

    pool = AveragePooling1D if pooling_type == 'average' else MaxPooling1D

    input_embedding = Stack()
    input_embedding.add(Embedding(n_symbols, 128, input_length=self._length))
    input_embedding.add(Lambda(lambda x: x * np.sqrt(n_filters)))
    input_embedding.add(PositionEmbedding())
    input_embedding.add(PaddedConv(1, n_filters, kernel_size, 1,
                                   activation='relu', dropout=dropout))

    encoder = Stack()
    encoder.add(input_embedding)
    for _ in range(6):
        for _ in range(n_layers):
            encoder.add(ResidualBlock(1, n_filters, kernel_size,
                                      activation='relu', dilation_rate=1,
                                      dropout=dropout))
        encoder.add(pool(2, 2))

    latent = Stack()
    latent.add(Flatten())
    latent.add(Dense(self._latent_size))

    decoder = Stack()
    decoder.add(Dense(47 * n_filters, input_shape=(self._latent_size,),
                      activation='relu'))
    decoder.add(Reshape((47, n_filters)))
    for _ in range(6):
        decoder.add(UpSampling1D(2))
        for _ in range(n_layers):
            decoder.add(ResidualBlock(1, n_filters, kernel_size,
                                      activation='relu', dilation_rate=1,
                                      dropout=dropout))
    decoder.add(Cropping1D((0, 8)))

    self.encoder = encoder
    self.decoder = decoder
    self.latent = latent
def __init__(self,
             n_classes: int,
             input_name: str = 'encoder_output',
             output_name: str = 'logits'):
    super().__init__()
    self._input_name = input_name
    self._output_name = output_name

    def max_pool_30(x):
        maxpool, _ = tf.nn.top_k(x, 30)
        return maxpool

    # Make conv layers
    conv6 = Stack(
        [DeepSFConv(10, 6) for _ in range(10)] +
        [Permute([2, 1]), Lambda(max_pool_30), Flatten()])
    conv10 = Stack(
        [DeepSFConv(10, 10) for _ in range(10)] +
        [Permute([2, 1]), Lambda(max_pool_30), Flatten()])

    output_model = Stack()
    output_model.add(Concatenate(-1))
    output_model.add(Dense(500, activation='relu',
                           kernel_initializer='he_normal',
                           kernel_constraint=tf.keras.constraints.max_norm(3)))
    output_model.add(Dropout(0.2))
    output_model.add(Dense(n_classes, kernel_initializer='he_normal'))

    self.conv6 = conv6
    self.conv10 = conv10
    self.output_model = output_model
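# Illustrative check (not from the source): after Permute([2, 1]) the tensor
# is (batch, filters, positions), so tf.nn.top_k over the last axis keeps the
# 30 largest activations per filter, i.e. k-max pooling over the sequence.
import tensorflow as tf

x = tf.random.normal((2, 64, 100))    # (batch, filters, positions)
pooled, _ = tf.nn.top_k(x, 30)        # (2, 64, 30)
assert pooled.shape == (2, 64, 30)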