Example #1
        # Flatten the latent tensor to (batch, features), then pad the feature
        # axis out to a fixed width of 5000 (the -100 constant marks padded
        # positions) so every example has the same static shape.
        z_concat = tf.reshape(z, (tf.shape(z)[0], -1))
        z_concat = tf.pad(z_concat, [[0, 0], [0, 5000 - tf.shape(z_concat)[1]]],
                          'CONSTANT', constant_values=-100)
        z_concat.set_shape((None, 5000))
        inputs['global_emb'] = z_concat
        return inputs

    def get_optimal_batch_sizes(self) -> Tuple[List[int], List[int]]:
        bucket_sizes = np.array([100, 200, 300, 400, 600, 900, 1000, 1300, 2000, 3000])
        # One more batch size than buckets: the final entry covers sequences
        # longer than the largest bucket. The values are per-GB multipliers,
        # scaled by the available GPU memory below.
        batch_sizes = np.array([1.5, 1.5, 1.5, 1.5, 1, 1, 1, 0, 0, 0, 0])

        batch_sizes = np.asarray(batch_sizes * self._get_gpu_memory(), np.int32)
        batch_sizes[batch_sizes <= 0] = 1  # every bucket needs a batch size of at least 1
        return bucket_sizes, batch_sizes
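
The fractional values above are per-gigabyte multipliers: get_optimal_batch_sizes scales them by the available GPU memory and truncates to integers, so larger GPUs get proportionally larger batches while the clamp guarantees a batch size of at least 1. A minimal sketch of the same scaling with the memory hard-coded (the real _get_gpu_memory() helper is assumed to report memory in GB):

import numpy as np

per_gb = np.array([1.5, 1.5, 1.5, 1.5, 1, 1, 1, 0, 0, 0, 0])
gpu_memory_gb = 12  # assumed value; the model queries the device instead
batch_sizes = np.asarray(per_gb * gpu_memory_gb, np.int32)
batch_sizes[batch_sizes <= 0] = 1  # never schedule an empty batch
print(batch_sizes)  # -> [18 18 18 18 12 12 12  1  1  1  1]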

# Register the model
ModelBuilder.add_model('myres', MyRes)
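
Registration maps a string name to a model class so the framework can build the model by name at run time. Assuming TAPE's ModelBuilder follows the usual registry pattern, a stripped-down equivalent looks like this (a sketch, not TAPE's actual implementation):

class ModelRegistry:
    """Minimal registry: resolves a name like 'myres' to a model class."""
    _models = {}

    @classmethod
    def add_model(cls, name, model_cls):
        cls._models[name] = model_cls

    @classmethod
    def build(cls, name, *args, **kwargs):
        return cls._models[name](*args, **kwargs)

With this pattern, a config value such as model='myres' resolves to the MyRes class without it being imported anywhere else.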

class MyAE(AbstractTapeModel):
    def __init__(self, n_symbols, length=3000):
        super().__init__(n_symbols)
        self._length = length
        
        # Encoder: embed the sequence, then apply Conv1D + BatchNormalization
        # blocks interleaved with 2x max pooling.
        encoder = Stack()
        encoder.add(Embedding(n_symbols, 128, input_length=self._length))
        encoder.add(Conv1D(256, 5, strides=1, padding='same', dilation_rate=1, activation='relu'))
        encoder.add(BatchNormalization())
        encoder.add(MaxPooling1D(pool_size=2, strides=2))
        encoder.add(Conv1D(256, 5, strides=1, padding='same', dilation_rate=1, activation='relu'))
        encoder.add(BatchNormalization())
        encoder.add(MaxPooling1D(pool_size=2, strides=2))
        encoder.add(Conv1D(256, 5, strides=1, padding='same', dilation_rate=1, activation='relu'))
Example #2
        sequence = inputs['primary']
        # The learned embedding lookup is itself the encoder output.
        inputs['encoder_output'] = self.embedding(sequence)
        return inputs

    def get_optimal_batch_sizes(self):
        bucket_sizes = np.array(
            [100, 200, 300, 400, 600, 900, 1000, 1200, 1300, 2000, 3000])
        batch_sizes = np.array([10, 10, 10, 10, 10, 10, 5, 5, 5, 5, 5, 5])

        batch_sizes = np.asarray(batch_sizes * self._get_gpu_memory(),
                                 np.int32)
        batch_sizes[batch_sizes <= 0] = 1
        return bucket_sizes, batch_sizes


ModelBuilder.add_model('w2v', Word2Vec)

from tape.analysis import get_config
from tape.__main__ import proteins
import argparse
import os


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('outdir')
    parser.add_argument('--datafile', default='')
    args = parser.parse_args()

    config = get_config(args.outdir)
    task = config['tasks']
Example #3
        # Return the dictionary
        return inputs

    def get_optimal_batch_sizes(self) -> Tuple[List[int], List[int]]:
        """Need to figure out what batch sizes to use for different sequence lengths.
        You can make this pretty fine-grained but here we're just going to say use
        a batch of 64 for sequences of length < 500, a batch of 32 for sequences of length
        < 1000, a batch of 16 for sequences of length < 1500, a batch of 8 for sequences
        of length < 2000, and a batch of 4 for anything longer"""

        # Define the bucket sizes we care about
        bucket_sizes = [500, 1000, 1500, 2000]

        # Define the batch sizes we care about
        # (1 more than bucket sizes to handle things larger than largest bucket size)
        batch_sizes = [64, 32, 16, 8, 4]

        return np.array(bucket_sizes), np.array(batch_sizes)
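
At run time each sequence's length has to be mapped to one of these buckets. Assuming a straightforward bucket lookup, the mapping is equivalent to np.digitize, with the extra trailing batch size catching anything longer than the largest bucket:

import numpy as np

bucket_sizes = np.array([500, 1000, 1500, 2000])
batch_sizes = np.array([64, 32, 16, 8, 4])

for length in (120, 500, 1499, 2500):
    idx = np.digitize(length, bucket_sizes)  # index of first boundary strictly greater than length
    print(length, '->', batch_sizes[idx])
# 120 -> 64, 500 -> 32, 1499 -> 16, 2500 -> 4

Note that a length of exactly 500 lands in the second bucket, matching the "< 500" phrasing in the docstring.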


# Register the model
ModelBuilder.add_model('my_simple_model', MySimpleModel)

if __name__ == '__main__':
    # Run tape from this file! This ensures that the model registration code
    # above is run. If you want to run tape normally, you'll need to modify
    # the code to register your model from within tape.
    from tape.__main__ import proteins
    proteins.run_commandline()
Example #4
        # Pad the embedded sequence out to a fixed length so the encoder
        # layers see a constant shape.
        pad_embedded = pad_up_to(embedded, (-1, self.max_seq_len, -1), 0)
        pad_embedded.set_shape((None, self.max_seq_len, 128))

        # Encode to a Gaussian posterior and sample via the reparameterization
        # trick: z = mu + eps * std, with eps ~ N(0, 1).
        z_mu = self.enc_mu(pad_embedded)
        z_std = self.enc_std(pad_embedded)
        z = z_mu + K.random_normal(K.shape(z_std)) * z_std

        # Decode the latent sample back into a sequence representation.
        encoder_output = self.dec(z)

        inputs['encoder_output'] = encoder_output
        return inputs
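
The sampling line above is the standard VAE reparameterization trick: rather than sampling z directly from N(mu, std^2), which would block gradients, it draws eps ~ N(0, 1) and computes z = mu + eps * std, keeping the path through mu and std differentiable. A self-contained sketch with assumed shapes:

import tensorflow as tf
import tensorflow.keras.backend as K

z_mu = tf.zeros((2, 100, 64))   # assumed (batch, length, latent_dim)
z_std = tf.ones((2, 100, 64))
eps = K.random_normal(K.shape(z_std))  # eps ~ N(0, 1)
z = z_mu + eps * z_std                 # z ~ N(mu, std^2)

In a complete VAE the encoder typically emits a log-variance and exponentiates it so the standard deviation stays positive; the example above takes z_std at face value.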

    def get_optimal_batch_sizes(self):
        bucket_sizes = np.array(
            [100, 200, 300, 400, 600, 900, 1000, 1300, 2000, 3000])
        batch_sizes = np.array([4, 4, 4, 4, 3, 3, 3, 2, 1, 0.5, 0])

        batch_sizes = np.asarray(batch_sizes * self._get_gpu_memory(),
                                 np.int32)
        batch_sizes[batch_sizes <= 0] = 1
        return bucket_sizes, batch_sizes


#%% Register
ModelBuilder.add_model('my_model', MyModel)

#%%
if __name__ == '__main__':
    from tape.__main__ import proteins
    proteins.run_commandline()
Example #5
hparams = Ingredient('my_hparams')


@hparams.config
def model_cfg():
    filters = 32  # must match the name of the captured __init__ argument below


class MySimpleModelWithHparams(MySimpleModel):
    @hparams.capture
    def __init__(self,
                 n_symbols: int,  # This argument is required!
                 filters: int = 32  # This gets captured by Sacred so you can pass values in
                 ) -> None:
        super().__init__(n_symbols, filters)
        print("Creating Model with {} filters".format(self.conv1d.filters))


# Register the model and hparams
ModelBuilder.add_model('my_simple_model_with_hparams',
                       MySimpleModelWithHparams, hparams)
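
Because filters lives in the my_hparams ingredient config and __init__ is decorated with @hparams.capture, Sacred injects the config value automatically. Using Sacred's standard command-line override syntax, the hyperparameter can be changed per run (my_file.py is a placeholder for this script's name, and the model key is assumed from TAPE's config):

python my_file.py with model=my_simple_model_with_hparams my_hparams.filters=64

The my_hparams. prefix routes the override to the ingredient rather than to the main experiment config.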

if __name__ == '__main__':
    # Run tape from this file! This ensures that the model registration code
    # above is run. If you want to run tape normally, you'll need to modify
    # the code to register your model from within tape.
    from tape.__main__ import proteins
    proteins.run_commandline()