Beispiel #1
0
def test_memory():
    """Evolve the complexation system repeatedly while watching memory use.

    The resident set size (RSS) of the current process is sampled before
    every call to ``evolve``. Whenever a sample exceeds the largest value
    seen so far it is printed and counted. The test passes only if that
    happened at most once across all runs.

    A mismatch between the simulated outcome and ``final_state`` is only
    printed; it is not asserted.
    """
    loaded = load_complexation()
    stoichiometric_matrix, rates, initial_state, final_state = loaded
    duration = 1
    amplify = 100

    process = psutil.Process(os.getpid())
    memory = process.memory_info().rss
    memory_previous = memory
    memory_increases = 0
    print('initial memory use: {}'.format(memory))

    # a fresh seed every time the test runs
    seed = np.random.randint(2**31)
    system = StochasticSystem(stoichiometric_matrix, random_seed=seed)

    obsidian_start = seconds_since_epoch()
    for run in range(1, amplify + 1):
        memory = process.memory_info().rss
        grew = memory > memory_previous
        if grew:
            print('memory use before iteration {:2d}: {}'.format(run, memory))
            memory_previous = memory
            memory_increases += 1

        result = system.evolve(duration, initial_state, rates)
        difference = np.abs(final_state - result['outcome']).sum()
        if difference:
            print('difference is {}'.format(difference))
    obsidian_end = seconds_since_epoch()

    elapsed = obsidian_end - obsidian_start
    print('obsidian C implementation elapsed seconds for {} runs: {}'.format(
        amplify, elapsed))
    assert memory_increases <= 1
Beispiel #2
0
def test_get_set_random_state():
    """Check that restoring a saved random state replays the same run.

    The random state is captured after one warm-up run. Two further runs
    from the same inputs must differ from each other in at least one of
    the compared result fields. After the captured state is restored, the
    next run must reproduce the first of those two runs in every field.
    """
    stoich = np.array([[1, 1, -1, 0], [-2, 0, 0, 1], [-1, -1, 1, 0]])
    system = StochasticSystem(stoich)

    state = np.array([1000, 1000, 0, 0])
    rates = np.array([3.0, 1.0, 1.0])
    compared_keys = ('time', 'events', 'occurrences', 'outcome')

    def run():
        return system.evolve(1, state, rates)

    # warm-up run before the state is captured
    run()
    rand_state = system.obsidian.get_random_state()

    first = run()
    second = run()

    # the loop stops at the first differing field; if nothing differs,
    # assert_raises itself fails
    with np.testing.assert_raises(AssertionError):
        for key in compared_keys:
            np.testing.assert_array_equal(first[key], second[key])

    system.obsidian.set_random_state(*rand_state)
    replay = run()

    for key in compared_keys:
        np.testing.assert_array_equal(first[key], replay[key])
Beispiel #3
0
def test_complexation():
    """Simulate the complexation fixtures and report the distance to the
    stored final state.

    Reads ``initial_state.json``, ``final_state.json`` and
    ``stoichiometry.json`` from ``data/complexation`` (resolved against the
    current working directory), builds a dense stoichiometric matrix from
    the sparse JSON form, and evolves the system for one time unit.

    The only assertions are that the two state vectors have the same size
    and that the number of recorded time points minus one equals the total
    number of events. The difference from ``final_state`` is printed, not
    asserted.

    Returns:
        The ``(time, counts, events)`` tuple unpacked from
        ``system.evolve``.
    """
    fixtures_root = os.path.join('data', 'complexation')

    def load_state(filename):
        # Load one JSON fixture as a numpy array.
        with open(os.path.join(fixtures_root, filename)) as f:
            state = np.array(json.load(f))

        return state

    initial_state = load_state('initial_state.json')
    final_state = load_state('final_state.json')

    assert initial_state.size == final_state.size

    n_metabolites = initial_state.size

    with open(os.path.join(fixtures_root, 'stoichiometry.json')) as f:
        stoichiometry_sparse = json.load(f)

    n_reactions = len(stoichiometry_sparse)

    stoichiometric_matrix = np.zeros((n_metabolites, n_reactions), np.int64)

    for (reaction_index, reaction_stoich) in enumerate(stoichiometry_sparse):
        # ``items()`` works on both Python 2 and 3; ``viewitems()`` was
        # removed in Python 3.
        for (str_metabolite_index, stoich) in reaction_stoich.items():
            # JSON doesn't allow for integer keys...
            metabolite_index = int(str_metabolite_index)
            stoichiometric_matrix[metabolite_index, reaction_index] = stoich

    duration = 1

    # semi-quantitative rate constants
    rates = np.full(n_reactions, 10)

    system = StochasticSystem(stoichiometric_matrix, rates)

    time, counts, events = system.evolve(initial_state, duration)

    assert(len(time)-1 == events.sum())

    outcome = counts[-1]
    difference = (final_state - outcome)

    total = np.abs(difference).sum()

    print('differences: {}'.format(total))
    print('total steps: {}'.format(len(time)))
    print(time)

    return (time, counts, events)
Beispiel #4
0
def test_hang():
    """Evolve a stored system 7300 times from a fixed seed.

    There are no assertions: the test passes if every call to ``evolve``
    returns. Presumably this guards against an input that once made
    ``evolve`` hang (inferred from the test name and the timeout TODO).
    Progress is printed every 100 iterations.
    """
    # TODO: Use a pytest plug-in to timeout after some threshold.

    seed = 807952948
    iterations = 7300
    report_every = 100

    # loaded lazily but in the same order as the names on the left
    stoich, mol, rates = (
        np_load(filename)
        for filename in ('stoich.npy', 'complex-counts.npy', 'rates.npy'))

    system = StochasticSystem(stoich, random_seed=seed)
    for iteration in range(iterations):
        if not iteration % report_every:
            print(iteration)

        result = system.evolve(1, mol, rates)
Beispiel #5
0
def test_equilibration():
    """Evolve a two-species, three-reaction system for ten time units.

    Asserts that the summed counts at the last recorded step are lower
    than the summed initial counts, and that the last recorded time does
    not exceed the requested duration.

    Returns:
        The ``(time, counts, events)`` tuple unpacked from
        ``system.evolve``.
    """
    stoichiometric_matrix = np.array([
        [-1, +1,  0],
        [+1, -1, -1]])
    rates = np.array([10, 10, 0.1])
    state = np.array([1000, 0])
    duration = 10

    system = StochasticSystem(stoichiometric_matrix, rates)
    time, counts, events = system.evolve(state, duration)

    final_counts = counts[-1]
    assert final_counts.sum() < state.sum()

    last_time = time[-1]
    assert last_time <= duration

    return (time, counts, events)
Beispiel #6
0
def test_dimerization():
    """Evolve a four-row, three-column stoichiometry for one time unit.

    The only assertion is that the last recorded time does not exceed the
    requested duration.

    Returns:
        The ``(time, counts, events)`` tuple unpacked from
        ``system.evolve``.
    """
    stoichiometric_matrix = np.array([
        [-1, -2, +1],
        [-1,  0, +1],
        [+1,  0, -1],
        [ 0, +1,  0]])
    base_rates = np.array([3, 1, 1])
    rates = base_rates * 0.01
    state = np.array([1000, 1000, 0, 0])
    duration = 1

    system = StochasticSystem(stoichiometric_matrix, rates)
    time, counts, events = system.evolve(state, duration)

    last_time = time[-1]
    assert last_time <= duration

    return (time, counts, events)
Beispiel #7
0
def test_flagella():
    """Evolve a hard-coded system for one time unit and print the result.

    The stoichiometry literal has six rows of 36 entries each, the
    substrate vector has 36 counts and the rate vector has six entries.
    There are no assertions; the test passes if ``evolve`` returns.
    """
    # one row per entry in ``rates``, one column per entry in ``substrate``
    stoichiometry = np.array([[
        0, 0, 0, 0, 0, -4, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0
    ],
                              [
                                  -26, -34, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                  0, 0, 0, 1, 0, 0, 0, 0
                              ],
                              [
                                  0, 0, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1,
                                  -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                  0, 0, 0, 0, 0, 0, 1, 0, 0
                              ],
                              [
                                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -12,
                                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                  0, 0, 0, 0, -1, 1, 0
                              ],
                              [
                                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2,
                                  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0,
                                  0, 0, 0, 0, -1, 0, 0, 0, 1
                              ],
                              [
                                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -5,
                                  -120, 0, 0, 1, 0, -1, -1
                              ]])

    # initial counts, explicitly typed as 64-bit integers
    substrate = np.array([
        21, 1369, 69, 4, 1, 1674, 0, 48, 53, 49, 61, 7, 3, 28, 151, 0, 26, 20,
        3, 3, 9, 16, 7, 26, 280, 26, 64, 67, 310, 13559, 208, 16, 22, 0, 17, 0
    ], np.int64)

    # the same rate constant for every row of the stoichiometry
    rates = np.array([1.e-05, 1.e-05, 1.e-05, 1.e-05, 1.e-05, 1.e-05])

    arrow = StochasticSystem(stoichiometry)
    result = arrow.evolve(1.0, substrate, rates)

    print('flagella result: {}'.format(result))
Beispiel #8
0
def test_obsidian():
    """Evolve a small system once, print the result and check its dimensions.

    Asserts that the underlying ``obsidian`` object reports as many
    reactions as the stoichiometric matrix has rows and as many substrates
    as it has columns.

    Returns:
        The result mapping from ``evolve``. ``test_pickle`` calls this
        function and compares against the returned value.
    """
    stoichiometric_matrix = np.array(
        [[1, 1, -1, 0], [-2, 0, 0, 1], [-1, -1, 1, 0]], np.int64)
    base_rates = np.array([3, 1, 1])
    rates = base_rates * 0.01

    arrow = StochasticSystem(stoichiometric_matrix)
    initial_counts = np.array([50, 20, 30, 40], np.int64)
    result = arrow.evolve(1.0, initial_counts, rates)

    print('steps: {}'.format(result['steps']))
    print('time: {}'.format(result['time']))
    print('events: {}'.format(result['events']))
    print('occurrences: {}'.format(result['occurrences']))
    print('outcome: {}'.format(result['outcome']))

    obsidian = arrow.obsidian
    assert obsidian.reactions_count() == stoichiometric_matrix.shape[0]
    assert obsidian.substrates_count() == stoichiometric_matrix.shape[1]

    return result
Beispiel #9
0
def test_pickle():
    """Check that a pickled-and-restored system evolves like a fresh one.

    A ``StochasticSystem`` is serialized with :py:mod:`pickle` and
    restored. The restored instance is evolved from the same inputs that
    ``test_obsidian`` uses, and every field of the two results is
    compared.

    The round trip only handles data produced inside this test, so
    ``pickle.loads`` is not exposed to untrusted input here.
    """
    stoichiometric_matrix = np.array(
        [[1, 1, -1, 0], [-2, 0, 0, 1], [-1, -1, 1, 0]], np.int64)

    rates = np.array([3, 1, 1]) * 0.01

    arrow = StochasticSystem(stoichiometric_matrix)

    pickled_arrow = pickle.dumps(arrow)
    unpickled_arrow = pickle.loads(pickled_arrow)

    # Evolve the restored instance: evolving the original object would
    # leave the pickle round trip untested.
    result = unpickled_arrow.evolve(
        1.0, np.array([50, 20, 30, 40], np.int64), rates)

    # reference run from a freshly constructed system
    straight = test_obsidian()

    assert (result['steps'] == straight['steps'])
    assert ((result['time'] == straight['time']).all())
    assert ((result['events'] == straight['events']).all())
    assert ((result['occurrences'] == straight['occurrences']).all())
    assert ((result['outcome'] == straight['outcome']).all())

    print('arrow object pickled is {} bytes'.format(len(pickled_arrow)))
Beispiel #10
0
def test_compare_runtime():
    """Time 100 runs of the reference and obsidian implementations.

    Both simulators are built from the same complexation inputs and
    evolved the same number of times. Only wall-clock durations are
    printed; nothing is asserted.
    """
    loaded = load_complexation()
    stoichiometric_matrix, rates, initial_state, final_state = loaded
    duration = 1
    amplify = 100

    def timed_runs(simulator):
        # Return elapsed seconds for `amplify` calls to simulator.evolve.
        started = seconds_since_epoch()
        for _ in range(amplify):
            result = simulator.evolve(duration, initial_state, rates)
        return seconds_since_epoch() - started

    # each simulator is constructed before its own timer starts
    reference_elapsed = timed_runs(GillespieReference(stoichiometric_matrix))
    obsidian_elapsed = timed_runs(StochasticSystem(stoichiometric_matrix))

    print('reference Python implementation elapsed seconds: {}'.format(
        reference_elapsed))
    print('obsidian C implementation elapsed seconds: {}'.format(
        obsidian_elapsed))
Beispiel #11
0
class Translation(Process):
    """Stochastic translation process; see :py:meth:`__init__` for details."""

    name = 'translation'
    defaults = {

        'sequences': {
            ('oA', 'eA'): A,
            ('oAZ', 'eA'): A,
            ('oAZ', 'eZ'): Z,
            ('oB', 'eB'): B,
            ('oBY', 'eB'): B,
            ('oBY', 'eY'): Y},

        'templates': {
            ('oA', 'eA'): generate_template(('oA', 'eA'), 20, ['eA']),
            ('oAZ', 'eA'): generate_template(('oAZ', 'eA'), 20, ['eA']),
            ('oAZ', 'eZ'): generate_template(('oAZ', 'eZ'), 60, ['eZ']),
            ('oB', 'eB'): generate_template(('oB', 'eB'), 30, ['eB']),
            ('oBY', 'eB'): generate_template(('oBY', 'eB'), 30, ['eB']),
            ('oBY', 'eY'): generate_template(('oBY', 'eY'), 40, ['eY'])},

        'transcript_affinities': {
            ('oA', 'eA'): 1.0,
            ('oAZ', 'eA'): 2.0,
            ('oAZ', 'eZ'): 5.0,
            ('oB', 'eB'): 1.0,
            ('oBY', 'eB'): 2.0,
            ('oBY', 'eY'): 5.0},

        'elongation_rate': 5.0,
        'polymerase_occlusion': 10,
        'symbol_to_monomer': amino_acids,
        'monomer_ids': monomer_ids,
        'concentration_keys': [],

        'mass_deriver_key': 'mass_deriver',
        'concentrations_deriver_key': 'translation_concentrations',
        'time_step': 1.0,
    }

    def __init__(self, initial_parameters=None):
        '''A stochastic translation model

        .. WARNING::
            Vivarium's knowledge base uses the gene name to name the
            protein. This means that for a gene acrA that codes for
            protein AcrA, you must refer to the gene, transcript, and
            protein each as acrA.

        .. DANGER::
            This documentation will need to be updated to reflect the
            changes in `#185
            <https://github.com/CovertLab/vivarium/pull/185>`_

        :term:`Ports`:

        * **ribosomes**: Expects the ``ribosomes`` variable, whose
          value is a list of the configurations of the ribosomes
          currently active.
        * **molecules**: Expects variables for each of the RNA
          nucleotides.
        * **transcripts**: Expects variables for each transcript to
          translate. Translation will read transcripts from this port.
        * **proteins**: Expects variables for each protein product. The
          produced proteins will be added to this port as counts.
        * **concentrations**: Expects variables for each key in
          ``concentration_keys``. This will be used by a :term:`deriver`
          to convert counts to concentrations.

        Arguments:
            initial_parameters: A dictionary of configuration options.
                Accepts the following keys:

                * **sequences** (:py:class:`dict`): Maps from operon
                  name to the RNA sequence of the operon, as a
                  :py:class:`str`.
                * **templates** (:py:class:`dict`): Maps from the name
                  of a transcript to a :term:`template specification`.
                  The template specification may be generated by
                  :py:func:`cell.library.polymerize.generate_template`
                  like so:

                  >>> from vivarium_cell.library.polymerize import (
                  ...     generate_template)
                  >>> from vivarium.library.pretty import format_dict
                  >>> terminator_index = 5
                  >>> template = generate_template(
                  ...     'oA', terminator_index, ['product1'])
                  >>> print(format_dict(template))
                  {
                      "direction": 1,
                      "id": "oA",
                      "position": 0,
                      "sites": [],
                      "terminators": [
                          {
                              "position": 5,
                              "products": [
                                  "product1"
                              ],
                              "strength": 1.0
                          }
                      ]
                  }


                * **transcript_affinities** (:py:class:`dict`): A map
                  from the name of a transcript to the binding affinity
                  (a :py:class:`float`) of the ribosome for the
                  transcript.
                * **elongation_rate** (:py:class:`float`): The
                  elongation rate of the ribosome.

                  .. todo:: Units of elongation rate

                * **polymerase_occlusion** (:py:class:`int`): The number
                  of base pairs behind the polymerase where another
                  polymerase is occluded and so cannot bind.
                * **symbol_to_monomer** (:py:class:`dict`): Maps from
                  the symbols used to represent monomers in the RNA
                  sequence to the name of the free monomer. This should
                  generally be
                  :py:data:`cell.data.amino_acids.amino_acids`.
                * **monomer_ids** (:py:class:`list`): A list of the
                  names of the free monomers consumed by translation.
                  This can generally be computed as:

                  >>> import pprint
                  >>>
                  >>> from vivarium_cell.data.amino_acids import amino_acids
                  >>> monomer_ids = amino_acids.values()
                  >>> pp = pprint.PrettyPrinter()
                  >>> pp.pprint(list(monomer_ids))
                  ['Alanine',
                   'Arginine',
                   'Asparagine',
                   'Aspartate',
                   'Cysteine',
                   'Glutamate',
                   'Glutamine',
                   'Glycine',
                   'Histidine',
                   'Isoleucine',
                   'Leucine',
                   'Lysine',
                   'Methionine',
                   'Phenylalanine',
                   'Proline',
                   'Serine',
                   'Threonine',
                   'Tryptophan',
                   'Tyrosine',
                   'Valine']

                  Note that we only included the `list()` transformation
                  to make the output prettier. The `dict_values` object
                  returned by `.values()` is sufficiently list-like for
                  use here. Also note that :py:mod:`pprint` just makes
                  the output prettier.
                * **concentration_keys** (:py:class:`list`): A list of
                  variables you want to be able to access as
                  concentrations from the *concentrations* port. The
                  actual conversion is handled by a deriver.

        Example configuring the process (uses
        :py:func:`vivarium.library.pretty.format_dict`):

        >>> from vivarium.library.pretty import format_dict
        >>> from vivarium_cell.data.amino_acids import amino_acids
        >>> from vivarium_cell.library.polymerize import generate_template
        >>> random.seed(0)  # Needed because process is stochastic
        >>> np.random.seed(0)
        >>> configurations = {
        ...     'sequences': {
        ...         ('oA', 'eA'): 'AWDPT',
        ...         ('oAZ', 'eZ'): 'YVEGELENGGMFISC',
        ...     },
        ...     'templates': {
        ...         ('oA', 'eA'): generate_template(('oA', 'eA'), 5, ['eA']),
        ...         ('oAZ', 'eZ'): generate_template(('oAZ', 'eZ'), 15, ['eA', 'eZ']),
        ...     },
        ...     'transcript_affinities': {
        ...         ('oA', 'eA'): 1.0,
        ...         ('oAZ', 'eZ'): 1.0,
        ...     },
        ...     'elongation_rate': 10.0,
        ...     'polymerase_occlusion': 10,
        ...     'symbol_to_monomer': amino_acids,
        ...     'monomer_ids': amino_acids.values(),
        ...     'concentration_keys': []
        ... }
        >>> # make the translation process, and initialize the states
        >>> translation = Translation(configurations)  # doctest:+ELLIPSIS
        >>> states = {
        ...     'ribosomes': {},
        ...     'molecules': {},
        ...     'proteins': {UNBOUND_RIBOSOME_KEY: 2},
        ...     'transcripts': {
        ...         'oA': 10,
        ...         'oAZ': 10,
        ...     }
        ... }
        >>> states['molecules'].update(
        ...     {
        ...         molecule_id: 100
        ...         for molecule_id in translation.monomer_ids
        ...     }
        ... )
        >>> update = translation.next_update(1, states)
        >>> print(update['ribosomes'])
        {'_add': [{'path': (1,), 'state': <class 'vivarium_cell.processes.translation.Ribosome'>: {'id': 1, 'state': 'occluding', 'position': 9, 'template': ('oAZ', 'eZ'), 'template_index': 0, 'terminator': 0}}, {'path': (2,), 'state': <class 'vivarium_cell.processes.translation.Ribosome'>: {'id': 2, 'state': 'occluding', 'position': 9, 'template': ('oAZ', 'eZ'), 'template_index': 0, 'terminator': 0}}], '_delete': []}
        '''

        if not initial_parameters:
            initial_parameters = {}

        self.monomer_symbols = list(amino_acids.keys())
        self.monomer_ids = list(amino_acids.values())

        # work on a private copy so the class-level defaults are never mutated
        self.default_parameters = copy.deepcopy(self.defaults)

        templates = self.or_default(initial_parameters, 'templates')

        # derived defaults: products of all templates, transcript ordering
        # and the molecule list
        self.default_parameters['protein_ids'] = all_products({
            key: Template(config)
            for key, config in templates.items()})

        self.default_parameters['transcript_order'] = list(
            initial_parameters.get(
                'transcript_affinities',
                self.default_parameters['transcript_affinities']).keys())
        self.default_parameters['molecule_ids'] = self.monomer_ids

        # caller-supplied values override the (derived) defaults
        self.parameters = copy.deepcopy(self.default_parameters)
        self.parameters.update(initial_parameters)

        self.sequences = self.parameters['sequences']
        self.templates = self.parameters['templates']

        self.transcript_affinities = self.parameters['transcript_affinities']
        self.operons = gather_genes(self.transcript_affinities)
        self.operon_order = list(self.operons.keys())
        self.transcript_order = self.parameters['transcript_order']
        self.transcript_count = len(self.transcript_order)

        self.monomer_ids = self.parameters['monomer_ids']
        self.molecule_ids = self.parameters['molecule_ids']
        # ATP and ADP are tracked alongside the monomers (see the
        # hydrolysis bookkeeping in next_update)
        self.molecule_ids.extend(['ATP', 'ADP'])

        self.protein_ids = self.parameters['protein_ids']
        self.symbol_to_monomer = self.parameters['symbol_to_monomer']
        # fractional elongation carried over between calls to next_update
        self.elongation = 0
        self.elongation_rate = self.parameters['elongation_rate']
        self.polymerase_occlusion = self.parameters['polymerase_occlusion']
        self.concentration_keys = self.parameters['concentration_keys']

        # affinities in the same order as self.transcript_order
        self.affinity_vector = np.array([
            self.transcript_affinities[transcript_key]
            for transcript_key in self.transcript_order], dtype=np.float64)

        self.stoichiometry = build_stoichiometry(self.transcript_count)

        self.initiation = StochasticSystem(self.stoichiometry)

        # counter used to hand out ids to newly bound ribosomes
        self.ribosome_id = 0

        self.protein_keys = self.concentration_keys + self.protein_ids
        self.all_protein_keys = self.protein_keys + [UNBOUND_RIBOSOME_KEY]

        self.mass_deriver_key = self.or_default(initial_parameters, 'mass_deriver_key')
        self.concentrations_deriver_key = self.or_default(
            initial_parameters, 'concentrations_deriver_key')

        log.info('translation parameters: {}'.format(self.parameters))

        super(Translation, self).__init__(self.parameters)

    def ports_schema(self):
        """Return the schema for the ``ribosomes``, ``global``,
        ``molecules``, ``transcripts``, ``proteins`` and
        ``concentrations`` ports.
        """

        def add_mass(schema, masses, key):
            # Ensure the schema has a '_properties' entry and, when a
            # mass is known for `key`, record it under 'mw'.
            if '_properties' not in schema:
                schema['_properties'] = {}
            if key in masses:
                schema['_properties']['mw'] = masses[key]
            return schema

        return {
            'ribosomes': {
                '*': {
                    'id': {
                        '_default': -1,
                        '_updater': 'set'},
                    'domain': {
                        '_default': 0,
                        '_updater': 'set'},
                    'state': {
                        '_default': None,
                        '_updater': 'set',
                        '_emit': True},
                    'position': {
                        '_default': 0,
                        '_updater': 'set',
                        '_emit': True},
                    'template': {
                        '_default': None,
                        '_updater': 'set',
                        '_emit': True},
                    'template_index': {
                        '_default': 0,
                        '_updater': 'set',
                        '_emit': True}}},

            'global': {},

            'molecules': {
                molecule: add_mass({
                    '_emit': True,
                    '_default': 0,
                    '_divider': 'split'}, molecular_weight, molecule)
                for molecule in self.molecule_ids},

            'transcripts': {
                transcript: add_mass({
                    '_default': 0,
                    '_divider': 'split'}, molecular_weight, transcript)
                for transcript in list(self.operons.keys())},

            'proteins': {
                protein: add_mass({
                    '_default': 0,
                    '_divider': 'split',
                    '_emit': True}, molecular_weight, protein)
                for protein in self.all_protein_keys},

            'concentrations': {
                molecule: {
                    '_default': 0.0,
                    '_updater': 'set'}
                for molecule in self.protein_keys}}

    def derivers(self):
        """Return the deriver configurations, keyed by
        ``mass_deriver_key`` and ``concentrations_deriver_key``.
        """
        return {
            self.mass_deriver_key: {
                'deriver': 'mass_deriver',
                'port_mapping': {
                    'global': 'global'}},
            self.concentrations_deriver_key: {
                'deriver': 'concentrations_deriver',
                'port_mapping': {
                    'global': 'global',
                    'counts': 'proteins',
                    'concentrations': 'concentrations'},
                'config': {
                    'concentration_keys': self.protein_keys}}}

    def next_update(self, timestep, states):
        """Advance translation by ``timestep`` and return the update.

        Alternates between elongating the active ribosomes and running
        the stochastic initiation system, in intervals of at most
        ``1 / elongation_rate``, until ``timestep`` has elapsed.

        Arguments:
            timestep: Amount of time to simulate.
            states: Mapping with the ``molecules``, ``transcripts``,
                ``proteins`` and ``ribosomes`` port states.

        Returns:
            A dictionary with ``ribosomes``, ``molecules`` and
            ``proteins`` updates. The ``ribosomes`` entry holds the
            ribosomes that were present at the start and are still
            active, keyed by id, plus ``_add`` and ``_delete`` lists for
            ribosomes that bound or completed during this call.
        """
        molecules = states['molecules']
        transcripts = states['transcripts']
        proteins = states['proteins']

        ribosomes = {
            ribosome_id: Ribosome(ribosome)
            for ribosome_id, ribosome in states['ribosomes'].items()}

        # Snapshot the ids now. ``dict.keys()`` is a live view on
        # Python 3, so without the copy, ribosomes bound below would also
        # show up as "original" whenever ``ribosomes`` is still this same
        # dict at the end of the call.
        original_ribosome_keys = set(ribosomes)

        gene_counts = np.array(
            list(transcripts_to_gene_counts(transcripts, self.operons).values()),
            dtype=np.int64)

        # Find out how many transcripts are currently blocked by a
        # newly initiated ribosome
        bound_transcripts = np.zeros(self.transcript_count, dtype=np.int64)
        ribosomes_by_transcript = {
            transcript_key: []
            for transcript_key in self.transcript_order}
        for ribosome in ribosomes.values():
            ribosomes_by_transcript[ribosome.template].append(ribosome)
        for index, transcript in enumerate(self.transcript_order):
            bound_transcripts[index] = len([
                ribosome
                for ribosome in ribosomes_by_transcript[transcript]
                if ribosome.is_bound()])

        # Make the state for a gillespie simulation out of total number of each
        # transcript not blocked by a bound ribosome, concatenated with the number
        # of each transcript that is bound by a ribosome.
        # These are the two states for each transcript the simulation
        # will operate on, essentially going back and forth between
        # bound and unbound states.

        original_unbound_ribosomes = proteins[UNBOUND_RIBOSOME_KEY]
        monomer_limits = {
            monomer: molecules[monomer]
            for monomer in self.monomer_ids}
        unbound_ribosomes = original_unbound_ribosomes

        templates = {
            key: Template(template)
            for key, template in self.templates.items()}

        time = 0
        now = 0
        elongation = Elongation(
            self.sequences,
            templates,
            monomer_limits,
            self.symbol_to_monomer,
            self.elongation)

        while time < timestep:
            # build the state vector for the gillespie simulation
            substrate = np.concatenate([
                gene_counts - bound_transcripts,
                bound_transcripts,
                [unbound_ribosomes]])

            # time needed to elongate by one monomer at the configured rate
            distance = 1 / self.elongation_rate

            # never step past the end of the requested timestep
            interval = min(distance, timestep - time)

            if interval == distance:
                # perform the elongation until the next event
                terminations, monomer_limits, ribosomes = elongation.step(
                    interval,
                    monomer_limits,
                    ribosomes)
                unbound_ribosomes += terminations
            else:
                # remaining time is shorter than one elongation interval
                elongation.store_partial(interval)
                terminations = 0

            # run the initiation simulation for the same interval
            result = self.initiation.evolve(
                interval,
                substrate,
                self.affinity_vector)

            # go through each event in the simulation and update the state
            ribosome_bindings = 0
            for now, event in zip(result['time'], result['events']):
                # ribosome has bound the transcript
                transcript_key = self.transcript_order[event]
                bound_transcripts[event] += 1

                self.ribosome_id += 1
                new_ribosome = Ribosome({
                    'id': self.ribosome_id,
                    'template': transcript_key,
                    'position': 0})
                new_ribosome.bind()
                new_ribosome.start_polymerizing()
                ribosomes[new_ribosome.id] = new_ribosome

                ribosome_bindings += 1
                unbound_ribosomes -= 1

            # free the transcript of every ribosome that reports it is
            # no longer occluding
            for ribosome in ribosomes.values():
                if ribosome.is_unoccluding(self.polymerase_occlusion):
                    bound_transcripts[ribosome.template_index] -= 1
                    ribosome.unocclude()

            time += interval

        # track how far elongation proceeded to start from next iteration
        self.elongation = elongation.elongation - int(elongation.elongation)

        proteins = {
            UNBOUND_RIBOSOME_KEY: unbound_ribosomes - original_unbound_ribosomes}
        proteins.update(elongation.complete_polymers)

        # monomers consumed by elongation are reported as negative deltas
        molecules = {
            key: count * -1
            for key, count in elongation.monomers.items()}

        # classify ribosomes by comparing the ids at the start and end
        original = original_ribosome_keys
        current = set(ribosomes.keys())
        bound_ribosomes = current - original
        completed_ribosomes = original - current
        continuing_ribosomes = original - completed_ribosomes

        # ATP hydrolysis cost is 2 per amino acid elongation
        molecules['ATP'] = 0
        molecules['ADP'] = 0
        for count in elongation.monomers.values():
            molecules['ATP'] -= 2 * count
            molecules['ADP'] += 2 * count

        ribosome_updates = {
            ribosome_id: ribosomes[ribosome_id]
            for ribosome_id in continuing_ribosomes}

        add_ribosomes = [
            {'path': (bound,), 'state': ribosomes[bound]}
            for bound in bound_ribosomes]

        delete_ribosomes = [
            (completed,)
            for completed in completed_ribosomes]

        ribosome_updates['_add'] = add_ribosomes
        ribosome_updates['_delete'] = delete_ribosomes

        update = {
            'ribosomes': ribosome_updates,
            'molecules': molecules,
            'proteins': proteins}

        return update
Beispiel #12
0
class Complexation(Process):
    """Process that converts monomers into complexes by evolving a
    stochastic system built from the configured stoichiometry and rates.
    """

    defaults = {
        'monomer_ids': chromosome.complexation_monomer_ids,
        'complex_ids': chromosome.complexation_complex_ids,
        'stoichiometry': chromosome.complexation_stoichiometry,
        'rates': chromosome.complexation_rates,
        'mass_deriver_key': 'mass_deriver'
    }

    def __init__(self, initial_parameters=None):
        """Set up the process from ``initial_parameters`` (or the defaults).

        Arguments:
            initial_parameters: Optional dictionary of configuration
                overrides; a falsy value is treated as an empty dict.
        """
        initial_parameters = initial_parameters or {}

        super(Complexation, self).__init__(initial_parameters)

        # derive 'reaction_ids' from 'stoichiometry' via keys_list
        self.derive_defaults('stoichiometry', 'reaction_ids', keys_list)

        params = self.parameters
        self.monomer_ids = params['monomer_ids']
        self.complex_ids = params['complex_ids']
        self.reaction_ids = params['reaction_ids']
        self.stoichiometry = params['stoichiometry']
        self.rates = params['rates']

        built = build_complexation_stoichiometry(
            self.stoichiometry,
            self.rates,
            self.reaction_ids,
            self.monomer_ids,
            self.complex_ids)
        self.complexation_stoichiometry, self.complexation_rates = built

        self.complexation = StochasticSystem(self.complexation_stoichiometry)

        self.mass_deriver_key = self.or_default(
            initial_parameters, 'mass_deriver_key')

    def ports_schema(self):
        """Return the schema for the ``monomers``, ``complexes`` and
        ``global`` ports.
        """

        def count_schema(molecule):
            # Emitted count defaulting to 0, with the molecular weight
            # attached when one is known for this molecule.
            properties = {}
            if molecule in molecular_weight:
                properties = {'mw': molecular_weight[molecule]}
            return {
                '_default': 0,
                '_emit': True,
                '_properties': properties}

        schema = {}
        schema['monomers'] = {
            monomer: count_schema(monomer)
            for monomer in self.monomer_ids}
        schema['complexes'] = {
            complex_id: count_schema(complex_id)
            for complex_id in self.complex_ids}
        schema['global'] = {}
        return schema

    def derivers(self):
        """Return the mass deriver configuration, keyed by
        ``mass_deriver_key``.
        """
        mass_config = {
            'deriver': 'mass',
            'port_mapping': {'global': 'global'}}
        return {self.mass_deriver_key: mass_config}

    def next_update(self, timestep, states):
        """Evolve the complexation system for ``timestep``.

        Arguments:
            timestep: Duration passed to the stochastic system.
            states: Mapping with ``monomers`` and ``complexes`` counts.

        Returns:
            A dictionary with ``monomers`` and ``complexes`` entries, each
            mapping an id to the change in its count (outcome minus the
            starting count).
        """
        monomers = states['monomers']
        complexes = states['complexes']
        n_monomers = len(self.monomer_ids)

        # layout: all monomer counts first, then all complex counts
        substrate = np.zeros(
            n_monomers + len(self.complex_ids), dtype=np.int64)
        for position, monomer_id in enumerate(self.monomer_ids):
            substrate[position] = monomers[monomer_id]
        for position, complex_id in enumerate(
                self.complex_ids, start=n_monomers):
            substrate[position] = complexes[complex_id]

        result = self.complexation.evolve(
            timestep, substrate, self.complexation_rates)

        delta = result['outcome'] - substrate

        monomers_update = {
            monomer_id: delta[position]
            for position, monomer_id in enumerate(self.monomer_ids)}
        complexes_update = {
            complex_id: delta[position]
            for position, complex_id in enumerate(
                self.complex_ids, start=n_monomers)}

        return {'monomers': monomers_update, 'complexes': complexes_update}
Beispiel #13
0
import json

from arrow import StochasticSystem
import numpy as np

# Duration handed to every evolve() call below.
duration = 2**31

# Load the stoichiometry, rates and initial counts from the JSON file.
with open('data/complexation/large-initial.json') as f:
    data = json.load(f)

stoich = np.array(data['stoich'])
# The stored rates are scaled by a factor of 1e-30 before use.
rates = np.array(data['rates']) * 1e-30
counts = np.array(data['counts'])

# The system is built from the transpose of the stored matrix, with a
# fixed random seed.
system = StochasticSystem(stoich.T, random_seed=0)

# Evolve repeatedly, feeding each outcome back in as the next starting
# counts, until an evolve() call leaves the counts unchanged.
while True:
    result = system.evolve(duration, counts, rates)

    updated_counts = result['outcome']

    # No entry changed during this call: stop.
    if not np.any(counts - updated_counts):
        break

    # Any negative entry in the outcome aborts the run.
    if np.any(updated_counts < 0):
        raise Exception('Negative counts')

    counts = updated_counts
    print(counts)
Beispiel #14
0
class Transcription(Process):

    name = 'transcription'
    defaults = {
        'promoter_affinities': {},
        'transcription_factors': [],
        'sequence': '',
        'templates': {},
        'genes': {},
        'elongation_rate': 1.0,
        'polymerase_occlusion': 5,
        'symbol_to_monomer': nucleotides,
        'monomer_ids': monomer_ids,
        'concentrations_deriver_key': 'transcription_concentrations',
        'initial_domains': {
            0: {
                'id': 0,
                'lead': 0,
                'lag': 0,
                'children': []
            }
        },
        'molecule_ids': monomer_ids,
        'time_step': 1.0,
    }

    def __init__(self, initial_parameters=None):
        '''A stochastic transcription model

        .. WARNING:: Vivarium's knowledge base uses the gene name to
            name the protein. This means that for a gene acrA that
            codes for protein AcrA, you must refer to the gene,
            transcript, and protein each as acrA.

        :term:`Ports`:

        * **chromosome**: The linked :term:`store` should hold a
          representation of the chromosome in the form returned by
          :py:meth:`vivarium.states.chromosome.Chromosome.to_dict`.
        * **molecules**: Expects variables with the names in the
          *molecule_ids* configuration. These are the monomers consumed
          by transcription.
        * **factors**: Expects variables for each transcription factor's
          concentration.
        * **transcripts**: The linked store should store the
          concentrations of the transcripts.
        * **proteins**: The linked store should hold the concentrations
          of the transcription factors and the RNA polymerase.

        Arguments:
            initial_parameters: The following configuration options may
                be provided:

                * **promoter_affinities** (:py:class:`dict`): Maps from
                  binding state tuples to the binding affinity of RNA
                  polymerase and the promoter when the promoter is at
                  that binding state. The binding state of a promoter is
                  which (if any) transcription factors are bound to the
                  promoter. Such a binding state can be represented by a
                  binding state tuple, which is a :py:class:`tuple`
                  whose first element is the name of the promoter. All
                  bound transcription factors are listed as subsequent
                  elements. If no transcription factors are bound, the
                  sole subsequent element is ``None``.

                  .. todo:: What is the significance of the order in the
                      binding state tuple?

                  .. todo:: What are the units of the affinities?

                * **transcription_factors** (:py:class:`list`): A list
                  of all modeled transcription factors.
                * **sequence**: The DNA sequence that includes all the
                  genes whose transcription is being modeled.
                * **templates** (:py:class:`dict`): Maps from the name
                  of an operon to that operon's :term:`template
                  specification`.
                * **genes** (:py:class:`dict`): Maps from operon name to
                  a list of the names of the genes in that operon.
                * **elongation_rate** (:py:class:`float`): The
                  elongation rate of the RNA polymerase.
                * **polymerase_occlusion** (:py:class:`int`): The number
                  of base pairs behind the polymerase where another
                  polymerase is occluded and so cannot bind.
                * **symbol_to_monomer** (:py:class:`dict`): Maps from
                  the symbols used to represent monomers in the RNA
                  sequence to the name of the free monomer. This should
                  generally be
                  :py:data:`vivarium.data.nucleotides.nucleotides`.
                * **monomer_ids** (:py:class:`list`): A list of the
                  names of the free monomers consumed by transcription.
                  This can generally be computed as:

                  >>> from vivarium.data.nucleotides import nucleotides
                  >>> monomer_ids = nucleotides.values()
                  >>> print(list(monomer_ids))
                  ['ATP', 'GTP', 'UTP', 'CTP']

                  Note that we only included the ``list()``
                  transformation to make the output prettier. The
                  ``dict_values`` object returned by the ``.values()``
                  call is sufficiently list-like for use here.
                * **molecule_ids** (:py:class:`list`): A list of all the
                  molecules needed by the :term:`process`. This will
                  generally be the same as *monomer_ids*. The value is
                  copied into a new list before ``'ATP'`` and ``'ADP'``
                  are appended, so the object passed in is left
                  untouched.

        Example configuring the process (uses
        :py:func:`vivarium.library.pretty.format_dict`):

        >>> import random
        >>>
        >>> import numpy as np
        >>>
        >>> from vivarium.states.chromosome import (
        ...     toy_chromosome_config,
        ...     Chromosome,
        ... )
        >>> from vivarium.data.nucleotides import nucleotides
        >>> # format_dict lets us print dictionaries prettily
        >>> from vivarium.library.pretty import format_dict
        >>>
        >>> random.seed(0)  # Needed because process is stochastic
        >>> np.random.seed(0)
        >>> # We will use the toy chromosome from toy_chromosome_config
        >>> print(toy_chromosome_config)
        {'sequence': 'ATACGGCACGTGACCGTCAACTTA', 'genes': {'oA': ['eA'], 'oAZ': ['eA', 'eZ'], 'oB': ['eB'], 'oBY': ['eB', 'eY']}, 'promoter_order': ['pA', 'pB'], 'promoters': {'pA': {'id': 'pA', 'position': 3, 'direction': 1, 'sites': [{'position': 0, 'length': 3, 'thresholds': {'tfA': <Quantity(0.3, 'millimolar')>}}], 'terminators': [{'position': 6, 'strength': 0.5, 'products': ['oA']}, {'position': 12, 'strength': 1.0, 'products': ['oAZ']}]}, 'pB': {'id': 'pB', 'position': -3, 'direction': -1, 'sites': [{'position': 0, 'length': 3, 'thresholds': {'tfB': <Quantity(0.5, 'millimolar')>}}], 'terminators': [{'position': -9, 'strength': 0.5, 'products': ['oB']}, {'position': -12, 'strength': 1.0, 'products': ['oBY']}]}}, 'promoter_affinities': {('pA', None): 1.0, ('pA', 'tfA'): 10.0, ('pB', None): 1.0, ('pB', 'tfB'): 10.0}, 'domains': {0: {'id': 0, 'lead': 0, 'lag': 0, 'children': []}}, 'rnaps': {}}
        >>> monomer_ids = list(nucleotides.values())
        >>> configuration = {
        ...     'promoter_affinities': {
        ...         ('pA', None): 1.0,
        ...         ('pA', 'tfA'): 10.0,
        ...         ('pB', None): 1.0,
        ...         ('pB', 'tfB'): 10.0},
        ...     'transcription_factors': ['tfA', 'tfB'],
        ...     'sequence': toy_chromosome_config['sequence'],
        ...     'templates': toy_chromosome_config['promoters'],
        ...     'genes': toy_chromosome_config['genes'],
        ...     'elongation_rate': 10.0,
        ...     'polymerase_occlusion': 5,
        ...     'symbol_to_monomer': nucleotides,
        ...     'monomer_ids': monomer_ids,
        ...     'molecule_ids': monomer_ids,
        ... }
        >>> # At this point we haven't used the toy chromosome yet
        >>> # because it will be specified in the chromosome port.
        >>> # Notice that the parameters are specific to the chromosome.
        >>> transcription_process = Transcription(configuration)
        >>> # Now we need to initialize the simulation stores
        >>> state = {
        ...     'chromosome': toy_chromosome_config,
        ...     'molecules': {
        ...         nucleotide: 10
        ...         for nucleotide in monomer_ids
        ...     },
        ...     'proteins': {UNBOUND_RNAP_KEY: 10},
        ...     'factors': {'tfA': 0.2 * units.mM, 'tfB': 0.7 * units.mM},
        ... }
        >>> update = transcription_process.next_update(1.0, state)
        >>> print(update['chromosome'])
        {'rnaps': {'_add': [{'path': (2,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 2, 'template': 'pA', 'template_index': 0, 'terminator': 1, 'domain': 0, 'state': 'polymerizing', 'position': 7}}, {'path': (3,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 3, 'template': 'pB', 'template_index': 1, 'terminator': 0, 'domain': 0, 'state': 'occluding', 'position': 3}}, {'path': (4,), 'state': <class 'vivarium.states.chromosome.Rnap'>: {'id': 4, 'template': 'pA', 'template_index': 0, 'terminator': 0, 'domain': 0, 'state': 'occluding', 'position': 0}}], '_delete': []}, 'rnap_id': 4, 'domains': {0: <class 'vivarium.states.chromosome.Domain'>: {'id': 0, 'lead': 0, 'lag': 0, 'children': []}}, 'root_domain': 0}
        '''

        if not initial_parameters:
            initial_parameters = {}

        log.debug(
            'inital transcription parameters: {}'.format(initial_parameters))

        super(Transcription, self).__init__(initial_parameters)

        # Fill in 'promoter_order' and 'transcript_ids' from 'templates'.
        # NOTE(review): presumably only applied when the caller did not
        # supply them; confirm against derive_defaults.
        self.derive_defaults('templates', 'promoter_order', keys_list)
        self.derive_defaults('templates', 'transcript_ids', template_products)

        self.sequence = self.parameters['sequence']
        self.templates = self.parameters['templates']
        self.genes = self.parameters['genes']
        # A chromosome built without any state is used only to obtain
        # the sequences.
        empty_chromosome = Chromosome({
            'sequence': self.sequence,
            'promoters': self.templates,
            'genes': self.genes
        })
        self.sequences = empty_chromosome.sequences()
        self.symbol_to_monomer = self.parameters['symbol_to_monomer']

        log.debug('chromosome sequence: {}'.format(self.sequence))

        self.promoter_affinities = self.parameters['promoter_affinities']
        self.promoter_order = self.parameters['promoter_order']
        self.promoter_count = len(self.promoter_order)

        self.transcription_factors = self.parameters['transcription_factors']
        # Copy before appending. The configured value can be the caller's
        # own list or the list shared through ``defaults`` (where it is
        # the same object as 'monomer_ids'); extending it in place would
        # leak 'ATP'/'ADP' into those objects and grow them on every
        # instantiation. list() also accepts list-like inputs such as
        # dict_values, which have no extend().
        self.molecule_ids = list(self.parameters['molecule_ids'])
        self.molecule_ids.extend(['ATP', 'ADP'])
        self.monomer_ids = self.parameters['monomer_ids']
        self.transcript_ids = self.parameters['transcript_ids']
        # Fractional elongation carried over between next_update() calls.
        self.elongation = 0
        self.elongation_rate = self.parameters['elongation_rate']
        self.polymerase_occlusion = self.parameters['polymerase_occlusion']

        # Stochastic system used by next_update() to simulate initiation.
        self.stoichiometry = build_stoichiometry(self.promoter_count)
        self.initiation = StochasticSystem(self.stoichiometry,
                                           random_seed=np.random.randint(
                                               2**31))

        self.protein_ids = [UNBOUND_RNAP_KEY] + self.transcription_factors

        self.initial_domains = self.parameters['initial_domains']
        self.concentrations_deriver_key = self.parameters[
            'concentrations_deriver_key']

        # Keys of the chromosome dict that next_update() reports back.
        self.chromosome_ports = ['rnaps', 'rnap_id', 'domains', 'root_domain']

        log.debug('final transcription parameters: {}'.format(self.parameters))

    def build_affinity_vector(self, promoters, factors):
        vector = np.zeros(len(self.promoter_order), dtype=np.float64)
        for index, promoter_key in enumerate(self.promoter_order):
            promoter = promoters[promoter_key]
            binding = promoter.binding_state(factors)
            affinity = self.promoter_affinities.get(binding, 0.0)
            # print('promoter state - {}: {}'.format(binding, affinity))
            vector[index] = affinity
        return vector

    def chromosome_config(self, chromosome_states):
        return dict(chromosome_states,
                    sequence=self.sequence,
                    promoters=self.templates,
                    promoter_order=self.promoter_order,
                    genes=self.genes)

    def ports_schema(self):
        schema = {}

        schema['chromosome'] = {
            'rnap_id': {
                '_default': 1,
                '_updater': 'set'
            },
            'root_domain': {
                '_default': 0,
                '_updater': 'set'
            },
            'domains': {
                '*': {
                    'id': {
                        '_default': 1,
                        '_updater': 'set'
                    },
                    'lead': {
                        '_default': 0,
                        '_updater': 'set'
                    },
                    'lag': {
                        '_default': 0,
                        '_updater': 'set'
                    },
                    'children': {
                        '_default': [],
                        '_updater': 'set'
                    }
                }
            },
            'rnaps': {
                '*': {
                    'id': {
                        '_default': -1,
                        '_updater': 'set'
                    },
                    'domain': {
                        '_default': 0,
                        '_updater': 'set'
                    },
                    'state': {
                        '_default': None,
                        '_updater': 'set',
                        '_emit': True
                    },
                    'position': {
                        '_default': 0,
                        '_updater': 'set',
                        '_emit': True
                    },
                    'template': {
                        '_default': None,
                        '_updater': 'set',
                        '_emit': True
                    },
                    'template_index': {
                        '_default': 0,
                        '_updater': 'set',
                        '_emit': True
                    },
                    'terminator': {
                        '_default': 0,
                        '_updater': 'set',
                        '_emit': True
                    }
                }
            }
        }

        initial_domains = {
            id: {
                'id': {
                    '_default': id,
                    '_updater': 'set'
                },
                'lead': {
                    '_default': 0,
                    '_updater': 'set'
                },
                'lag': {
                    '_default': 0,
                    '_updater': 'set'
                },
                'children': {
                    '_default': [],
                    '_updater': 'set'
                }
            }
            for id, domain in self.initial_domains.items()
        }
        schema['chromosome']['domains'].update(initial_domains)

        schema['molecules'] = {
            molecule: {
                '_default': 0,
                '_divider': 'split',
                '_emit': True
            }
            for molecule in self.molecule_ids
        }

        schema['factors'] = {
            factor: {
                '_default': 0.0,
                '_divider': 'split'
            }
            for factor in self.transcription_factors
        }

        schema['transcripts'] = {
            protein: {
                '_default': 0,
                '_divider': 'split',
                '_emit': True
            }
            for protein in self.transcript_ids
        }

        schema['proteins'] = {
            protein: {
                '_default': 0,
                '_divider': 'split',
                '_emit': True
            }
            for protein in self.protein_ids
        }

        schema['global'] = {}

        return schema

    def derivers(self):
        return {
            self.concentrations_deriver_key: {
                'deriver': 'concentrations_deriver',
                'port_mapping': {
                    'global': 'global',
                    'counts': 'proteins',
                    'concentrations': 'factors'
                },
                'config': {
                    'concentration_keys': self.transcription_factors
                }
            }
        }

    def next_update(self, timestep, states):
        '''Advance transcription by ``timestep`` and report the changes.

        The chromosome is rebuilt from the ``chromosome`` state. The
        method then loops in intervals of at most
        ``1 / self.elongation_rate`` until ``timestep`` is covered,
        alternating elongation of the existing RNAPs with a stochastic
        simulation of RNAP binding at the promoters.

        Arguments:
            timestep: Amount of time to cover.
            states: Dict with the entries ``chromosome``, ``molecules``,
                ``proteins`` and ``factors``.

        Returns:
            A dict with the keys:

            * ``chromosome``: the entries of ``chromosome.to_dict()``
              named in ``self.chromosome_ports``, where ``rnaps`` holds
              the continuing RNAPs plus ``_add`` / ``_delete`` lists.
            * ``proteins``: change in the unbound RNAP count.
            * ``molecules``: negated ``elongation.monomers`` counts plus
              ``ATP`` / ``ADP`` entries.
            * ``transcripts``: ``elongation.complete_polymers``.

        Side effects:
            ``self.elongation`` is set to the fractional part of the
            elongation reached, to be picked up by the next call.
        '''
        chromosome_state = states['chromosome']
        # chromosome_state['rnaps'] = list(chromosome_state['rnaps'].values())
        # Remember which RNAPs existed on entry so that additions and
        # removals can be worked out at the end.
        original_rnap_keys = [
            rnap['id'] for rnap in chromosome_state['rnaps'].values()
        ]
        chromosome = Chromosome(self.chromosome_config(chromosome_state))

        molecules = states['molecules']
        proteins = states['proteins']
        factors = states['factors']  # as concentrations

        promoter_rnaps = chromosome.promoter_rnaps()
        promoter_domains = chromosome.promoter_domains()

        # Find out how many promoters are currently blocked by a
        # newly initiated or occluding rnap
        promoter_count = len(chromosome.promoter_order)
        blocked_promoters = np.zeros(promoter_count, dtype=np.int64)
        open_domains = {}
        bound_domains = {}
        for promoter_index, promoter_key in enumerate(
                chromosome.promoter_order):
            domains = []
            for rnap in promoter_rnaps.get(promoter_key, {}).values():
                if rnap.is_occluding():
                    domains.append(rnap.domain)
                    blocked_promoters[promoter_index] += 1

            # Per promoter: domains holding an occluding rnap, and the
            # remaining domains of that promoter.
            bound_domains[promoter_key] = set(domains)
            open_domains[promoter_key] = promoter_domains[
                promoter_key] - bound_domains[promoter_key]

        blocked_promoters = np.array(blocked_promoters)

        # Make the state for a gillespie simulation out of total number of each
        # promoter by copy number not blocked by initiated rnap,
        # concatenated with the number of each promoter that is bound by rnap.
        # These are the two states for each promoter the simulation
        # will operate on, essentially going back and forth between
        # bound and unbound states.
        copy_numbers = chromosome.promoter_copy_numbers()
        original_unbound_rnaps = proteins[UNBOUND_RNAP_KEY]
        monomer_limits = {
            monomer: molecules[monomer]
            for monomer in self.monomer_ids
        }
        unbound_rnaps = original_unbound_rnaps

        time = 0
        now = 0
        # self.elongation is the fractional elongation stored by the
        # previous call (see the end of this method).
        elongation = Elongation(self.sequences, chromosome.promoters,
                                monomer_limits, self.symbol_to_monomer,
                                self.elongation)

        # The affinities are computed once and reused for every interval.
        initiation_affinity = self.build_affinity_vector(
            chromosome.promoters, factors)

        while time < timestep:
            # build the state vector for the gillespie simulation
            substrate = np.concatenate([
                copy_numbers - blocked_promoters, blocked_promoters,
                [unbound_rnaps]
            ])

            log.debug('transcription substrate: {}'.format(substrate))
            log.debug('blocked promoters: {}'.format(blocked_promoters))

            # find number of monomers until next terminator
            distance = 1 / self.elongation_rate  # chromosome.terminator_distance()

            # find interval of time that elongates to the point of the next terminator
            interval = min(distance, timestep - time)

            # A full elongation step runs only when the interval was not
            # cut short by the end of the timestep; otherwise the
            # shortened interval is handed to store_partial().
            if interval == distance:
                # perform the elongation until the next event
                terminations, monomer_limits, chromosome.rnaps = elongation.step(
                    interval, monomer_limits, chromosome.rnaps)
                unbound_rnaps += terminations
            else:
                elongation.store_partial(interval)
                terminations = 0

            log.debug('time: {} --- interval: {}'.format(time, interval))
            log.debug('monomer limits: {}'.format(monomer_limits))
            log.debug('terminations: {}'.format(terminations))

            # run simulation for interval of time to next terminator
            # NOTE(review): substrate was built before the elongation
            # step above, so RNAPs freed by terminations in this
            # interval are not part of it - confirm this is intended.
            result = self.initiation.evolve(interval, substrate,
                                            initiation_affinity)

            log.debug('result: {}'.format(result))

            # perform binding
            # NOTE(review): `now` and `promoter` are assigned but not
            # read anywhere in this method.
            for now, event in zip(result['time'], result['events']):
                # RNAP has bound the promoter
                promoter_key = chromosome.promoter_order[event]
                promoter = chromosome.promoters[promoter_key]
                domains = open_domains[promoter_key]
                domain = choose_element(domains)

                # Move the chosen domain from open to bound.
                blocked_promoters[event] += 1
                bound_domains[promoter_key].add(domain)
                open_domains[promoter_key].remove(domain)

                # create a new bound RNAP and add it to the chromosome.
                new_rnap = chromosome.bind_rnap(event, domain)
                new_rnap.start_polymerizing()

                log.debug('newly bound RNAP: {}'.format(new_rnap))

                unbound_rnaps -= 1

            # deal with occluding rnap
            for rnap in chromosome.rnaps.values():
                if rnap.is_unoccluding(self.polymerase_occlusion):
                    log.debug('RNAP unoccluding: {}'.format(rnap))

                    # The promoter's domain becomes available again.
                    blocked_promoters[rnap.template_index] -= 1
                    bound_domains[rnap.template].remove(rnap.domain)
                    open_domains[rnap.template].add(rnap.domain)
                    rnap.unocclude()
                log.debug('rnap: {}'.format(rnap))

            log.debug('complete: {}'.format(elongation.complete_polymers))

            time += interval

        # track how far elongation proceeded to start from next iteration
        self.elongation = elongation.elongation - int(elongation.elongation)

        # Reported as a change relative to the count on entry.
        proteins = {UNBOUND_RNAP_KEY: unbound_rnaps - original_unbound_rnaps}

        molecules = {
            key: count * -1
            for key, count in elongation.monomers.items()
        }

        # 1 ATP hydrolysis cost per nucleotide elongation
        # NOTE(review): if elongation.monomers has an 'ATP' entry, the
        # negated count computed for it above is overwritten by the
        # reset to 0 below - confirm this is intended.
        molecules['ATP'] = 0
        molecules['ADP'] = 0
        for count in elongation.monomers.values():
            molecules['ATP'] -= count
            molecules['ADP'] += count

        chromosome_dict = chromosome.to_dict()
        rnaps = chromosome_dict['rnaps']

        # Classify RNAPs by comparing the ids seen on entry with the
        # keys present now.
        original = set(original_rnap_keys)
        current = set(rnaps.keys())
        bound_rnaps = current - original
        completed_rnaps = original - current
        continuing_rnaps = original - completed_rnaps

        rnap_updates = {
            rnap_id: rnaps[rnap_id]
            for rnap_id in continuing_rnaps
        }

        # New RNAPs go under '_add' with a one-element path; RNAPs that
        # are gone go under '_delete'.
        add_rnaps = [{
            'path': (bound, ),
            'state': rnaps[bound]
        } for bound in bound_rnaps]

        delete_rnaps = [(completed, ) for completed in completed_rnaps]

        rnap_updates['_add'] = add_rnaps
        rnap_updates['_delete'] = delete_rnaps
        chromosome_dict['rnaps'] = rnap_updates

        update = {
            'chromosome':
            {key: chromosome_dict[key]
             for key in self.chromosome_ports},
            'proteins': proteins,
            'molecules': molecules,
            'transcripts': elongation.complete_polymers
        }

        log.debug('molecules update: {}'.format(update['molecules']))

        return update