Beispiel #1
0
def test_site_map_hgnc():
    """Make sure site mapping still happens when only an HGNC ID is given."""
    mapk1_agent, _unused_mapk3 = get_invalid_mapks()
    # Replace the grounding so that HGNC is the only db_refs entry.
    mapk1_agent.db_refs = {'HGNC': '6871'}
    stmt = ActiveForm(mapk1_agent, 'kinase', True)
    valid_stmts, mapped_stmts = sm.map_sites([stmt])
    # Nothing should come back as valid; the single statement is mapped.
    assert len(valid_stmts) == 0
    assert len(mapped_stmts) == 1
Beispiel #2
0
def test_site_map_hgnc():
    """Verify that an agent grounded only by HGNC ID is still site-mapped."""
    hgnc_only_agent, _other = get_invalid_mapks()
    # Overwrite db_refs so that the HGNC entry is the sole grounding.
    hgnc_only_agent.db_refs = {'HGNC': '6871'}
    active_form = ActiveForm(hgnc_only_agent, 'kinase', True)
    valid, mapped = sm.map_sites([active_form])
    # Expect an empty valid list and exactly one mapped statement.
    assert len(valid) == 0
    assert len(mapped) == 1
def test_invalid_position():
    """Check that a non-integer position yields no valid or mapped stmts."""
    stmt_json = {
        'enz': {'name': 'CFD'},
        'sub': {'name': 'HP'},
        'residue': 'F',
        # '2.59' is deliberately not an integer residue position.
        'position': '2.59',
    }
    stmt = Phosphorylation._from_json(stmt_json)
    valid_stmts, mapped_stmts = sm.map_sites(stmts=[stmt])
    # Both results are expected to be falsy (e.g. empty).
    assert not valid_stmts
    assert not mapped_stmts
Beispiel #4
0
def test_site_map_within_bound_condition():
    """Make sure agents inside a bound condition are site-mapped too."""
    mapk1_agent, mapk3_agent = get_invalid_mapks()

    # Attach MAPK1 as a bound condition on the object of the statement.
    mapk3_agent.bound_conditions = [BoundCondition(mapk1_agent)]
    activation = Activation(mapk1_agent, mapk3_agent, 'kinase')

    # The second element of the site-mapping result holds the mapped
    # statements.
    mapped_list = sm.map_sites([activation])[1]
    assert len(mapped_list) == 1

    # The agent within the object's bound condition must have been mapped.
    mapped_stmt = mapped_list[0].mapped_stmt
    bound_agent = mapped_stmt.obj.bound_conditions[0].agent
    validate_mapk1(bound_agent)
Beispiel #5
0
def test_site_map_within_bound_condition():
    """Verify site mapping reaches agents nested in a bound condition."""
    subj_agent, obj_agent = get_invalid_mapks()

    # The object of the statement is bound to the (invalid) subject agent.
    obj_agent.bound_conditions = [BoundCondition(subj_agent)]
    stmt = Activation(subj_agent, obj_agent, 'kinase')

    # Run the site mapper and pick out the list of mapped statements.
    result = sm.map_sites([stmt])
    mapped_only = result[1]
    assert len(mapped_only) == 1

    # Drill down to the bound-condition agent of the mapped statement's
    # object and validate it.
    new_stmt = mapped_only[0].mapped_stmt
    validate_mapk1(new_stmt.obj.bound_conditions[0].agent)
Beispiel #6
0
def test_site_map_selfmodification():
    """Check site mapping of an Autophosphorylation on MAPK1.

    Both the agent's mod condition (T183) and the statement's own site
    (Y185) are expected to be remapped (to T185 and Y187 respectively).
    """
    mapk1_invalid = Agent(
        'MAPK1',
        mods=[ModCondition('phosphorylation', 'T', '183')],
        db_refs={'UP': 'P28482'},
    )
    st1 = Autophosphorylation(mapk1_invalid, 'Y', '185')
    valid, mapped = sm.map_sites([st1])
    assert len(valid) == 0
    assert len(mapped) == 1

    mapped_stmt = mapped[0]
    site_records = mapped_stmt.mapped_mods
    # (gene, original residue, original position, mapped residue,
    #  mapped position) for each expected record, in order.
    expected_records = (
        ('MAPK1', 'T', '183', 'T', '185'),
        ('MAPK1', 'Y', '185', 'Y', '187'),
    )
    for idx, expected in enumerate(expected_records):
        rec = site_records[idx]
        observed = (rec.gene_name, rec.orig_res, rec.orig_pos,
                    rec.mapped_res, rec.mapped_pos)
        assert observed == expected

    # The rewritten statement carries the corrected sites.
    new_stmt = mapped_stmt.mapped_stmt
    enzyme = new_stmt.enz
    assert enzyme.mods[0].matches(ModCondition('phosphorylation', 'T', '185'))
    assert new_stmt.residue == 'Y'
    assert new_stmt.position == '187'
    assert unicode_strs((mapk1_invalid, st1, valid, mapped))
def test_site_map_selfmodification():
    """Verify that an Autophosphorylation statement gets its sites mapped.

    The MAPK1 agent is built with T183 and the statement targets Y185; the
    mapped output is expected to report T185 and Y187.
    """
    phospho_t183 = ModCondition('phosphorylation', 'T', '183')
    mapk1_invalid = Agent('MAPK1', mods=[phospho_t183],
                          db_refs={'UP': 'P28482'})
    st1 = Autophosphorylation(mapk1_invalid, 'Y', '185')
    (valid, mapped) = sm.map_sites([st1])
    assert len(valid) == 0
    assert len(mapped) == 1

    result = mapped[0]
    mods = result.mapped_mods

    # First record: the agent's mod condition.
    first = mods[0]
    assert (first.gene_name, first.orig_res, first.orig_pos,
            first.mapped_res, first.mapped_pos) == \
        ('MAPK1', 'T', '183', 'T', '185')
    # Second record: the site of the statement itself.
    second = mods[1]
    assert (second.gene_name, second.orig_res, second.orig_pos,
            second.mapped_res, second.mapped_pos) == \
        ('MAPK1', 'Y', '185', 'Y', '187')

    # Inspect the statement produced by the mapper.
    fixed = result.mapped_stmt
    fixed_enz = fixed.enz
    assert fixed_enz.mods[0].matches(
        ModCondition('phosphorylation', 'T', '185'))
    assert fixed.residue == 'Y'
    assert fixed.position == '187'
    assert unicode_strs((mapk1_invalid, st1, valid, mapped))
Beispiel #8
0
def test_site_map_activity_modification():
    """Check site mapping of the mod conditions on an ActiveForm agent."""
    mc = [
        ModCondition('phosphorylation', 'T', '183'),
        ModCondition('phosphorylation', 'Y', '185'),
    ]
    mapk1 = Agent('MAPK1', mods=mc, db_refs={'UP': 'P28482'})

    st1 = ActiveForm(mapk1, 'kinase', True)
    valid, mapped = sm.map_sites([st1])
    assert len(valid) == 0
    assert len(mapped) == 1

    mapped_entry = mapped[0]
    site_records = mapped_entry.mapped_mods
    # Expected (gene, orig residue, orig position, mapped residue,
    # mapped position) per record.
    expected_records = (
        ('MAPK1', 'T', '183', 'T', '185'),
        ('MAPK1', 'Y', '185', 'Y', '187'),
    )
    for idx, expected in enumerate(expected_records):
        rec = site_records[idx]
        assert (rec.gene_name, rec.orig_res, rec.orig_pos, rec.mapped_res,
                rec.mapped_pos) == expected

    assert mapped_entry.original_stmt == st1

    # The agent of the mapped statement should carry the corrected sites.
    for idx, (residue, position) in enumerate((('T', '185'), ('Y', '187'))):
        corrected = ModCondition('phosphorylation', residue, position)
        assert mapped_entry.mapped_stmt.agent.mods[idx].matches(corrected)
    assert unicode_strs((mc, mapk1, st1, valid, mapped))
Beispiel #9
0
def test_site_map_selfmodification():
    """Check site mapping of a SelfModification statement on MAPK1.

    Here each entry of ``mapped_mods`` is indexed as a pair: element 0 is
    compared to a (gene, residue, position) tuple for the original site and
    element 1 holds the mapped residue and position at indices 0 and 1.
    """
    mapk1_invalid = Agent(
        'MAPK1',
        mods=[ModCondition('phosphorylation', 'T', '183')],
        db_refs={'UP': 'P28482'},
    )
    st1 = SelfModification(mapk1_invalid, 'Y', '185')
    valid, mapped = sm.map_sites([st1])
    assert len(valid) == 0
    assert len(mapped) == 1

    mapped_stmt = mapped[0]
    # (original site key, mapped residue, mapped position), in order.
    expectations = (
        (('MAPK1', 'T', '183'), 'T', '185'),
        (('MAPK1', 'Y', '185'), 'Y', '187'),
    )
    for idx, (orig_key, new_res, new_pos) in enumerate(expectations):
        assert mapped_stmt.mapped_mods[idx][0] == orig_key
        assert mapped_stmt.mapped_mods[idx][1][0] == new_res
        assert mapped_stmt.mapped_mods[idx][1][1] == new_pos
    assert mapped_stmt.original_stmt == st1

    # The rewritten statement carries the corrected sites.
    new_stmt = mapped_stmt.mapped_stmt
    enzyme = new_stmt.enz
    assert enzyme.mods[0].matches(ModCondition('phosphorylation', 'T', '185'))
    assert new_stmt.residue == 'Y'
    assert new_stmt.position == '187'
    assert unicode_strs((mapk1_invalid, st1, valid, mapped))
Beispiel #10
0
def test_site_map_selfmodification():
    """Verify that a SelfModification statement gets its sites mapped.

    The entries of ``mapped_mods`` are accessed positionally: ``entry[0]``
    is compared against the original (gene, residue, position) tuple and
    ``entry[1]`` against the mapped residue/position.
    """
    phospho_t183 = ModCondition('phosphorylation', 'T', '183')
    mapk1_invalid = Agent('MAPK1', mods=[phospho_t183],
                          db_refs={'UP': 'P28482'})
    st1 = SelfModification(mapk1_invalid, 'Y', '185')
    (valid, mapped) = sm.map_sites([st1])
    assert len(valid) == 0
    assert len(mapped) == 1

    result = mapped[0]
    # Record for the agent's mod condition.
    assert result.mapped_mods[0][0] == ('MAPK1', 'T', '183')
    assert result.mapped_mods[0][1][0] == 'T'
    assert result.mapped_mods[0][1][1] == '185'
    # Record for the statement's own site.
    assert result.mapped_mods[1][0] == ('MAPK1', 'Y', '185')
    assert result.mapped_mods[1][1][0] == 'Y'
    assert result.mapped_mods[1][1][1] == '187'
    assert result.original_stmt == st1

    # Inspect the statement produced by the mapper.
    fixed = result.mapped_stmt
    fixed_enz = fixed.enz
    assert fixed_enz.mods[0].matches(
        ModCondition('phosphorylation', 'T', '185'))
    assert fixed.residue == 'Y'
    assert fixed.position == '187'
    assert unicode_strs((mapk1_invalid, st1, valid, mapped))
Beispiel #11
0
def test_site_map_modification():
    """Check site mapping of two Phosphorylation statements.

    Two statements are built from agents carrying off-by-one/two
    phosphorylation sites (MAPK1 -> MAPK3 and MAP2K1 -> MAPK1) and passed
    through ``sm.map_sites``.  The test then checks the structure of the
    result (a pair of lists), the number of mapped-mod records for each
    statement, and the sites on the rewritten statements.
    """
    mapk1_invalid = Agent('MAPK1',
                          mods=[
                              ModCondition('phosphorylation', 'T', '183'),
                              ModCondition('phosphorylation', 'Y', '185')
                          ],
                          db_refs={'UP': 'P28482'})
    mapk3_invalid = Agent('MAPK3',
                          mods=[ModCondition('phosphorylation', 'T', '201')],
                          db_refs={'UP': 'P27361'})
    map2k1_invalid = Agent('MAP2K1',
                           mods=[
                               ModCondition('phosphorylation', 'S', '217'),
                               ModCondition('phosphorylation', 'S', '221')
                           ],
                           db_refs={'UP': 'Q02750'})

    st1 = Phosphorylation(mapk1_invalid, mapk3_invalid, 'Y', '203')
    st2 = Phosphorylation(map2k1_invalid, mapk1_invalid, 'Y', '218')
    res = sm.map_sites([st1, st2])

    # The result is a (valid statements, mapped statements) pair of lists.
    assert len(res) == 2
    valid_stmts = res[0]
    mapped_stmts = res[1]
    assert isinstance(valid_stmts, list)
    assert isinstance(mapped_stmts, list)
    assert len(valid_stmts) == 0
    assert len(mapped_stmts) == 2
    # MAPK1 -> MAPK3
    mapped_stmt1 = mapped_stmts[0]
    assert isinstance(mapped_stmt1, MappedStatement)
    assert mapped_stmt1.original_stmt == st1
    assert isinstance(mapped_stmt1.mapped_mods, list)
    # Report the *count* on failure; formatting the list itself with %d
    # would raise TypeError and hide the real assertion failure.
    assert len(mapped_stmt1.mapped_mods) == 4, \
        "Got %d mapped mods." % len(mapped_stmt1.mapped_mods)  # FIXME
    ms = mapped_stmt1.mapped_stmt
    assert isinstance(ms, Statement)
    agent1 = ms.enz
    agent2 = ms.sub
    assert agent1.name == 'MAPK1'
    assert len(agent1.mods) == 2
    assert agent1.mods[0].matches(ModCondition('phosphorylation', 'T', '185'))
    assert agent1.mods[1].matches(ModCondition('phosphorylation', 'Y', '187'))
    assert agent2.mods[0].matches(ModCondition('phosphorylation', 'T', '202'))
    assert ms.residue == 'Y'
    assert ms.position == '204'

    # MAP2K1 -> MAPK1
    mapped_stmt2 = mapped_stmts[1]
    assert isinstance(mapped_stmt2, MappedStatement)
    assert mapped_stmt2.original_stmt == st2
    assert isinstance(mapped_stmt2.mapped_mods, list)
    # The message must describe the statement under test (mapped_stmt2).
    assert len(mapped_stmt2.mapped_mods) == 5, \
        "Got %d mapped mods." % len(mapped_stmt2.mapped_mods)  # FIXME
    ms = mapped_stmt2.mapped_stmt
    assert isinstance(ms, Statement)
    agent1 = ms.enz
    agent2 = ms.sub
    assert agent1.name == 'MAP2K1'
    assert len(agent1.mods) == 2
    assert agent1.mods[0].matches(ModCondition('phosphorylation', 'S', '218'))
    assert agent1.mods[1].matches(ModCondition('phosphorylation', 'S', '222'))
    assert len(agent2.mods) == 2
    assert agent2.mods[0].matches(ModCondition('phosphorylation', 'T', '185'))
    assert agent2.mods[1].matches(ModCondition('phosphorylation', 'Y', '187'))
    # The incorrect phosphorylation residue is passed through to the new
    # statement unchanged
    assert ms.residue == 'Y'
    assert ms.position == '218'
    # Check for unicode
    assert unicode_strs((mapk1_invalid, mapk3_invalid, map2k1_invalid, st1,
                         st2, res, valid_stmts, mapped_stmts))
Beispiel #12
0
def test_site_map_complex():
    """Site-map a RasGef statement over the two invalid MAPK agents.

    NOTE(review): the test name says "complex" but the statement built here
    is a RasGef -- confirm whether the name or the statement type is
    intended.
    """
    mapk1_agent, mapk3_agent = get_invalid_mapks()
    stmt = RasGef(mapk1_agent, mapk3_agent)
    mapping_result = sm.map_sites([stmt])
    # Validation of the mapped output is delegated to the shared helper.
    check_validated_mapks(mapping_result, stmt)
Beispiel #13
0
def test_site_map_activation():
    """Site-map a kinase Activation between the two invalid MAPK agents."""
    mapk1_agent, mapk3_agent = get_invalid_mapks()
    stmt = Activation(mapk1_agent, mapk3_agent, 'kinase')
    mapping_result = sm.map_sites([stmt])
    # Validation of the mapped output is delegated to the shared helper.
    check_validated_mapks(mapping_result, stmt)
Beispiel #14
0
def test_site_map_gap():
    """Site-map a Gap statement over the two invalid MAPK agents."""
    mapk1_agent, mapk3_agent = get_invalid_mapks()
    stmt = Gap(mapk1_agent, mapk3_agent)
    mapping_result = sm.map_sites([stmt])
    # Validation of the mapped output is delegated to the shared helper.
    check_validated_mapks(mapping_result, stmt)
Beispiel #15
0
def test_site_map_complex():
    """Site-map a Complex whose members are the two invalid MAPK agents."""
    members = list(get_invalid_mapks())
    # Exactly two agents are expected, mirroring a two-name unpacking.
    mapk1_agent, mapk3_agent = members
    stmt = Complex([mapk1_agent, mapk3_agent])
    mapping_result = sm.map_sites([stmt])
    # Validation of the mapped output is delegated to the shared helper.
    check_validated_mapks(mapping_result, stmt)
Beispiel #16
0
    def run_preassembly(self, stmts, print_summary=True):
        """Run the full preassembly pipeline on a list of statements.

        The pipeline de-duplicates the statements, maps modification sites,
        then de-duplicates again and combines related statements. The
        outcome is stored on :py:attr:`results` and returned. When
        ``self.basename`` is not None the results dict is also pickled to
        `<basename>_results.pkl`.

        Parameters
        ----------
        stmts : list of :py:class:`indra.statements.Statement`
            Statements to preassemble.
        print_summary : bool
            If True (default), a summary of the statement counts at each
            stage is emitted through the module logger.

        Returns
        -------
        dict
            A dict containing the following entries:

            - `raw`: the statements as passed in.
            - `duplicates1`: unique statements after the first
              de-duplication.
            - `valid`: first element of the site mapper's result
              (statements with valid sites).
            - `mapped`: second element of the site mapper's result (list of
              :py:class:`indra.preassembler.sitemapper.MappedStatement`).
            - `mapped_stmts`: `valid` plus the mapped statements whose
              mapped-mod entries all have a non-None second element.
            - `duplicates2`: unique statements after de-duplicating
              `mapped_stmts`.
            - `related2`: related statements after combining the statements
              in `duplicates2`.
        """
        # Stage 1: collapse duplicates before any site mapping is done.
        pa1 = Preassembler(hierarchies, stmts)
        logger.info("Combining duplicates")
        pa1.combine_duplicates()

        # Stage 2: site mapping on the unique statements.
        logger.info("Mapping sites")
        valid, mapped = sm.map_sites(pa1.unique_stmts)

        # Keep only mapped statements where every site was resolved, i.e.
        # no mapped-mod entry has None as its second element.
        fully_mapped = [
            ms.mapped_stmt for ms in mapped
            if all([mm[1] is not None for mm in ms.mapped_mods])
        ]
        mapped_stmts = valid + fully_mapped

        # Stage 3: de-duplicate again and combine related statements.
        pa2 = Preassembler(hierarchies, mapped_stmts)
        logger.info("Combining duplicates again")
        pa2.combine_duplicates()
        pa2.combine_related()

        # Collect every intermediate product under its documented key.
        self.results = {
            'raw': stmts,
            'duplicates1': pa1.unique_stmts,
            'valid': valid,
            'mapped': mapped,
            'mapped_stmts': mapped_stmts,
            'duplicates2': pa2.unique_stmts,
            'related2': pa2.related_stmts,
        }

        if print_summary:
            logger.info("\nStarting number of statements: %d" % len(stmts))
            logger.info("After duplicate removal: %d" % len(pa1.unique_stmts))
            logger.info("Unique statements with valid sites: %d" % len(valid))
            logger.info("Unique statements with invalid sites: %d" %
                        len(mapped))
            logger.info("After post-mapping duplicate removal: %d" %
                        len(pa2.unique_stmts))
            logger.info("After combining related statements: %d" %
                        len(pa2.related_stmts))

        # Persist to disk only when a basename was configured.
        if self.basename is not None:
            pickle_path = '%s_results.pkl' % self.basename
            with open(pickle_path, 'wb') as fh:
                pickle.dump(self.results, fh, protocol=2)
        return self.results
Beispiel #17
0
    def run_preassembly(self, stmts, print_summary=True):
        """Preassemble the given statements and return all stage outputs.

        Statements are de-duplicated, site-mapped, de-duplicated a second
        time and finally combined into related statements. The collected
        outputs are stored in :py:attr:`results` and returned; if
        ``self.basename`` is not None they are additionally pickled to
        `<basename>_results.pkl`.

        Parameters
        ----------
        stmts : list of :py:class:`indra.statements.Statement`
            Statements to preassemble.
        print_summary : bool
            If True (default), the statement counts for each stage are
            logged via the module logger.

        Returns
        -------
        dict
            A dict containing the following entries:

            - `raw`: the input statements.
            - `duplicates1`: unique statements from the first pass.
            - `valid`: statements the site mapper returned as valid.
            - `mapped`: the site mapper's mapped statements (list of
              :py:class:`indra.preassembler.sitemapper.MappedStatement`).
            - `mapped_stmts`: `valid` followed by the statements of those
              mapped entries in which no mapped mod has None as its second
              element.
            - `duplicates2`: unique statements from the second pass over
              `mapped_stmts`.
            - `related2`: related statements from the second pass.
        """
        # First pass: duplicates are removed ahead of site mapping.
        first_pass = Preassembler(hierarchies, stmts)
        logger.info("Combining duplicates")
        first_pass.combine_duplicates()

        logger.info("Mapping sites")
        (valid, mapped) = sm.map_sites(first_pass.unique_stmts)

        # Gather the statements of entries whose sites were all mapped.
        correctly_mapped_stmts = []
        for entry in mapped:
            site_ok = [mod[1] is not None for mod in entry.mapped_mods]
            if all(site_ok):
                correctly_mapped_stmts.append(entry.mapped_stmt)
        mapped_stmts = valid + correctly_mapped_stmts

        # Second pass: de-duplicate and combine related statements.
        second_pass = Preassembler(hierarchies, mapped_stmts)
        logger.info("Combining duplicates again")
        second_pass.combine_duplicates()
        second_pass.combine_related()

        # Fill out the results dict, one key per stage.
        self.results = {}
        self.results['raw'] = stmts
        self.results['duplicates1'] = first_pass.unique_stmts
        self.results['valid'] = valid
        self.results['mapped'] = mapped
        self.results['mapped_stmts'] = mapped_stmts
        self.results['duplicates2'] = second_pass.unique_stmts
        self.results['related2'] = second_pass.related_stmts

        if print_summary:
            # (message template, count) pairs, logged in pipeline order.
            summary = [
                ("\nStarting number of statements: %d", len(stmts)),
                ("After duplicate removal: %d",
                 len(first_pass.unique_stmts)),
                ("Unique statements with valid sites: %d", len(valid)),
                ("Unique statements with invalid sites: %d", len(mapped)),
                ("After post-mapping duplicate removal: %d",
                 len(second_pass.unique_stmts)),
                ("After combining related statements: %d",
                 len(second_pass.related_stmts)),
            ]
            for template, count in summary:
                logger.info(template % count)

        # Cache the results on disk when a basename is available.
        if self.basename is not None:
            results_filename = '%s_results.pkl' % self.basename
            with open(results_filename, 'wb') as out_file:
                pickle.dump(self.results, out_file, protocol=2)
        return self.results
Beispiel #18
0
def test_site_map_rasgef():
    """Site-map a RasGef statement over the two invalid MAPK agents."""
    mapk1_agent, mapk3_agent = get_invalid_mapks()
    stmt = RasGef(mapk1_agent, mapk3_agent)
    mapping_result = sm.map_sites([stmt])
    # Validation of the mapped output is delegated to the shared helper.
    check_validated_mapks(mapping_result, stmt)
Beispiel #19
0
def test_site_map_rasgap():
    """Site-map a RasGap statement over the two invalid MAPK agents."""
    mapk1_agent, mapk3_agent = get_invalid_mapks()
    stmt = RasGap(mapk1_agent, mapk3_agent)
    mapping_result = sm.map_sites([stmt])
    # Validation of the mapped output is delegated to the shared helper.
    check_validated_mapks(mapping_result, stmt)
Beispiel #20
0
def test_site_map_modification():
    """Check site mapping of two Phosphorylation statements.

    MAPK1 -> MAPK3 and MAP2K1 -> MAPK1 statements are built from agents
    with incorrect phosphorylation sites, run through ``sm.map_sites``, and
    the structure and contents of the mapped output are verified.
    """
    def phos(residue, position):
        # Shorthand for a phosphorylation mod condition at a given site.
        return ModCondition('phosphorylation', residue, position)

    mapk1_invalid = Agent('MAPK1',
                          mods=[phos('T', '183'), phos('Y', '185')],
                          db_refs={'UP': 'P28482'})
    mapk3_invalid = Agent('MAPK3',
                          mods=[phos('T', '201')],
                          db_refs={'UP': 'P27361'})
    map2k1_invalid = Agent('MAP2K1',
                           mods=[phos('S', '217'), phos('S', '221')],
                           db_refs={'UP': 'Q02750'})

    st1 = Phosphorylation(mapk1_invalid, mapk3_invalid, 'Y', '203')
    st2 = Phosphorylation(map2k1_invalid, mapk1_invalid, 'Y', '218')
    res = sm.map_sites([st1, st2])

    # The result is a pair: (valid statements, mapped statements).
    assert len(res) == 2
    valid_stmts = res[0]
    mapped_stmts = res[1]
    assert isinstance(valid_stmts, list)
    assert isinstance(mapped_stmts, list)
    assert len(valid_stmts) == 0
    assert len(mapped_stmts) == 2

    # --- MAPK1 -> MAPK3 ---
    first = mapped_stmts[0]
    assert isinstance(first, MappedStatement)
    assert first.original_stmt == st1
    assert isinstance(first.mapped_mods, list)
    assert len(first.mapped_mods) == 4  # FIXME
    new_stmt = first.mapped_stmt
    assert isinstance(new_stmt, Statement)
    enzyme = new_stmt.enz
    substrate = new_stmt.sub
    assert enzyme.name == 'MAPK1'
    assert len(enzyme.mods) == 2
    assert enzyme.mods[0].matches(phos('T', '185'))
    assert enzyme.mods[1].matches(phos('Y', '187'))
    assert substrate.mods[0].matches(phos('T', '202'))
    assert new_stmt.residue == 'Y'
    assert new_stmt.position == '204'

    # --- MAP2K1 -> MAPK1 ---
    second = mapped_stmts[1]
    assert isinstance(second, MappedStatement)
    assert second.original_stmt == st2
    assert isinstance(second.mapped_mods, list)
    assert len(second.mapped_mods) == 5  # FIXME
    new_stmt = second.mapped_stmt
    assert isinstance(new_stmt, Statement)
    enzyme = new_stmt.enz
    substrate = new_stmt.sub
    assert enzyme.name == 'MAP2K1'
    assert len(enzyme.mods) == 2
    assert enzyme.mods[0].matches(phos('S', '218'))
    assert enzyme.mods[1].matches(phos('S', '222'))
    assert len(substrate.mods) == 2
    assert substrate.mods[0].matches(phos('T', '185'))
    assert substrate.mods[1].matches(phos('Y', '187'))
    # The incorrect phosphorylation residue on the statement itself is
    # carried over to the new statement as-is.
    assert new_stmt.residue == 'Y'
    assert new_stmt.position == '218'

    # Check for unicode
    assert unicode_strs((mapk1_invalid, mapk3_invalid, map2k1_invalid, st1,
                         st2, res, valid_stmts, mapped_stmts))
Beispiel #21
0
def test_site_map_activation():
    """Run site mapping on a kinase Activation of MAPK3 by MAPK1."""
    subj_agent, obj_agent = get_invalid_mapks()
    activation = Activation(subj_agent, obj_agent, 'kinase')
    # Hand the raw mapper output and the original statement to the shared
    # checker.
    check_validated_mapks(sm.map_sites([activation]), activation)