예제 #1
0
def test_regulate_amount4_subj_act():
    """Check that a subject activity modifier becomes an ActivityCondition.

    One single-edge graph is processed per BEL activity name. Each must
    yield exactly one IncreaseAmount statement whose subject carries the
    expected activity type, is flagged active, and has one evidence.
    """
    mek = protein(name='MAP2K1', namespace='HGNC')
    erk = protein(name='MAPK1', namespace='HGNC')
    # (activity name placed on the edge, activity type expected on the subject)
    cases = [
        ('tscript', 'transcription'),
        ('act', 'activity'),
    ]
    for bel_activity, expected_type in cases:
        g = pybel.BELGraph()
        g.add_qualified_edge(mek, erk, relation=pc.INCREASES,
                             subject_modifier=activity(name=bel_activity),
                             evidence="Some evidence.", citation='123456')
        pbp = bel.process_pybel_graph(g)
        assert pbp.statements
        assert len(pbp.statements) == 1
        stmt = pbp.statements[0]
        assert isinstance(stmt, IncreaseAmount)
        subj = stmt.subj
        assert subj.name == 'MAP2K1'
        assert isinstance(subj.activity, ActivityCondition)
        assert subj.activity.activity_type == expected_type
        # Identity check: the flag must be the bool True, not merely truthy.
        assert subj.activity.is_active is True
        assert len(stmt.evidence) == 1
예제 #2
0
def test_phosphorylation_one_site_with_evidence():
    """A directlyIncreases edge onto a singly phosphorylated protein gives one
    Phosphorylation whose evidence records the edge hash, PMID, text, BEL
    string and epistemics."""
    evidence_text = 'Some evidence.'
    pmid = '123456'
    kinase = protein(name='MAP2K1', namespace='HGNC')
    phospho_thr185 = pmod('Ph', position=185, code='Thr')
    substrate = protein(name='MAPK1', namespace='HGNC',
                        variants=[phospho_thr185])
    graph = pybel.BELGraph()
    edge_key = graph.add_qualified_edge(
        kinase,
        substrate,
        relation=pc.DIRECTLY_INCREASES,
        evidence=evidence_text,
        citation=pmid,
        annotations={"TextLocation": 'Abstract'},
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, Phosphorylation)
    assert stmt.residue == 'T'
    assert stmt.position == '185'
    # The modification lives on the statement, not on either agent.
    assert stmt.enz.name == 'MAP2K1'
    assert stmt.enz.mods == []
    assert stmt.sub.name == 'MAPK1'
    assert stmt.sub.mods == []
    # Evidence details
    assert len(stmt.evidence) == 1
    evidence = stmt.evidence[0]
    expected_bel = ('p(HGNC:MAP2K1) directlyIncreases '
                    'p(HGNC:MAPK1, pmod(Ph, Thr, 185))')
    assert evidence.source_api == 'bel'
    assert evidence.source_id == edge_key
    assert evidence.pmid == pmid
    assert evidence.text == evidence_text
    assert evidence.annotations == {'bel': expected_bel}
    assert evidence.epistemics == {'direct': True, 'section_type': 'abstract'}
예제 #3
0
    def test_simple(self):
        """A graph with one bioprocess and three scored proteins upstream of
        it yields exactly one candidate mechanism, keyed by that bioprocess."""
        score_key = 'DGXP'
        graph = pybel.BELGraph()

        a = protein('HGNC', 'A')
        b = protein('HGNC', 'B')
        c = protein('HGNC', 'c')
        d = bioprocess('GOBP', 'D')

        for node in (a, b, c, d):
            graph.add_node_from_data(node)

        # Only the proteins carry a score; the bioprocess has none.
        for node, score in ((a, 2), (b, -1), (c, 1)):
            graph.nodes[node][score_key] = score

        graph.add_increases(a, b, citation=n(), evidence=n())
        graph.add_decreases(b, d, citation=n(), evidence=n())
        graph.add_increases(a, c, citation=n(), evidence=n())
        graph.add_increases(c, d, citation=n(), evidence=n())

        mechanisms = generate_bioprocess_mechanisms(graph, score_key)

        self.assertEqual(1, len(mechanisms))
        self.assertIn(d, mechanisms)
예제 #4
0
def test_conversion():
    """An active enzyme that directly increases a reaction node is turned
    into a single Conversion with the reactant and products as agents."""
    pip2_name = '1-Phosphatidyl-D-myo-inositol 4,5-bisphosphate'
    dag_name = 'Diacylglycerol'
    ip3_name = 'Inositol 1,4,5-trisphosphate'

    enzyme = protein(name='PLCG1', namespace='HGNC')
    substrate = abundance('SCHEM', pip2_name)
    product_1 = abundance('SCHEM', dag_name)
    product_2 = abundance('SCHEM', ip3_name)
    rxn = reaction(reactants=[substrate], products=[product_1, product_2])

    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        enzyme,
        rxn,
        relation=pc.DIRECTLY_INCREASES,
        subject_modifier=activity(name='activity'),
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, Conversion)
    # Controller
    assert stmt.subj.name == 'PLCG1'
    assert stmt.subj.activity.activity_type == 'activity'
    assert stmt.subj.activity.is_active is True
    # Reactant side
    assert len(stmt.obj_from) == 1
    assert isinstance(stmt.obj_from[0], Agent)
    assert stmt.obj_from[0].name == pip2_name
    # Product side
    assert len(stmt.obj_to) == 2
    assert stmt.obj_to[0].name == dag_name
    assert stmt.obj_to[1].name == ip3_name
    assert len(stmt.evidence) == 1
예제 #5
0
def get_bel() -> pybel.BELGraph:
    """Build a BEL graph holding one reaction node per queried Rhea reaction."""
    version = bioversions.get_version('rhea')
    # Load the Rhea RDF dump for this version.
    rdf_graph = BIO2BEL_MODULE.ensure_rdf('rhea', version, url=URL)
    # Select every reaction with its ID and equation. Per the original author,
    # requiring rh:bidirectionalReaction restricts the result to the
    # non-directional form of each reaction.
    sparql = """
        SELECT ?reaction ?id ?reactionEquation WHERE {
            ?reaction rh:equation ?reactionEquation .
            ?reaction rh:bidirectionalReaction ?bdr .
            ?reaction rh:id ?id
        }
        """
    rows = rdf_graph.query(sparql)
    rv = pybel.BELGraph(name='rhea', version=version)
    # No explicit de-duplication here: an occasional repeated reaction is
    # left for the PyBEL graph to deal with.
    for reaction_uri, reaction_id, reaction_equation in rows:
        # Reactants and products belonging to this reaction
        members = _participants(rdf_graph, reaction_uri)
        rv.add_node_from_data(dsl.Reaction(
            members['reactants'],
            members['products'],
            namespace='RHEA',
            name=reaction_equation,
            identifier=reaction_id,
        ))
    return rv
예제 #6
0
    def test_simple(self):
        """Same scenario built from node tuples and raw edges: one bioprocess,
        three scored proteins, and exactly one candidate mechanism."""
        score_key = 'DGXP'
        graph = pybel.BELGraph()

        a = (PROTEIN, 'HGNC', 'A')
        b = (PROTEIN, 'HGNC', 'B')
        c = (PROTEIN, 'HGNC', 'c')
        d = (BIOPROCESS, 'GOBP', 'D')

        for node in (a, b, c, d):
            graph.add_simple_node(*node)

        # Only the proteins carry a score; the bioprocess has none.
        for node, score in ((a, 2), (b, -1), (c, 1)):
            graph.node[node][score_key] = score

        edges = (
            (a, b, INCREASES),
            (b, d, DECREASES),
            (a, c, INCREASES),
            (c, d, INCREASES),
        )
        for source, target, relation in edges:
            graph.add_edge(source, target, attr_dict={RELATION: relation})

        mechanisms = cmpa.generate_bioprocess_mechanisms(graph, score_key)

        self.assertEqual(1, len(mechanisms))
        self.assertIn(d, mechanisms)
예제 #7
0
파일: bel.py 프로젝트: kkaris/pypath
 def reset_bel_graph(self):
     """
     Replace the ``bel_graph`` attribute with a brand-new, empty
     ``pybel.BELGraph``.
     """

     fresh_graph = pybel.BELGraph()
     self.bel_graph = fresh_graph
예제 #8
0
def get_bel() -> pybel.BELGraph:
    """Build a BEL graph from the rows returned by ``get_df()``.

    Each row links a source and a target biological process, either with a
    part-of edge (``isPartOf``) or an equivalence edge (``equivalentTo``).

    :raises ValueError: if a row carries any other relation.
    """
    rv = pybel.BELGraph(
        name='ComPath Mappings',
        version='1.1.0',
        description=(
            'Hierarchical and equivalence relations between entries in KEGG, Reactome, PathBank,'
            ' and WikiPathways.'
        ),
    )
    # Relation label in the table -> graph method that adds the matching edge.
    edge_adders = {
        'isPartOf': rv.add_part_of,
        'equivalentTo': rv.add_equivalence,
    }
    for row in get_df().values:
        source_ns, source_id, source_name, relation, target_ns, target_id, target_name = row
        source = BiologicalProcess(namespace=source_ns, identifier=source_id, name=source_name)
        target = BiologicalProcess(namespace=target_ns, identifier=target_id, name=target_name)
        add_edge = edge_adders.get(relation)
        if add_edge is None:
            raise ValueError(f'invalid mapping with relation: {relation}')
        add_edge(source, target)

    return rv
예제 #9
0
 def __init__(self,
              stmts=None,
              name=None,
              description=None,
              version=None,
              authors=None,
              contact=None,
              license=None,
              copyright=None,
              disclaimer=None):
     """Store the statements and create an empty, annotated BEL graph.

     Parameters
     ----------
     stmts : Optional[list]
         Statements kept on ``self.statements``; an empty list when None.
     name : Optional[str]
         Name of the BEL graph. Defaults to ``'indra'``.
     description : Optional[str]
         Passed through to ``pybel.BELGraph``.
     version : Optional[str]
         Version of the BEL graph. Defaults to a random UUID4 string.
     authors, contact, license, copyright, disclaimer : Optional[str]
         Passed through to ``pybel.BELGraph`` unchanged.
     """
     self.statements = [] if stmts is None else stmts
     if name is None:
         name = 'indra'
     if version is None:
         version = str(uuid.uuid4())
     # Create the model and assign metadata
     self.model = pybel.BELGraph(
         name=name,
         description=description,
         version=version,
         authors=authors,
         contact=contact,
         license=license,
         copyright=copyright,
         disclaimer=disclaimer,
     )
     # Namespace keyword -> URL of the .belns file registered on the graph.
     # No URLs are registered for PFAM or NXPFA.
     ns_dict = {
         'HGNC':
         'https://arty.scai.fraunhofer.de/artifactory/bel/'
         'namespace/hgnc-human-genes/hgnc-human-genes-20170725.belns',
         'UP':
         'https://arty.scai.fraunhofer.de/artifactory/bel/'
         'namespace/swissprot/swissprot-20170725.belns',
         'IP':
         'https://arty.scai.fraunhofer.de/artifactory/bel/'
         'namespace/interpro/interpro-20170731.belns',
         'FPLX':
         'https://raw.githubusercontent.com/sorgerlab/famplex/'
         '5f5b573fe26d7405dbccb711ae8e5697b6a3ec7e/export/famplex.belns',
         'CHEBI':
         'https://arty.scai.fraunhofer.de/artifactory/bel/'
         'namespace/chebi-ids/chebi-ids-20170725.belns',
         'GO':
         'https://arty.scai.fraunhofer.de/artifactory/bel/'
         'namespace/go/go-20180109.belns',
         'MESH':
         'https://arty.scai.fraunhofer.de/artifactory/bel/'
         'namespace/mesh-processes/mesh-processes-20170725.belns'
     }
     self.model.namespace_url.update(ns_dict)
     # Raw string: '\d' in a plain literal is an invalid escape sequence and
     # triggers a warning on recent Python versions. The value is unchanged.
     self.model.namespace_pattern['PUBCHEM'] = r'\d+'
예제 #10
0
def test_regulate_amount1_prot_obj():
    """A plain protein-to-protein ``increases`` edge gives one IncreaseAmount
    statement with a single evidence."""
    source = protein(name='MAP2K1', namespace='HGNC')
    target = protein(name='MAPK1', namespace='HGNC')
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        source,
        target,
        relation=pc.INCREASES,
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, IncreaseAmount)
    assert len(stmt.evidence) == 1
예제 #11
0
def test_controlled_transloc_loc_cond():
    """An edge whose object modifier is a translocation currently produces
    no statements."""
    controller = protein(name='MAP2K1', namespace='HGNC')
    moved = protein(name='MAPK1', namespace='HGNC')
    graph = pybel.BELGraph()
    secretion = translocation(
        from_loc=entity('GOCC', 'intracellular'),
        to_loc=entity('GOCC', 'extracellular space'),
    )
    graph.add_qualified_edge(
        controller,
        moved,
        relation=pc.INCREASES,
        object_modifier=secretion,
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    assert not processor.statements
예제 #12
0
def test_regulate_amount3_deg():
    """Increasing the degradation of a protein is read as a DecreaseAmount."""
    # FIXME: Create a stability-specific statement for p->deg(p(Foo))
    source = protein(name='MAP2K1', namespace='HGNC')
    target = protein(name='MAPK1', namespace='HGNC')
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        source,
        target,
        relation=pc.INCREASES,
        object_modifier=degradation(),
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, DecreaseAmount)
    assert len(stmt.evidence) == 1
예제 #13
0
def test_activation_bioprocess():
    """A protein that increases a biological process gives one Activation
    with the process as its object."""
    regulator = protein(name='BAX', namespace='HGNC')
    process = bioprocess(name='apoptosis', namespace='GOBP')
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        regulator,
        process,
        relation=pc.INCREASES,
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, Activation)
    assert stmt.subj.name == 'BAX'
    assert stmt.obj.name == 'apoptosis'
    assert stmt.obj.db_refs == {}  # FIXME: Update when GO lookup is implemented
    assert len(stmt.evidence) == 1
예제 #14
0
    def __init__(self,
                 stmts=None,
                 name=None,
                 description=None,
                 version=None,
                 **kwargs):
        """Keep the given statements and create the underlying BEL graph.

        ``stmts`` defaults to a new empty list. ``name``, ``version`` and
        ``description``, together with any extra keyword arguments, are
        handed to ``pybel.BELGraph``.
        """
        self.statements = stmts if stmts is not None else []

        # The graph receives the metadata straight from the arguments.
        self.model = pybel.BELGraph(
            name=name,
            version=version,
            description=description,
            **kwargs
        )
예제 #15
0
def test_gap():
    """An active protein directly decreasing the GTP-bound activity of
    another is read as a Gap statement."""
    rasa1 = protein(name='RASA1', namespace='HGNC')
    kras = protein(name='KRAS', namespace='HGNC')
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        rasa1,
        kras,
        relation=pc.DIRECTLY_DECREASES,
        subject_modifier=activity(name='activity'),
        object_modifier=activity(name='gtp'),
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, Gap)
    assert stmt.gap.name == 'RASA1'
    assert stmt.ras.name == 'KRAS'
    # Only the GAP keeps an activity condition.
    assert stmt.gap.activity.activity_type == 'activity'
    assert stmt.gap.activity.is_active is True
    assert stmt.ras.activity is None
    assert len(stmt.evidence) == 1
예제 #16
0
def test_subject_transloc_loc_cond():
    """A translocation on the subject becomes a location condition on the
    subject agent, taken from the translocation's to_loc."""
    source = protein(name='MAP2K1', namespace='HGNC')
    target = protein(name='MAPK1', namespace='HGNC')
    secretion = translocation(
        from_loc=entity('GOCC', 'intracellular'),
        to_loc=entity('GOCC', 'extracellular space'),
    )
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        source,
        target,
        relation=pc.INCREASES,
        subject_modifier=secretion,
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, IncreaseAmount)
    assert stmt.subj.name == 'MAP2K1'
    assert stmt.subj.location == 'extracellular space'
    assert stmt.obj.name == 'MAPK1'
예제 #17
0
def test_indirect_gef_is_activation():
    """With the indirect ``increases`` relation, raising a protein's
    GTP-bound activity is read as an Activation."""
    sos1 = protein(name='SOS1', namespace='HGNC')
    kras = protein(name='KRAS', namespace='HGNC')
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        sos1,
        kras,
        relation=pc.INCREASES,
        subject_modifier=activity(name='activity'),
        object_modifier=activity(name='gtp'),
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, Activation)
    assert stmt.subj.name == 'SOS1'
    assert stmt.obj.name == 'KRAS'
    assert stmt.subj.activity.activity_type == 'activity'
    assert stmt.subj.activity.is_active is True
    # The object's activity is carried by the statement, not by the agent.
    assert stmt.obj.activity is None
    assert stmt.obj_activity == 'gtpbound'
    assert len(stmt.evidence) == 1
예제 #18
0
def test_phosphorylation_two_sites():
    """A substrate carrying two phosphorylation variants gives two
    statements, one per site, in variant order."""
    kinase = protein(name='MAP2K1', namespace='HGNC')
    sites = [
        pmod('Ph', position=185, code='Thr'),
        pmod('Ph', position=187, code='Tyr'),
    ]
    substrate = protein(name='MAPK1', namespace='HGNC', variants=sites)
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        kinase,
        substrate,
        relation=pc.DIRECTLY_INCREASES,
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 2
    # (residue, position) expected on the first and second statement
    expected_sites = [('T', '185'), ('Y', '187')]
    for stmt, (residue, position) in zip(stmts, expected_sites):
        assert stmt.residue == residue
        assert stmt.position == position
    for stmt in stmts:
        assert stmt.sub.mods == []
    assert len(stmts[0].evidence) == 1
예제 #19
0
def test_gtpactivation():
    """A GTP-bound subject directly increasing a kinase activity is read as
    a GtpActivation."""
    gtpase = protein(name='KRAS', namespace='HGNC')
    kinase = protein(name='BRAF', namespace='HGNC')
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        gtpase,
        kinase,
        relation=pc.DIRECTLY_INCREASES,
        subject_modifier=activity(name='gtp'),
        object_modifier=activity(name='kin'),
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, GtpActivation)
    # Subject side
    assert stmt.subj.name == 'KRAS'
    assert stmt.subj.activity.activity_type == 'gtpbound'
    assert stmt.subj.activity.is_active is True
    # Object side
    assert stmt.obj.name == 'BRAF'
    assert stmt.obj.activity is None
    assert stmt.obj_activity == 'kinase'
    assert len(stmt.evidence) == 1
예제 #20
0
def process_bel_stmt(bel: str, squeeze: bool = False):
    """Parse one BEL statement and run it through the PyBEL processor.

    Parameters
    ----------
    bel : str
        A BEL statement. See example below.
    squeeze : Optional[bool]
        When true and the processor produced exactly one statement, that
        statement is returned instead of the processor.

    Returns
    -------
    Union[Statement, PybelProcessor]
        The processor holding the INDRA Statements derived from the BEL
        statement, or the single INDRA Statement itself when ``squeeze``
        is true and only one was produced.

    Examples
    --------
    >>> from indra.sources.bel import process_bel_stmt
    >>> bel_s = 'kin(p(FPLX:MEK)) -> kin(p(FPLX:ERK))'
    >>> process_bel_stmt(bel_s, squeeze=True)
    Activation(MEK(kinase), ERK(), kinase)
    """
    parsed = pybel.parse(bel)
    # Move modifier-type entries found on the source/target entry into the
    # matching source/target modifier entry of the parse result.
    modifier_keys = (pc.MODIFIER, pc.EFFECT, pc.FROM_LOC, pc.TO_LOC,
                     pc.LOCATION)
    sides = (
        (pc.SOURCE, pc.SOURCE_MODIFIER),
        (pc.TARGET, pc.TARGET_MODIFIER),
    )
    for node_key, modifier_key in sides:
        node = parsed[node_key]
        for key in modifier_keys:
            if key not in node:
                continue
            moved = node.pop(key)
            parsed.setdefault(modifier_key, {})[key] = moved
    graph = pybel.BELGraph()
    add_sbel_row(graph, parsed)
    processor = process_pybel_graph(graph)
    if not squeeze or len(processor.statements) != 1:
        return processor
    return processor.statements[0]
예제 #21
0
def test_regulate_activity():
    """A kinase-active subject increasing the kinase activity of the object
    is read as one Activation.

    The subject keeps its activity as an ActivityCondition. The object's
    activity is reported on the statement (``obj_activity``) and not on
    the object agent.
    """
    mek = protein(name='MAP2K1', namespace='HGNC')
    erk = protein(name='MAPK1', namespace='HGNC')
    g = pybel.BELGraph()
    g.add_qualified_edge(mek, erk, relation=pc.INCREASES,
                         subject_modifier=activity(name='kin'),
                         object_modifier=activity(name='kin'),
                         evidence="Some evidence.", citation='123456')
    pbp = bel.process_pybel_graph(g)
    assert pbp.statements
    assert len(pbp.statements) == 1
    stmt = pbp.statements[0]
    assert isinstance(stmt, Activation)
    subj = stmt.subj
    assert subj.name == 'MAP2K1'
    assert isinstance(subj.activity, ActivityCondition)
    assert subj.activity.activity_type == 'kinase'
    # Identity check: the flag must be the bool True, not merely truthy.
    assert subj.activity.is_active is True
    obj = stmt.obj
    assert obj.name == 'MAPK1'
    assert obj.activity is None
    assert stmt.obj_activity == 'kinase'
    assert len(stmt.evidence) == 1
예제 #22
0
def test_subject_transloc_active_form():
    """When a translocated protein increases its own activity, the result is
    an ActiveForm whose agent location is the translocation's to_loc."""
    relocated = protein(name='MAP2K1', namespace='HGNC')
    same_protein = protein(name='MAP2K1', namespace='HGNC')
    secretion = translocation(
        from_loc=entity('GOCC', 'intracellular'),
        to_loc=entity('GOCC', 'extracellular space'),
    )
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        relocated,
        same_protein,
        relation=pc.INCREASES,
        subject_modifier=secretion,
        object_modifier=activity(name='kin'),
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, ActiveForm)
    agent = stmt.agent
    assert agent.name == 'MAP2K1'
    assert agent.location == 'extracellular space'
    assert agent.activity is None
    assert stmt.activity == 'kinase'
    assert stmt.is_active is True
예제 #23
0
def test_active_form():
    """A phosphorylated protein increasing its own transcriptional activity
    gives an ActiveForm whose agent carries the modification."""
    phospho_ser33 = pmod('Ph', position=33, code='Ser')
    modified_p53 = protein(name='TP53', namespace='HGNC',
                           variants=[phospho_ser33])
    plain_p53 = protein(name='TP53', namespace='HGNC')
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        modified_p53,
        plain_p53,
        relation=pc.INCREASES,
        object_modifier=activity(name='tscript'),
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 1
    stmt = stmts[0]
    assert isinstance(stmt, ActiveForm)
    assert stmt.activity == 'transcription'
    assert stmt.is_active is True
    agent = stmt.agent
    assert agent.name == 'TP53'
    assert len(agent.mods) == 1
    mod_condition = agent.mods[0]
    assert mod_condition.mod_type == 'phosphorylation'
    assert mod_condition.residue == 'S'
    assert mod_condition.position == '33'
    assert len(stmt.evidence) == 1
예제 #24
0
def test_complex_stmt_with_activation():
    """A complex acting as subject gives two statements: a Complex of its
    members, then an Activation whose subject has a bound condition."""
    raf = protein(name='BRAF', namespace='HGNC')
    mek = protein(name='MAP2K1', namespace='HGNC')
    erk = protein(name='MAPK1', namespace='HGNC')
    raf_mek = complex_abundance([raf, mek])
    graph = pybel.BELGraph()
    graph.add_qualified_edge(
        raf_mek,
        erk,
        relation=pc.DIRECTLY_INCREASES,
        object_modifier=activity(name='kin'),
        evidence="Some evidence.",
        citation='123456',
    )
    processor = bel.process_pybel_graph(graph)
    stmts = processor.statements
    assert stmts
    assert len(stmts) == 2
    complex_stmt = stmts[0]
    assert isinstance(complex_stmt, Complex)
    assert len(complex_stmt.agent_list()) == 2
    member_names = sorted(ag.name for ag in complex_stmt.agent_list())
    assert member_names == ['BRAF', 'MAP2K1']
    assert complex_stmt.evidence
    activation_stmt = stmts[1]
    assert isinstance(activation_stmt, Activation)
    assert activation_stmt.subj.name == 'BRAF'
    assert activation_stmt.subj.bound_conditions[0].agent.name == 'MAP2K1'
    assert activation_stmt.obj.name == 'MAPK1'
    assert activation_stmt.obj.activity is None
    assert activation_stmt.obj_activity == 'kinase'
예제 #25
0
def get_bel() -> pybel.BELGraph:
    """Build the HMDD graph of miRNA-to-disease ``regulates`` edges.

    miRNA names are grounded against mirbase. Disease names are grounded
    against several vocabularies, retrying with comma-separated parts
    re-ordered. Rows whose miRNA or disease could not be grounded are left
    out of the graph.
    """
    # Table columns: category, mir, disease, pmid, description
    df = pd.read_csv(
        ensure_path(PREFIX, URL),
        sep='\t',
        dtype=str,
        encoding="ISO-8859-1",
    )

    # --- miRNAs ---------------------------------------------------------
    mirna_to_dsl = {}
    failed_mirnas = 0
    mirnas = df['mir'].unique()
    mirna_progress = tqdm(mirnas, desc='mapping miRNA names')
    for mirna_text in mirna_progress:
        _, identifier, name = pyobo.ground('mirbase', mirna_text)
        if identifier is not None:
            mirna_to_dsl[mirna_text] = pybel.dsl.MicroRna(
                namespace='mirbase',
                identifier=identifier,
                name=name,
            )
        else:
            mirna_progress.write(f'[mirbase] could not ground: {mirna_text}')
            failed_mirnas += 1

    logger.info(f'failed on {failed_mirnas}/{len(mirnas)} miRNAs')

    # --- diseases -------------------------------------------------------
    disease_prefixes = ['mondo', 'doid', 'efo', 'hp', 'mesh']
    disease_to_dsl = {}
    failed_diseases = 0
    diseases = df['disease'].unique()
    disease_progress = tqdm(diseases, desc='mapping disease names')
    for disease_text in disease_progress:
        prefix, identifier, name = pyobo.ground(disease_prefixes, disease_text)
        if identifier is None and ', ' in disease_text:
            # Second attempt: swap the parts around the first ", ".
            left, _, right = disease_text.partition(', ')
            swapped = f'{right} {left}'
            prefix, identifier, name = pyobo.ground(disease_prefixes, swapped)
            if identifier is None and ', ' in swapped:
                # Third attempt: all comma-separated parts in reverse order.
                fully_reversed = ' '.join(
                    part.strip()
                    for part in reversed(disease_text.split(','))
                )
                prefix, identifier, name = pyobo.ground(disease_prefixes, fully_reversed)
        if identifier is not None:
            disease_to_dsl[disease_text] = pybel.dsl.Pathology(
                namespace=prefix,
                identifier=identifier,
                name=name,
            )
        else:
            disease_progress.write(f'could not ground {disease_text}')
            failed_diseases += 1

    logger.info(f'failed on {failed_diseases}/{len(diseases)} diseases')

    # --- edges ----------------------------------------------------------
    rv = pybel.BELGraph(name='HMDD', version=VERSION)
    for _category, mir, disease, pmid, text in df.values:
        source = mirna_to_dsl.get(mir)
        target = disease_to_dsl.get(disease)
        if source and target:
            rv.add_regulates(
                source,
                target,
                citation=pmid,
                evidence=text,
            )
    return rv
예제 #26
0
def get_graph(identifier: str, *, rows: Optional[int] = None) -> pybel.BELGraph:
    """Create an empty BEL graph, enrich it for ``identifier``, and return it.

    ``rows`` is forwarded unchanged to ``enrich_graph``.
    """
    rv = pybel.BELGraph()
    enrich_graph(rv, identifier, rows=rows)
    return rv
예제 #27
0
def get_similarity_graph(
    *,
    fullgraph=DEFAULT_FULLGRAPH_WITHOUT_CHEMSIM_PICKLE,
    rebuild: bool = False,
    mapping_file=DEFAULT_CHEMICALS_MAPPING_PATH,
    chemsim_graph_path=DEFAULT_CHEMSIM_PICKLE,
    clustered: bool = True,
    similarity=0.7,
    name='Chemical Similarity Graph',
    version='1.1.0',
    authors='',
    contact='',
    description='',
):
    """
    Create a BELGraph with chemicals as nodes, and similarity as edges.

    :param fullgraph: a BELGraph, or the path of a pickled BELGraph, whose
        ``pubchem.compound`` nodes are the chemicals to compare
    :param rebuild: if false and ``chemsim_graph_path`` exists, the pickled
        graph stored there is loaded and returned instead of being rebuilt
    :param mapping_file: a tab-separated file with ``PubchemID`` and
        ``Smiles`` columns; if it exists it is used as a SMILES cache and
        rewritten with any newly looked-up chemicals
    :param chemsim_graph_path: where the resulting graph is pickled
    :param clustered: if true, connect chemicals that fall in the same
        cluster; otherwise connect pairs whose similarity reaches
        ``similarity``
    :param similarity: the minimum similarity for an edge (only used when
        ``clustered`` is false)
    :param name: name of the resulting graph
    :param version: version of the resulting graph
    :param authors: authors of the resulting graph
    :param contact: contact of the resulting graph
    :param description: description of the resulting graph
    :return: the chemical similarity BELGraph
    """
    # Look for the cached graph at the path it is written to below, and load
    # it with the reader that matches pybel.to_pickle.
    # NOTE(review): unpickling executes arbitrary code; only point
    # chemsim_graph_path / fullgraph at trusted files.
    if not rebuild and os.path.exists(chemsim_graph_path):
        return pybel.from_pickle(chemsim_graph_path)
    if isinstance(fullgraph, pybel.BELGraph):
        fullgraph_without_chemsim = fullgraph
    else:
        fullgraph_without_chemsim = pybel.from_pickle(fullgraph)
    pubchem_ids = [
        node.identifier
        for node in fullgraph_without_chemsim.nodes()
        if node.namespace == 'pubchem.compound'
    ]

    if os.path.exists(mapping_file):
        chemicals_mapping = pd.read_csv(
            mapping_file,
            sep="\t",
            dtype={
                'PubchemID': str,
                'Smiles': str
            },
            index_col=False,
        )
        pubchem_id_to_smiles = {}
        new_chemicals = []
        smiles = []
        for pubchem_id in tqdm(pubchem_ids, desc="Getting SMILES"):
            known_smiles = chemicals_mapping.loc[
                chemicals_mapping["PubchemID"] == pubchem_id, "Smiles"]
            if known_smiles.empty:
                # Not cached yet: look it up and remember it for the file.
                chemical_smiles = cid_to_smiles(pubchem_id)
                if not isinstance(chemical_smiles, str):
                    chemical_smiles = chemical_smiles.decode("utf-8")
                pubchem_id_to_smiles[pubchem_id] = chemical_smiles
                new_chemicals.append(pubchem_id)
                smiles.append(chemical_smiles)
            else:
                pubchem_id_to_smiles[pubchem_id] = known_smiles.iloc[0]
        if new_chemicals:
            new_df = pd.DataFrame({"PubchemID": new_chemicals, "Smiles": smiles})
            # DataFrame.append no longer exists in pandas 2.x.
            chemicals_mapping = pd.concat([chemicals_mapping, new_df])
        chemicals_mapping.to_csv(mapping_file, sep='\t', index=False)
    else:
        pubchem_id_to_smiles = get_smiles(pubchem_ids)

    pubchem_id_to_fingerprint = get_fingerprints(pubchem_id_to_smiles)

    chemsim_graph = pybel.BELGraph(
        name=name,
        version=version,
        description=description,
        authors=authors,
        contact=contact,
    )

    if clustered:
        clustered_df = cluster_chemicals(
            rebuild=True, chemicals_dict=pubchem_id_to_fingerprint)
        clusters = clustered_df['Cluster'].unique().tolist()
        for cluster in tqdm(clusters, desc='Creating similarity BELGraph'):
            chemicals = clustered_df.loc[clustered_df.Cluster == cluster]
            if len(chemicals) == 1:
                continue
            for ind, row in chemicals.iterrows():
                for ind1, row1 in chemicals.iterrows():
                    if row['PubchemID'] == row1['PubchemID']:
                        continue
                    chemical_01 = pybel.dsl.Abundance(
                        namespace='pubchem.compound',
                        identifier=row['PubchemID'])
                    chemical_02 = pybel.dsl.Abundance(
                        namespace='pubchem.compound',
                        identifier=row1['PubchemID'])
                    # One edge per unordered pair: skip if either direction
                    # is already present.
                    if chemsim_graph.has_edge(
                            chemical_01,
                            chemical_02) or chemsim_graph.has_edge(
                                chemical_02, chemical_01):
                        continue
                    chemsim_graph.add_unqualified_edge(chemical_01,
                                                       chemical_02,
                                                       'association')
    else:
        similarities = get_similarity(pubchem_id_to_fingerprint)
        for (source_pubchem_id, target_pubchem_id), sim in tqdm(
                similarities.items(), desc='Creating similarity BELGraph'):
            if sim < similarity:
                continue
            chemsim_graph.add_unqualified_edge(
                pybel.dsl.Abundance(namespace=PUBCHEM_NAMESPACE,
                                    identifier=source_pubchem_id),
                pybel.dsl.Abundance(namespace=PUBCHEM_NAMESPACE,
                                    identifier=target_pubchem_id),
                'association',
            )
    pybel.to_pickle(chemsim_graph, chemsim_graph_path)
    return chemsim_graph
예제 #28
0
        CITATION_TYPE: evidence['citation']['type'],
        CITATION_REFERENCE: evidence['citation']['id']
    }


# Keys of an evidence's biological context -> annotation names.
# NOTE(review): not referenced in the visible part of this script; confirm
# where it is used.
annotation_map = {
    'tissue': 'Tissue',
    'disease': 'Disease',
    'species_common_name': 'Species'
}

# Species common name -> identifier string
# (presumably NCBI Taxonomy IDs; TODO confirm).
species_map = {'human': '9606', 'rat': '10116', 'mouse': '10090'}

# Value translation tables, keyed by annotation name.
annotation_value_map = {'Species': species_map}

# Empty graph plus a BEL parser bound to it.
graph = pybel.BELGraph()
parser = pybel.parser.BelParser(graph)

# `res` is defined outside this excerpt; it is indexed here as a mapping with
# a 'graph' -> 'edges' list.
for edge in res['graph']['edges']:
    for evidence in edge['metadata']['evidences']:
        # Skip evidences that have no citation or an empty one.
        if 'citation' not in evidence or not evidence['citation']:
            continue

        # Annotations gathered for this evidence.
        d = {}

        if 'biological_context' in evidence:
            annotations = evidence['biological_context']

            # Only a non-empty tissue value is copied over.
            if annotations['tissue']:
                d['Tissue'] = annotations['tissue']