Ejemplo n.º 1
0
def _add_my_row(
    graph: BELGraph,
    relation: str,
    source_ncbigene_id: str,
    target_ncbigene_id: str,
    pubmed_id: str,
    int_detection_method: str,
    source_database: str,
    confidence: str,
) -> None:  # noqa:C901
    """Add a single BioGrid interaction to the graph as one qualified edge.

    The relation decides both the node type and the edge type:

    - relations in ``BIOGRID_GENE_ASSOCIATION`` become an association between two genes,
    - relations in ``BIOGRID_ASSOCIATION_ACTIONS`` become an association between two proteins,
    - relations in ``BIOGRID_BINDS_ACTIONS`` become a binding edge between two proteins.

    Both nodes always use the ``ncbigene`` namespace.

    :param graph: graph to add the edge to
    :param relation: row value of column relation; also stored as the ``psi-mi`` annotation
    :param source_ncbigene_id: row value of column source
    :param target_ncbigene_id: row value of column target
    :param pubmed_id: row value of column pubmed_id, used as the edge citation
    :param int_detection_method: row value of column interaction detection method
    :param source_database: stored as the ``biogrid-source`` annotation
    :param confidence: stored as the ``biogrid-confidence`` annotation
    :raises ValueError: if the relation belongs to none of the three relation groups
    """
    # Pick the node class and the bound graph method up front so the edge is built in one place.
    if relation in BIOGRID_GENE_ASSOCIATION:
        node_cls, add_edge = pybel.dsl.Gene, graph.add_association
    elif relation in BIOGRID_ASSOCIATION_ACTIONS:
        node_cls, add_edge = pybel.dsl.Protein, graph.add_association
    elif relation in BIOGRID_BINDS_ACTIONS:
        node_cls, add_edge = pybel.dsl.Protein, graph.add_binds
    else:
        raise ValueError(f'Unhandled BioGrid relation: {relation}')

    add_edge(
        node_cls(namespace='ncbigene', identifier=source_ncbigene_id),
        node_cls(namespace='ncbigene', identifier=target_ncbigene_id),
        citation=pubmed_id,
        evidence=EVIDENCE,
        annotations={
            'psi-mi': relation,
            'biogrid-detection': int_detection_method,
            'biogrid-source': source_database,
            'biogrid-confidence': confidence,
        },
    )
Ejemplo n.º 2
0
def _add_rows(df: pd.DataFrame, graph: BELGraph) -> None:
    """Add the transcription factor/target statements of every row to the graph.

    Rows whose ``effect`` is 0 are skipped. For every other row, and for each of
    its citations, the following is added:

    - the transcription factor protein binds the target gene,
    - the complex of the transcription factor and the target gene directly increases
      (``effect == 1``) or directly decreases (any other effect) the target RNA,
    - the transcription factor increases or decreases the target RNA accordingly,
    - the target gene is transcribed to the target RNA.

    :param df: data frame read through the columns ``effect``, ``tf_hgnc_id``,
        ``tf_hgnc_symbol``, ``target_hgnc_id``, ``target_hgnc_symbol`` and, if present,
        ``pmids`` (comma-separated)
    :param graph: graph to add the edges to
    """
    evidence_text = 'From TFregulons'

    for _, record in df.iterrows():
        effect = record['effect']
        if effect == 0:
            continue  # no binding. Could add negative BEL later

        tf_protein = pybel.dsl.Protein(
            namespace='hgnc',
            identifier=record['tf_hgnc_id'],
            name=record['tf_hgnc_symbol'],
        )
        target_rna = pybel.dsl.Rna(
            namespace='hgnc',
            identifier=record['target_hgnc_id'],
            name=record['target_hgnc_symbol'],
        )
        target_gene = target_rna.get_gene()

        # Rows without a ``pmids`` entry fall back to a single fixed reference.
        if 'pmids' in record:
            pmids = [pmid.strip() for pmid in record['pmids'].split(',')]
        else:
            pmids = ['31340985']

        # The sign of the effect is the same for every citation of the row.
        if effect == 1:
            add_direct, add_indirect = graph.add_directly_increases, graph.add_increases
        else:
            add_direct, add_indirect = graph.add_directly_decreases, graph.add_decreases

        for pmid in pmids:
            provenance = {'citation': pmid, 'evidence': evidence_text}

            graph.add_binds(tf_protein, target_gene, **provenance)
            add_direct(
                pybel.dsl.ComplexAbundance([tf_protein, target_gene]),
                target_rna,
                **provenance,
            )
            add_indirect(tf_protein, target_rna, **provenance)
            graph.add_transcription(target_gene, target_rna)
Ejemplo n.º 3
0
def get_graph_from_cx(network_uuid: str, cx: CX) -> BELGraph:  # noqa: C901
    """Convert the CX document of a PID network from NDEx into a BEL graph.

    The conversion runs in four passes over the CX aspects:

    1. ``networkAttributes``: ``name``, ``version`` and ``description`` become graph metadata.
    2. ``nodeAttributes`` / ``edgeAttributes``: node types, node members and edge
       citations are indexed by the identifier of the element they belong to.
    3. ``nodes``: every node is translated to a list of BEL nodes (several for nodes
       with members). Nodes that can not be translated are skipped with a warning.
    4. ``edges``: every edge whose two end points were translated is added once per
       combination of source node, target node and citation.

    :param network_uuid: NDEx identifier of the network. Used as the ``('ndex', uuid)``
        citation for edges that carry no citation of their own.
    :param cx: the CX document, read through ``iterate_aspect``
    :return: the BEL graph built from the document
    """
    # Only these three network attributes are forwarded to the BELGraph constructor.
    metadata = {
        entry['n']: entry['v']
        for entry in iterate_aspect(cx, 'networkAttributes')
        if entry['n'] in {'name', 'version', 'description'}
    }
    graph = BELGraph(**metadata)

    id_to_type = {}
    id_to_members = {}
    # TODO nodeAttributes have list of protein definitions for some things
    for entry in iterate_aspect(cx, 'nodeAttributes'):
        node_id = entry['po']
        attribute = entry['n']
        if attribute == 'type':
            id_to_type[node_id] = entry['v']
        elif attribute == 'member':
            id_to_members[node_id] = entry['v']
        elif attribute == 'alias':
            pass  # recognized, but aliases are not used when building nodes
        else:
            logger.warning(f'unhandled node attribute: {attribute}')

    id_to_citations = {}
    for entry in iterate_aspect(cx, 'edgeAttributes'):
        if entry['n'] == 'citation':
            id_to_citations[entry['po']] = [
                x[len('pubmed:'):] for x in entry['v']
            ]

    id_to_dsl = {}
    for node in iterate_aspect(cx, 'nodes'):
        node_id = node['@id']
        reference = node['r']

        # Manually curated references take precedence over everything else.
        if reference in MAPPING:
            id_to_dsl[node_id] = [MAPPING[reference]]
            continue

        # Nodes with members are expanded to one protein per resolvable member.
        if node_id in id_to_members:
            # The type is only reported, so a node without one must not raise.
            node_type = id_to_type.get(node_id)
            members = id_to_members[node_id]
            if node_type != 'proteinfamily':
                logger.warning(
                    f'unhandled node: {node_id} type={node_type} members={members}'
                )

            member_nodes = []
            for member in members:
                if not member.startswith('hgnc.symbol:'):
                    logger.warning(
                        f'unhandled member for node: {node_id} -> {member}')
                    continue
                member_name = member[len('hgnc.symbol:'):]
                member_identifier = _get_hgnc_id_from_name(member_name)
                if member_identifier is None:
                    logger.warning(
                        f'unhandled member for node: {node_id} -> {member}')
                    continue
                member_nodes.append(
                    pybel.dsl.Protein(namespace='hgnc',
                                      identifier=member_identifier,
                                      name=member_name))
            id_to_dsl[node_id] = member_nodes
            continue

        if ':' not in reference:
            logger.warning(f'no curie: {node_id} {reference}')
            UNMAPPED.add(reference)
            continue

        # Split on the first colon only, so a local identifier that itself
        # contains a colon does not break the unpacking.
        prefix, identifier = reference.split(':', 1)
        if prefix in {'hprd', 'cas'}:
            continue  # no translation implemented for these prefixes
        elif prefix == 'CHEBI':
            name = get_name('chebi', identifier)
            id_to_dsl[node_id] = [
                pybel.dsl.Abundance(namespace='chebi',
                                    identifier=identifier,
                                    name=name)
            ]
        elif prefix == 'uniprot':
            # The node is emitted in the hgnc namespace, so its identifier has to be
            # the HGNC identifier and not the UniProt accession from the reference.
            name = node['n']
            hgnc_id = _get_hgnc_id_from_name(name)
            if hgnc_id is None:
                # The node label did not resolve; retry with the name looked up
                # from the UniProt accession.
                name = _get_gene_name(identifier)
                if name is None:
                    logger.warning('could not map uniprot to name')
                    continue
                hgnc_id = _get_hgnc_id_from_name(name)
            if hgnc_id is None:
                logger.warning(f'could not map HGNC symbol {name}')
                continue
            id_to_dsl[node_id] = [
                pybel.dsl.Protein(namespace='hgnc',
                                  identifier=hgnc_id,
                                  name=name)
            ]
        else:
            logger.warning(f'unexpected prefix: {prefix}')
            continue

    binds_edge_types = {'in-complex-with', 'reacts-with'}
    # Edge types translated to a plain regulates edge without any modifier.
    regulates_edge_types = {
        'controls-transport-of', 'controls-transport-of-chemical',
        'controls-expression-of', 'controls-production-of',
        'consumption-controlled-by', 'controls-state-change-of',
        'catalysis-precedes',
    }

    for edge in iterate_aspect(cx, 'edges'):
        source_id, target_id = edge['s'], edge['t']
        if source_id not in id_to_dsl or target_id not in id_to_dsl:
            continue
        edge_type = edge['i']
        edge_id = edge['@id']

        sources = id_to_dsl[source_id]
        targets = id_to_dsl[target_id]
        # ``or`` also covers an edge whose citation list is present but empty;
        # otherwise the product below would be empty and the edge would vanish.
        citations = id_to_citations.get(edge_id) or [('ndex', network_uuid)]
        for source, target, citation in product(sources, targets, citations):
            if edge_type in binds_edge_types:
                graph.add_binds(source,
                                target,
                                citation=citation,
                                evidence=edge_id)
            elif edge_type == 'controls-phosphorylation-of':
                graph.add_regulates(
                    source,
                    target.with_variants(pybel.dsl.ProteinModification('Ph')),
                    citation=citation,
                    evidence=edge_id,
                )
            elif edge_type == 'chemical-affects':
                graph.add_regulates(
                    source,
                    target,
                    citation=citation,
                    evidence=edge_id,
                    object_modifier=pybel.dsl.activity(),
                )
            elif edge_type in regulates_edge_types:
                graph.add_regulates(source,
                                    target,
                                    citation=citation,
                                    evidence=edge_id)
            elif edge_type == 'used-to-produce':
                # Added as a reaction node only; no qualified edge is created.
                graph.add_node_from_data(
                    pybel.dsl.Reaction(
                        reactants=source,
                        products=target,
                    ))
            else:
                logger.warning(
                    f'unhandled edge type: {source} {edge_type} {target}')

    return graph
Ejemplo n.º 4
0
def _add_row(
    graph: BELGraph,
    relation: str,
    source_prefix: str,
    source_id: str,
    source_name: Optional[str],
    target_prefix: str,
    target_id: str,
    target_name: Optional[str],
    pubmed_id: str,
    int_detection_method: str,
    source_database: str,
    confidence: str,
) -> None:  # noqa:C901
    """Add one edge describing the relation between the source and the target.

    The node classes are looked up by prefix in ``NAMESPACE_TO_DSL`` and default to
    proteins. The relation is then matched, in this order, against the increases
    modifications, DNA strand elongation, the decreases relations, the association
    relations, the regulates relations and the binds relations.

    :param graph: graph to add edges to
    :param relation: row value of column relation
    :param source_prefix: row value of source prefix
    :param source_id: row value of source id
    :param source_name: name of the source, may be None
    :param target_prefix: row value of target prefix
    :param target_id: row value of target id
    :param target_name: name of the target, may be None
    :param pubmed_id: row value of column PubMed_id; if None, the edge is cited
        as ``('database', 'intact')`` instead
    :param int_detection_method: row value of column interaction detection method
    :param source_database: row value of column source_database
    :param confidence: row value of confidence score column
    :raises ValueError: if the relation is in no relation group, or if it is in
        ``INTACT_DECREASES_ACTIONS`` but has no decreases translation
    """
    if pubmed_id is None:
        pubmed_id = ('database', 'intact')

    # The annotation is built before normalization and keeps the relation as given.
    annotations = {
        'psi-mi': relation,
        'intact-detection': int_detection_method,
        'intact-source': source_database,
        'intact-confidence': confidence,
    }
    provenance = {
        'citation': pubmed_id,
        'evidence': EVIDENCE,
        'annotations': annotations,
    }

    # map double spaces to single spaces in relation string
    relation = ' '.join(relation.split())

    source_dsl = NAMESPACE_TO_DSL.get(source_prefix, pybel.dsl.Protein)
    source = source_dsl(
        namespace=source_prefix,
        identifier=source_id,
        name=source_name,
    )
    target_dsl = NAMESPACE_TO_DSL.get(target_prefix, pybel.dsl.Protein)
    target = target_dsl(
        namespace=target_prefix,
        identifier=target_id,
        name=target_name,
    )

    if relation in PROTEIN_INCREASES_MOD_DICT:
        graph.add_increases(
            source,
            target.with_variants(PROTEIN_INCREASES_MOD_DICT[relation]),
            subject_modifier=SUBJECT_ACTIVITIES.get(relation),
            **provenance,
        )

    # dna strand elongation
    elif relation == 'psi-mi:"MI:0701"(dna strand elongation)':
        elongated_gene = pybel.dsl.Gene(
            namespace=target_prefix,
            identifier=target_id,
            name=target_name,
            variants=[
                GeneModification(
                    name='DNA strand elongation',
                    namespace='go',
                    identifier='0022616',
                ),
            ],
        )
        graph.add_increases(source, elongated_gene, **provenance)

    # DECREASES
    elif relation in INTACT_DECREASES_ACTIONS:
        decreased, affects_activity = _get_decreased_object(
            relation=relation,
            target=target,
            target_prefix=target_prefix,
            target_id=target_id,
            target_name=target_name,
        )
        # The object modifier is only passed for the relations that need it.
        modifier = {'object_modifier': pybel.dsl.activity()} if affects_activity else {}
        graph.add_decreases(source, decreased, **provenance, **modifier)

    # ASSOCIATION:
    elif relation in INTACT_ASSOCIATION_ACTIONS:
        graph.add_association(source, target, **provenance)

    # REGULATES:
    elif relation in INTACT_REGULATES_ACTIONS:
        graph.add_regulates(source, target, **provenance)

    # BINDS
    elif relation in INTACT_BINDS_ACTIONS:
        graph.add_binds(source, target, **provenance)

    # no specified relation
    else:
        raise ValueError(
            f"Unspecified relation {relation} between {source} and {target}")


#: Decreases relations whose object is the target with a GO-annotated protein
#: modification: relation -> (GO name, GO identifier, edge carries activity() object modifier)
_GO_DECREASES_MODIFICATIONS = {
    #: Reaction monitoring the cleavage (hydrolysis) or a lipid molecule
    'psi-mi:"MI:1355"(lipid cleavage)': ('lipid catabolic process', '0016042', True),
    #: Cleavage of a lipid group covalently bound to a protein residue
    'psi-mi:"MI:0212"(lipoprotein cleavage reaction)': ('lipoprotein modification', '0042160', True),
    'psi-mi:"MI:0199"(deformylation reaction)': ('protein formylation', '0018256', False),
    'psi-mi:"MI:2280"(deamidation reaction)': ('protein amidation', '0018032', True),
    'psi-mi:"MI:1140"(decarboxylation reaction)': ('protein carboxylation', '0018214', False),
    'psi-mi:"MI:0985"(deamination reaction)': ('amine binding', '0043176', False),
}


def _get_decreased_object(relation, target, target_prefix, target_id, target_name):
    """Return the object node of a decreases edge and whether it needs an activity modifier.

    :param relation: whitespace-normalized relation string
    :param target: target node as built from the row
    :param target_prefix: row value of target prefix
    :param target_id: row value of target id
    :param target_name: name of the target, may be None
    :return: pair of the object node and a flag telling if the edge gets ``activity()``
        as its object modifier
    :raises ValueError: if the relation has no decreases translation
    """
    #: dna cleavage: Covalent bond breakage of a DNA molecule leading to the formation of smaller fragments
    if relation == 'psi-mi:"MI:0572"(dna cleavage)':
        # The cleaved gene is the target, so it is identified by the target id.
        cleaved_gene = pybel.dsl.Gene(
            namespace=target_prefix,
            identifier=target_id,
            name=target_name,
        )
        return cleaved_gene, False

    #: rna cleavage: Any process by which an RNA molecule is cleaved at specific sites or in a regulated manner
    if relation == 'psi-mi:"MI:0902"(rna cleavage)':
        # The cleaved RNA is the target, so it is identified by the target id.
        cleaved_rna = pybel.dsl.Rna(
            namespace=target_prefix,
            identifier=target_id,
            name=target_name,
        )
        return cleaved_rna, False

    if relation in {
            #: Covalent bond breakage in a molecule leading to the formation of smaller molecules
            'psi-mi:"MI:0194"(cleavage reaction)',
            #: Covalent modification of a polypeptide occuring during its maturation or its proteolytic degradation
            'psi-mi:"MI:0570"(protein cleavage)',
    }:
        return target, False

    if relation in _GO_DECREASES_MODIFICATIONS:
        go_name, go_id, affects_activity = _GO_DECREASES_MODIFICATIONS[relation]
        modified = target.with_variants(
            pybel.dsl.ProteinModification(
                name=go_name,
                namespace='go',
                identifier=go_id,
            ))
        return modified, affects_activity

    # protein modification
    if relation in PROTEIN_DECREASES_MOD_DICT:
        return target.with_variants(PROTEIN_DECREASES_MOD_DICT[relation]), False

    raise ValueError(
        f"The relation {relation} is not in DECREASE relations.")