def _add_my_row(
    graph: BELGraph,
    relation: str,
    source_ncbigene_id: str,
    target_ncbigene_id: str,
    pubmed_id: str,
    int_detection_method: str,
    source_database: str,
    confidence: str,
) -> None:  # noqa:C901
    """Add one BioGRID interaction to the graph as a single annotated edge.

    The relation selects both the kind of node and the kind of edge:

    - relations in ``BIOGRID_GENE_ASSOCIATION`` become an association between genes
    - relations in ``BIOGRID_ASSOCIATION_ACTIONS`` become an association between proteins
    - relations in ``BIOGRID_BINDS_ACTIONS`` become a binding between proteins

    :param graph: graph to add the edge to
    :param relation: row value of column relation
    :param source_ncbigene_id: row value of column source (NCBI Gene identifier)
    :param target_ncbigene_id: row value of column target (NCBI Gene identifier)
    :param pubmed_id: row value of column pubmed_id, used as the edge citation
    :param int_detection_method: row value of column interaction detection method
    :param source_database: value stored under the ``biogrid-source`` annotation
    :param confidence: value stored under the ``biogrid-confidence`` annotation
    :raises ValueError: if the relation is in none of the three handled relation collections
    """
    # Decide the node class and the edge-adding method first; the call itself
    # is identical for all three handled cases.
    if relation in BIOGRID_GENE_ASSOCIATION:
        node_cls, add_edge = pybel.dsl.Gene, graph.add_association
    elif relation in BIOGRID_ASSOCIATION_ACTIONS:
        node_cls, add_edge = pybel.dsl.Protein, graph.add_association
    elif relation in BIOGRID_BINDS_ACTIONS:
        node_cls, add_edge = pybel.dsl.Protein, graph.add_binds
    else:
        raise ValueError(f'Unhandled BioGrid relation: {relation}')

    add_edge(
        node_cls(namespace='ncbigene', identifier=source_ncbigene_id),
        node_cls(namespace='ncbigene', identifier=target_ncbigene_id),
        citation=pubmed_id,
        evidence=EVIDENCE,
        annotations={
            'psi-mi': relation,
            'biogrid-detection': int_detection_method,
            'biogrid-source': source_database,
            'biogrid-confidence': confidence,
        },
    )
def _add_rows(df: pd.DataFrame, graph: BELGraph) -> None: for _, row in df.iterrows(): effect = row['effect'] if effect == 0: continue # no binding. Could add negative BEL later tf_protein = pybel.dsl.Protein( namespace='hgnc', identifier=row['tf_hgnc_id'], name=row['tf_hgnc_symbol'], ) target_rna = pybel.dsl.Rna( namespace='hgnc', identifier=row['target_hgnc_id'], name=row['target_hgnc_symbol'], ) target_gene = target_rna.get_gene() if 'pmids' in row: citations = [pmid.strip() for pmid in row['pmids'].split(',')] else: citations = ['31340985'] evidence = 'From TFregulons' for citation in citations: graph.add_binds( tf_protein, target_gene, citation=citation, evidence=evidence, ) if effect == 1: binds_dna_adder, affects_expression_adder = graph.add_directly_increases, graph.add_increases else: binds_dna_adder, affects_expression_adder = graph.add_directly_decreases, graph.add_decreases binds_dna_adder( pybel.dsl.ComplexAbundance([tf_protein, target_gene]), target_rna, citation=citation, evidence=evidence, ) affects_expression_adder( tf_protein, target_rna, citation=citation, evidence=evidence, ) graph.add_transcription(target_gene, target_rna)
def get_graph_from_cx(network_uuid: str, cx: CX) -> BELGraph:  # noqa: C901
    """Get a PID network from NDEx.

    The CX document is read aspect by aspect:

    1. ``networkAttributes`` supply the graph's name, version, and description.
    2. ``nodeAttributes`` supply each node's type and member list.
    3. ``edgeAttributes`` supply the citations of each edge.
    4. ``nodes`` are translated to lists of PyBEL DSL objects. Nodes that cannot
       be translated are skipped, mostly with a warning.
    5. ``edges`` whose source and target were both translated are added to the
       graph, once for every combination of source, target, and citation.

    :param network_uuid: NDEx identifier of the network, used as the citation
        for edges that carry no citation of their own
    :param cx: the CX document of the network
    :return: the BEL graph built from the CX document
    """
    metadata = {
        entry['n']: entry['v']
        for entry in iterate_aspect(cx, 'networkAttributes')
        if entry['n'] in {'name', 'version', 'description'}
    }
    graph = BELGraph(**metadata)

    id_to_type = {}
    id_to_members = {}
    # TODO nodeAttributes have list of protein definitions for some things
    for entry in iterate_aspect(cx, 'nodeAttributes'):
        node_id = entry['po']
        attribute = entry['n']
        if attribute == 'type':
            id_to_type[node_id] = entry['v']
        elif attribute == 'alias':
            continue  # recognized, but aliases are not used to build the graph
        elif attribute == 'member':
            id_to_members[node_id] = entry['v']
        else:
            logger.warning(f'unhandled node attribute: {attribute}')

    # NOTE(review): the first len('pubmed:') characters are dropped
    # unconditionally, which assumes every citation carries that prefix.
    id_to_citations = {
        entry['po']: [x[len('pubmed:'):] for x in entry['v']]
        for entry in iterate_aspect(cx, 'edgeAttributes')
        if entry['n'] == 'citation'
    }

    id_to_dsl = {}
    for node in iterate_aspect(cx, 'nodes'):
        node_id = node['@id']
        reference = node['r']

        if reference in MAPPING:
            id_to_dsl[node_id] = [MAPPING[reference]]
            continue

        if node_id in id_to_members:
            # A node may list members without declaring a type; do not crash on it.
            node_type = id_to_type.get(node_id)
            members = id_to_members[node_id]
            if node_type != 'proteinfamily':
                logger.warning(
                    f'unhandled node: {node_id} type={node_type} members={members}'
                )

            _rv = []
            for member in members:
                if not member.startswith('hgnc.symbol:'):
                    logger.warning(
                        f'unhandled member for node: {node_id} -> {member}')
                    continue
                member_name = member[len('hgnc.symbol:'):]
                member_identifier = _get_hgnc_id_from_name(member_name)
                if member_identifier is None:
                    logger.warning(
                        f'unhandled member for node: {node_id} -> {member}')
                    continue
                _rv.append(
                    pybel.dsl.Protein(namespace='hgnc',
                                      identifier=member_identifier,
                                      name=member_name))
            id_to_dsl[node_id] = _rv
            continue

        if ':' not in reference:
            logger.warning(f'no curie: {node_id} {reference}')
            UNMAPPED.add(reference)
            continue

        # Split on the first colon only, so an identifier that itself contains
        # a colon does not break the two-way unpacking.
        prefix, identifier = reference.split(':', 1)

        if prefix == 'hprd':
            # nodes.write(f'unhandled hprd:{identifier}')
            continue
        elif prefix == 'cas':
            # nodes.write(f'unhandled cas:{identifier}')
            continue  # not sure what to do with this
        elif prefix == 'CHEBI':
            name = get_name('chebi', identifier)
            id_to_dsl[node_id] = [
                pybel.dsl.Abundance(namespace='chebi',
                                    identifier=identifier,
                                    name=name)
            ]
        elif prefix == 'uniprot':
            name = node['n']
            hgnc_id = _get_hgnc_id_from_name(name)
            if not hgnc_id:
                logger.warning(f'could not map HGNC symbol {name}')
                continue
            # NOTE(review): presumably maps the UniProt accession to a gene
            # name; the result may be None, in which case the node is still
            # created without a name -- confirm this is intended.
            name = _get_gene_name(identifier)
            if name is None:
                logger.warning('could not map uniprot to name')
            # The node lives in the HGNC namespace, so it must carry the HGNC
            # identifier looked up above rather than the UniProt accession.
            id_to_dsl[node_id] = [
                pybel.dsl.Protein(namespace='hgnc',
                                  identifier=hgnc_id,
                                  name=name)
            ]
        else:
            logger.warning(f'unexpected prefix: {prefix}')
            continue

    for edge in iterate_aspect(cx, 'edges'):
        source_id, target_id = edge['s'], edge['t']
        if source_id not in id_to_dsl or target_id not in id_to_dsl:
            continue

        edge_type = edge['i']
        edge_id = edge['@id']
        sources = id_to_dsl[source_id]
        targets = id_to_dsl[target_id]
        # Edges without their own citations are attributed to the NDEx network.
        citations = id_to_citations.get(edge_id, [('ndex', network_uuid)])

        for source, target, citation in product(sources, targets, citations):
            if edge_type == 'in-complex-with':
                graph.add_binds(source, target, citation=citation, evidence=edge_id)
            elif edge_type == 'controls-phosphorylation-of':
                graph.add_regulates(
                    source,
                    target.with_variants(pybel.dsl.ProteinModification('Ph')),
                    citation=citation,
                    evidence=edge_id,
                )
            elif edge_type in {
                'controls-transport-of',
                'controls-transport-of-chemical',
            }:
                graph.add_regulates(
                    source,
                    target,
                    citation=citation,
                    evidence=edge_id,
                    # object_modifier=pybel.dsl.translocation(),
                )
            elif edge_type == 'chemical-affects':
                graph.add_regulates(
                    source,
                    target,
                    citation=citation,
                    evidence=edge_id,
                    object_modifier=pybel.dsl.activity(),
                )
            elif edge_type in {
                'controls-expression-of',
                'controls-production-of',
                'consumption-controlled-by',
                'controls-state-change-of',
                'catalysis-precedes',
            }:
                graph.add_regulates(source, target, citation=citation, evidence=edge_id)
            elif edge_type == 'used-to-produce':
                graph.add_node_from_data(
                    pybel.dsl.Reaction(
                        reactants=source,
                        products=target,
                    ))
            elif edge_type == 'reacts-with':
                graph.add_binds(source, target, citation=citation, evidence=edge_id)
                # graph.add_node_from_data(pybel.dsl.Reaction(
                #     reactants=[source, target],
                # ))
            else:
                logger.warning(
                    f'unhandled edge type: {source} {edge_type} {target}')

    return graph
#: IntAct "decreases" relations that are modeled as the loss of a GO-annotated
#: protein modification on the target. Each value is
#: ``(GO name, GO identifier, whether the edge's object carries an activity modifier)``.
_INTACT_DECREASES_GO_PMODS = {
    #: Reaction monitoring the cleavage (hydrolysis) or a lipid molecule
    'psi-mi:"MI:1355"(lipid cleavage)': ('lipid catabolic process', '0016042', True),
    #: Cleavage of a lipid group covalently bound to a protein residue
    'psi-mi:"MI:0212"(lipoprotein cleavage reaction)': ('lipoprotein modification', '0042160', True),
    # deformylation reaction
    'psi-mi:"MI:0199"(deformylation reaction)': ('protein formylation', '0018256', False),
    # protein deamidation
    'psi-mi:"MI:2280"(deamidation reaction)': ('protein amidation', '0018032', True),
    # protein decarboxylation
    'psi-mi:"MI:1140"(decarboxylation reaction)': ('protein carboxylation', '0018214', False),
    # protein deamination
    'psi-mi:"MI:0985"(deamination reaction)': ('amine binding', '0043176', False),
}


def _add_row(
    graph: BELGraph,
    relation: str,
    source_prefix: str,
    source_id: str,
    source_name: Optional[str],
    target_prefix: str,
    target_id: str,
    target_name: Optional[str],
    pubmed_id: Optional[str],
    int_detection_method: str,
    source_database: str,
    confidence: str,
) -> None:  # noqa:C901
    """Add one IntAct interaction to the graph as a single annotated edge.

    The relation (after collapsing runs of whitespace) decides the edge type:
    increases, decreases, association, regulates, or binds.

    :param graph: graph to add edges to
    :param relation: row value of column relation
    :param source_prefix: row value of source prefix; selects the node class via
        ``NAMESPACE_TO_DSL`` and defaults to a protein
    :param source_id: row value of source id
    :param source_name: name of the source, if any
    :param target_prefix: row value of target prefix; selects the node class via
        ``NAMESPACE_TO_DSL`` and defaults to a protein
    :param target_id: row value of target id
    :param target_name: name of the target, if any
    :param pubmed_id: row value of column PubMed_id; when ``None`` the edge is
        cited as ``('database', 'intact')``
    :param int_detection_method: row value of column interaction detection method
    :param source_database: row value of column source_database
    :param confidence: row value of confidence score column
    :raises ValueError: if the relation is not handled
    :return: None
    """
    if pubmed_id is None:
        pubmed_id = 'database', 'intact'

    # The annotation keeps the relation exactly as it was passed in, i.e.
    # before the whitespace normalization below.
    annotations = {
        'psi-mi': relation,
        'intact-detection': int_detection_method,
        'intact-source': source_database,
        'intact-confidence': confidence,
    }

    # map double spaces to single spaces in relation string
    relation = ' '.join(relation.split())

    source_dsl = NAMESPACE_TO_DSL.get(source_prefix, pybel.dsl.Protein)
    source = source_dsl(
        namespace=source_prefix,
        identifier=source_id,
        name=source_name,
    )
    target_dsl = NAMESPACE_TO_DSL.get(target_prefix, pybel.dsl.Protein)
    target = target_dsl(
        namespace=target_prefix,
        identifier=target_id,
        name=target_name,
    )

    # Provenance shared by every edge this function can add.
    edge_kwargs = {
        'citation': pubmed_id,
        'evidence': EVIDENCE,
        'annotations': annotations,
    }

    if relation in PROTEIN_INCREASES_MOD_DICT:
        graph.add_increases(
            source,
            target.with_variants(PROTEIN_INCREASES_MOD_DICT[relation]),
            subject_modifier=SUBJECT_ACTIVITIES.get(relation),
            **edge_kwargs,
        )

    # dna strand elongation
    elif relation == 'psi-mi:"MI:0701"(dna strand elongation)':
        target_mod = pybel.dsl.Gene(
            namespace=target_prefix,
            identifier=target_id,
            name=target_name,
            variants=[
                GeneModification(
                    name='DNA strand elongation',
                    namespace='go',
                    identifier='0022616',
                ),
            ],
        )
        graph.add_increases(source, target_mod, **edge_kwargs)

    # DECREASES
    elif relation in INTACT_DECREASES_ACTIONS:
        decreases_kwargs = dict(edge_kwargs)

        #: dna cleavage: Covalent bond breakage of a DNA molecule leading to the formation of smaller fragments
        if relation == 'psi-mi:"MI:0572"(dna cleavage)':
            # The cleaved molecule is the target, so the node takes the target's identifier.
            target_mod = pybel.dsl.Gene(
                namespace=target_prefix,
                identifier=target_id,
                name=target_name,
            )
        #: rna cleavage: Any process by which an RNA molecule is cleaved at specific sites or in a regulated manner
        elif relation == 'psi-mi:"MI:0902"(rna cleavage)':
            target_mod = pybel.dsl.Rna(
                namespace=target_prefix,
                identifier=target_id,
                name=target_name,
            )
        # cleavage
        elif relation in {
            #: Covalent bond breakage in a molecule leading to the formation of smaller molecules
            'psi-mi:"MI:0194"(cleavage reaction)',
            #: Covalent modification of a polypeptide occuring during its maturation or its proteolytic degradation
            'psi-mi:"MI:0570"(protein cleavage)',
        }:
            target_mod = target
        # loss of a GO-annotated protein modification
        elif relation in _INTACT_DECREASES_GO_PMODS:
            go_name, go_id, on_activity = _INTACT_DECREASES_GO_PMODS[relation]
            target_mod = target.with_variants(
                pybel.dsl.ProteinModification(
                    name=go_name,
                    namespace='go',
                    identifier=go_id,
                ),
            )
            if on_activity:
                decreases_kwargs['object_modifier'] = pybel.dsl.activity()
        # protein modification
        elif relation in PROTEIN_DECREASES_MOD_DICT:
            target_mod = target.with_variants(PROTEIN_DECREASES_MOD_DICT[relation])
        else:
            raise ValueError(
                f"The relation {relation} is not in DECREASE relations.")

        graph.add_decreases(source, target_mod, **decreases_kwargs)

    # ASSOCIATION:
    elif relation in INTACT_ASSOCIATION_ACTIONS:
        graph.add_association(source, target, **edge_kwargs)

    # REGULATES:
    elif relation in INTACT_REGULATES_ACTIONS:
        graph.add_regulates(source, target, **edge_kwargs)

    # BINDS
    elif relation in INTACT_BINDS_ACTIONS:
        graph.add_binds(source, target, **edge_kwargs)

    # no specified relation
    else:
        raise ValueError(
            f"Unspecified relation {relation} between {source} and {target}")