def main():
    """Build NSoC-KG.

    Resolves the version of every upstream source, writes all triples to
    ``triples.tsv``, prints a per-source table of edge counts, writes a
    ``metadata.json`` describing the build, and passes both files to
    ``ensure_zenodo``.
    """
    biogrid_version = bioversions.get_version("biogrid")
    homologene_version = bioversions.get_version("homologene")
    disgenet_version = bioversions.get_version("disgenet")
    # Insertion order is kept as-is since this dict is serialized to JSON below.
    versions = {
        "biogrid": biogrid_version,
        "homologene": homologene_version,
        "excape": EXCAPE_VERSION,
        "disgenet": disgenet_version,
    }

    statistics = {}
    triples_path = NSOCKG_MODULE.join(name="triples.tsv")
    with triples_path.open("w") as triples_file:
        # Every source writer receives the shared statistics dict, the open
        # triples file, and the version of its own source.
        writers = (
            (_excape, "excape"),
            (_biogrid, "biogrid"),
            (_homologene, "homologene"),
            (_disgenet, "disgenet"),
        )
        for write_source, source in writers:
            write_source(statistics, triples_file, versions[source])

    # Count everything
    statistics["total"] = sum(statistics.values())

    # The lookup statistics[source] relies on the writers having stored their
    # counts under the same keys that are used in ``versions``.
    rows = [
        *((source, versions[source], statistics[source]) for source in sorted(versions)),
        ("total", "", statistics["total"]),
    ]
    print(tabulate(rows, headers=["Source", "Version", "Edges"]))

    metadata_path = NSOCKG_MODULE.join(name="metadata.json")
    with metadata_path.open("w") as metadata_file:
        build_info = {
            "date": datetime.datetime.now().strftime("%Y-%m-%d"),
            "exporter": getpass.getuser(),
            "versions": versions,
            "statistics": statistics,
        }
        json.dump(build_info, metadata_file, indent=2)

    # Automatically upload this revision to Zenodo
    # NOTE(review): ``metadata`` is not assigned inside this function;
    # presumably it is a module-level name - confirm.
    ensure_zenodo(
        key="nsockg",
        data=metadata,
        paths=[
            triples_path,
            metadata_path,
        ],
    )
def get_bel() -> pybel.BELGraph:
    """Get the Rhea data.

    The Rhea RDF for the current version is obtained through
    ``BIO2BEL_MODULE.ensure_rdf`` and queried for reactions; each returned row
    is added to the resulting BEL graph as a reaction node.
    """
    version = bioversions.get_version('rhea')
    # Parse the RDF file
    rdf_graph = BIO2BEL_MODULE.ensure_rdf('rhea', version, url=URL)

    # List all reactions in the database. Per the original author's note, the
    # bidirectionalReaction criterion restricts the results to the
    # non-directional version of a given reaction.
    rows = rdf_graph.query(
        """ SELECT ?reaction ?id ?reactionEquation WHERE { ?reaction rh:equation ?reactionEquation . ?reaction rh:bidirectionalReaction ?bdr . ?reaction rh:id ?id } """,
    )

    bel_graph = pybel.BELGraph(name='rhea', version=version)
    # No de-duplication is done up front; the occasional duplicate is left to
    # the PyBEL graph to handle.
    for uri, rhea_id, equation in rows:
        # Look up the reactants and products of this reaction
        sides = _participants(rdf_graph, uri)
        bel_graph.add_node_from_data(
            dsl.Reaction(
                sides['reactants'],
                sides['products'],
                namespace='RHEA',
                name=equation,
                identifier=rhea_id,
            ),
        )
    return bel_graph
def get_df() -> pd.DataFrame:
    """Get the BioGRID identifiers mapping dataframe.

    The table for the current BioGRID version is loaded with every column as
    a string, and a ``taxonomy_id`` column is derived by mapping
    ``ORGANISM_OFFICIAL_NAME`` through ``_lookup``.
    """
    version = bioversions.get_version('biogrid')
    identifiers = ensure_df(
        PREFIX,
        url=f'{BASE_URL}/BIOGRID-{version}/BIOGRID-IDENTIFIERS-{version}.tab.zip',
        skiprows=28,
        dtype=str,
        version=version,
    )
    organism_names = identifiers['ORGANISM_OFFICIAL_NAME']
    identifiers['taxonomy_id'] = organism_names.map(_lookup)
    return identifiers
def ensure(version: Optional[str] = None) -> Path:
    """Ensure the reviewed uniprot names are available.

    :param version: The UniProt version to use. When omitted, the current
        version is looked up with :mod:`bioversions`.
    :returns: The result of ``RAW_MODULE.ensure`` for ``reviewed.tsv.gz``.
    """
    resolved = bioversions.get_version('uniprot') if version is None else version
    return RAW_MODULE.ensure(
        PREFIX,
        resolved,
        name='reviewed.tsv.gz',
        url=REVIEWED_URL,
    )
def get_chembl_xrefs_df(version: Optional[str] = None) -> pd.DataFrame:
    """Get all ChEMBL equivalences.

    :param version: The ChEMBL version to use. When omitted, the current
        version is looked up with :mod:`bioversions`.
    :returns: The compound equivalences followed by the protein equivalences,
        concatenated into one dataframe.
    """
    if version is None:
        version = bioversions.get_version('chembl')
    getters = (
        get_chembl_compound_equivalences,
        get_chembl_protein_equivalences,
    )
    frames = [getter(version=version) for getter in getters]
    return pd.concat(frames)
def ensure(version: Optional[str] = None, force: bool = False) -> Path:
    """Ensure the reviewed uniprot names are available.

    :param version: The UniProt version to use. When omitted, the current
        version is looked up with :mod:`bioversions`.
    :param force: Passed through unchanged to ``RAW_MODULE.ensure``.
    :returns: The result of ``RAW_MODULE.ensure`` for ``reviewed.tsv.gz``.
    """
    resolved = bioversions.get_version("uniprot") if version is None else version
    return RAW_MODULE.ensure(
        PREFIX,
        resolved,
        name="reviewed.tsv.gz",
        url=REVIEWED_URL,
        force=force,
    )
def test_get(self):
    """Test that a version string can be retrieved for each listed prefix."""
    for prefix in ("reactome", "kegg"):
        # One sub-test per prefix so a failure names the offending prefix.
        with self.subTest(prefix=prefix):
            self.assertIsInstance(bioversions.get_version(prefix), str)
def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame:
    """Get PubChem Compound-MeSH xrefs.

    :param version: The PubChem version to use. When omitted, the current
        version is looked up with :mod:`bioversions`.
    :returns: A dataframe with the ``XREF_COLUMNS`` columns and one row per
        entry of the PubChem-to-MeSH identifier mapping; the CID-MeSH URL
        fills the last column of every row.
    """
    if version is None:
        version = bioversions.get_version("pubchem")
    cid_mesh_url = _get_pubchem_extras_url(version, "CID-MeSH")
    pubchem_to_mesh = get_pubchem_id_to_mesh_id(version=version)
    rows = [
        ("pubchem.compound", pubchem_id, "mesh", mesh_id, cid_mesh_url)
        for pubchem_id, mesh_id in pubchem_to_mesh.items()
    ]
    return pd.DataFrame(rows, columns=XREF_COLUMNS)
def get_obo() -> Obo:
    """Get miRBase mature as OBO.

    The current miRBase version is looked up with :mod:`bioversions`, handed
    to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version("mirbase")
    term_kwargs = {"version": version}
    return Obo(
        ontology=PREFIX,
        name="miRBase Mature",
        data_version=version,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
        auto_generated_by=f"bio2obo:{PREFIX}",
    )
def get_obo() -> Obo:
    """Get MeSH as OBO.

    The current MeSH version is looked up with :mod:`bioversions`, handed to
    ``get_terms``, and recorded as the data version.
    """
    version = bioversions.get_version("mesh")
    term_kwargs = {"version": version}
    return Obo(
        ontology=PREFIX,
        name="Medical Subject Headings",
        data_version=version,
        auto_generated_by=f"bio2obo:{PREFIX}",
        iter_terms=get_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Return ChEMBL as OBO.

    The current ChEMBL version is looked up with :mod:`bioversions`, handed
    to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version("chembl")
    term_kwargs = {"version": version}
    return Obo(
        ontology="chembl.compound",
        name="ChEMBL",
        auto_generated_by=f"bio2obo:{PREFIX}",
        data_version=version,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get DrugBank Salts as OBO.

    The current DrugBank version is looked up with :mod:`bioversions`, handed
    to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version('drugbank')
    term_kwargs = {'version': version}
    return Obo(
        ontology=PREFIX,
        name='DrugBank Salts',
        data_version=version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get DrugCentral OBO.

    The current version for ``PREFIX`` is looked up with :mod:`bioversions`,
    handed to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version(PREFIX)
    term_kwargs = {'version': version}
    return Obo(
        ontology=PREFIX,
        name='DrugCentral',
        auto_generated_by=f'bio2obo:{PREFIX}',
        data_version=version,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get miRBase family as OBO.

    The current miRBase version is looked up with :mod:`bioversions`, handed
    to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version('mirbase')
    term_kwargs = {'version': version}
    return Obo(
        ontology=PREFIX,
        name='miRBase Families',
        data_version=version,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
        auto_generated_by=f'bio2obo:{PREFIX}',
    )
def get_obo() -> Obo:
    """Get PFAM as OBO.

    The current PFAM version is looked up with :mod:`bioversions`, handed to
    the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version("pfam")
    term_kwargs = {"version": version}
    return Obo(
        ontology=PREFIX,
        name="PFAM",
        auto_generated_by=f"bio2obo:{PREFIX}",
        data_version=version,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get PFAM Clans as OBO.

    The current PFAM version is looked up with :mod:`bioversions`, handed to
    the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version('pfam')
    term_kwargs = {'version': version}
    return Obo(
        ontology=PREFIX,
        name='PFAM Clans',
        auto_generated_by=f'bio2obo:{PREFIX}',
        data_version=version,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get KEGG Genome as OBO.

    The current KEGG version is looked up with :mod:`bioversions`, handed to
    the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version("kegg")
    term_kwargs = {"version": version}
    return Obo(
        ontology=KEGG_GENOME_PREFIX,
        name="KEGG Genome",
        data_version=version,
        auto_generated_by=f"bio2obo:{KEGG_GENOME_PREFIX}",
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Return ChEMBL as OBO.

    The current ChEMBL version is looked up with :mod:`bioversions`, handed
    to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version('chembl')
    term_kwargs = {'version': version}
    return Obo(
        ontology='chembl.compound',
        name='ChEMBL',
        auto_generated_by=f'bio2obo:{PREFIX}',
        data_version=version,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get NPASS as OBO.

    The current NPASS version is looked up with :mod:`bioversions`, handed to
    the term iterator, and recorded as the data version of the ontology.

    :returns: An :class:`Obo` for NPASS whose identifiers are declared to
        follow the pattern ``NPC\\d+``.
    """
    version = bioversions.get_version('npass')
    return Obo(
        ontology=PREFIX,
        name='Natural Products Activity and Species Source Database',
        iter_terms=iter_terms,
        iter_terms_kwargs=dict(version=version),
        # The version was resolved but previously never recorded on the
        # ontology; pass it along as data_version so the terms and the
        # ontology metadata refer to the same release.
        data_version=version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        pattern=r'NPC\d+',
    )
def get_obo() -> Obo:
    """Get ExPASy as OBO.

    The current ExPASy version is looked up with :mod:`bioversions`, handed
    to ``get_terms``, and recorded as the data version.
    """
    version = bioversions.get_version("expasy")
    term_kwargs = {"version": version}
    typedefs = [has_member, has_molecular_function]
    return Obo(
        ontology=PREFIX,
        name="ExPASy Enzyme Nomenclature",
        data_version=version,
        auto_generated_by=f"bio2obo:{PREFIX}",
        typedefs=typedefs,
        iter_terms=get_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get Rhea as OBO.

    The current version for ``PREFIX`` is looked up with :mod:`bioversions`,
    handed to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version(PREFIX)
    term_kwargs = {'version': version}
    typedefs = [has_lr, has_bi, has_rl]
    return Obo(
        ontology=PREFIX,
        name='Rhea',
        data_version=version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        typedefs=typedefs,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo(force: bool = False) -> Obo:
    """Get DrugBank as OBO.

    :param force: Forwarded to the term iterator together with the version.
    :returns: An :class:`Obo` for DrugBank at the current version reported
        by :mod:`bioversions`.
    """
    version = bioversions.get_version('drugbank')
    term_kwargs = {'version': version, 'force': force}
    return Obo(
        ontology=PREFIX,
        name='DrugBank',
        data_version=version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        typedefs=[has_salt],
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo(skip_missing: bool = True) -> Obo:
    """Get KEGG Pathways as OBO.

    :param skip_missing: Forwarded to the term iterator together with the
        version.
    :returns: An :class:`Obo` for KEGG Pathways at the current KEGG version
        reported by :mod:`bioversions`.
    """
    version = bioversions.get_version('kegg')
    term_kwargs = {'skip_missing': skip_missing, 'version': version}
    typedefs = [from_kegg_species, from_species, species_specific, has_part]
    return Obo(
        ontology=KEGG_PATHWAY_PREFIX,
        name='KEGG Pathways',
        data_version=version,
        auto_generated_by=f'bio2obo:{KEGG_PATHWAY_PREFIX}',
        typedefs=typedefs,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get miRBase as OBO.

    The current version for ``PREFIX`` is looked up with :mod:`bioversions`,
    handed to ``get_terms``, and recorded as the data version.
    """
    version = bioversions.get_version(PREFIX)
    term_kwargs = {"version": version}
    typedefs = [from_species, has_mature]
    return Obo(
        ontology=PREFIX,
        name="miRBase",
        data_version=version,
        auto_generated_by=f"bio2obo:{PREFIX}",
        typedefs=typedefs,
        iter_terms=get_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get KEGG Genes as OBO.

    The current KEGG version is looked up with :mod:`bioversions`, handed to
    the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version("kegg")
    term_kwargs = {"version": version}
    typedefs = [from_species, from_kegg_species, has_gene_product]
    return Obo(
        ontology=KEGG_GENES_PREFIX,
        name="KEGG Genes",
        data_version=version,
        auto_generated_by=f"bio2obo:{KEGG_GENES_PREFIX}",
        typedefs=typedefs,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get MSIG as Obo.

    The current version for ``PREFIX`` is looked up with :mod:`bioversions`,
    handed to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version(PREFIX)
    term_kwargs = {'version': version}
    return Obo(
        ontology=PREFIX,
        name='Molecular Signatures Database',
        data_version=version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        typedefs=[has_part],
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get Reactome OBO.

    The current Reactome version is looked up with :mod:`bioversions`, handed
    to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version('reactome')
    term_kwargs = {'version': version}
    typedefs = [from_species, has_part]
    return Obo(
        ontology=PREFIX,
        name='Reactome',
        data_version=version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        typedefs=typedefs,
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )
def get_obo() -> Obo:
    """Get InterPro as OBO.

    The current version for ``PREFIX`` is looked up with :mod:`bioversions`,
    handed to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version(PREFIX)
    term_kwargs = {"version": version}
    return Obo(
        ontology=PREFIX,
        name="InterPro",
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
        data_version=version,
        auto_generated_by=f"bio2obo:{PREFIX}",
    )
def _ensure_cid_name_path(*, version: Optional[str] = None, force: bool = False) -> str:
    """Ensure the PubChem ``CID-Title.gz`` file and return what ``ensure_path`` yields.

    :param version: The PubChem version to use. When omitted, the current
        version is looked up with :mod:`bioversions`.
    :param force: Passed through unchanged to ``ensure_path``.
    """
    if version is None:
        version = bioversions.get_version("pubchem")
    # Per the original note, the file has 2 tab-separated columns:
    # compound_id, name
    url = _get_pubchem_extras_url(version, "CID-Title.gz")
    return ensure_path(PREFIX, url=url, version=version, force=force)
def get_obo() -> Obo:
    """Get UniProt as OBO.

    The current UniProt version is looked up with :mod:`bioversions`, handed
    to the term iterator, and recorded as the data version.
    """
    version = bioversions.get_version('uniprot')
    term_kwargs = {'version': version}
    return Obo(
        ontology=PREFIX,
        name='UniProt',
        auto_generated_by=f'bio2obo:{PREFIX}',
        data_version=version,
        typedefs=[from_species],
        iter_terms=iter_terms,
        iter_terms_kwargs=term_kwargs,
    )