Ejemplo n.º 1
0
def main():
    """Build NSoC-KG.

    Opens ``triples.tsv`` and passes it to each source helper, prints a table
    of edge counts per source, dumps the build metadata to ``metadata.json``,
    and hands both files to ``ensure_zenodo``.
    """
    # The key order here is the order in which the versions end up in the metadata file
    versions = {
        "biogrid": bioversions.get_version("biogrid"),
        "homologene": bioversions.get_version("homologene"),
        "excape": EXCAPE_VERSION,
        "disgenet": bioversions.get_version("disgenet"),
    }

    statistics = {}
    triples_path = NSOCKG_MODULE.join(name="triples.tsv")
    with triples_path.open("w") as triples_file:
        # Every helper gets the shared statistics dict and the same open file.
        # Presumably each one records its edge count under its source key — the
        # table below reads statistics[<source>] for every key in ``versions``.
        _excape(statistics, triples_file, versions["excape"])
        _biogrid(statistics, triples_file, versions["biogrid"])
        _homologene(statistics, triples_file, versions["homologene"])
        _disgenet(statistics, triples_file, versions["disgenet"])

    # Count everything
    total = sum(statistics.values())
    statistics["total"] = total

    rows = []
    for source in sorted(versions):
        rows.append((source, versions[source], statistics[source]))
    rows.append(("total", "", total))
    print(tabulate(rows, headers=["Source", "Version", "Edges"]))

    metadata_path = NSOCKG_MODULE.join(name="metadata.json")
    with metadata_path.open("w") as metadata_file:
        build_info = {
            "date": datetime.datetime.now().strftime("%Y-%m-%d"),
            "exporter": getpass.getuser(),
            "versions": versions,
            "statistics": statistics,
        }
        json.dump(build_info, metadata_file, indent=2)

    # Automatically upload this revision to Zenodo
    # NOTE(review): ``metadata`` is not assigned anywhere in this function —
    # presumably a module-level name; confirm it exists.
    ensure_zenodo(
        key="nsockg",
        data=metadata,
        paths=[triples_path, metadata_path],
    )
Ejemplo n.º 2
0
def get_bel() -> pybel.BELGraph:
    """Get the Rhea data as a BEL graph, adding a reaction node for each query result."""
    rhea_version = bioversions.get_version('rhea')
    # Parse the RDF file
    rdf_graph = BIO2BEL_MODULE.ensure_rdf('rhea', rhea_version, url=URL)
    # Select all reactions in the database with their identifier and equation.
    # The bidirectionalReaction criterion ensures that only the non-directional
    # version of a given reaction is received.
    query_results = rdf_graph.query(
        """
        SELECT ?reaction ?id ?reactionEquation WHERE {
            ?reaction rh:equation ?reactionEquation .
            ?reaction rh:bidirectionalReaction ?bdr .
            ?reaction rh:id ?id
        }
        """,
    )
    bel_graph = pybel.BELGraph(name='rhea', version=rhea_version)
    # Results are not de-duplicated first (converting to a set is time-consuming);
    # the PyBEL graph is left to handle the occasional duplicate.
    for row in query_results:
        reaction_uri, reaction_id, reaction_equation = row
        # Retrieve the reactants and products of the reaction
        sides = _participants(rdf_graph, reaction_uri)
        reaction_node = dsl.Reaction(
            sides['reactants'],
            sides['products'],
            namespace='RHEA',
            name=reaction_equation,
            identifier=reaction_id,
        )
        bel_graph.add_node_from_data(reaction_node)
    return bel_graph
Ejemplo n.º 3
0
def get_df() -> pd.DataFrame:
    """Get the BioGRID identifiers mapping dataframe.

    A ``taxonomy_id`` column is added by mapping ``ORGANISM_OFFICIAL_NAME``
    through ``_lookup``.
    """
    biogrid_version = bioversions.get_version('biogrid')
    identifiers_url = f'{BASE_URL}/BIOGRID-{biogrid_version}/BIOGRID-IDENTIFIERS-{biogrid_version}.tab.zip'
    identifiers_df = ensure_df(
        PREFIX,
        url=identifiers_url,
        skiprows=28,
        dtype=str,
        version=biogrid_version,
    )
    organism_names = identifiers_df['ORGANISM_OFFICIAL_NAME']
    identifiers_df['taxonomy_id'] = organism_names.map(_lookup)
    return identifiers_df
Ejemplo n.º 4
0
def ensure(version: Optional[str] = None) -> Path:
    """Ensure the reviewed UniProt names file (``reviewed.tsv.gz``) is available.

    :param version: The UniProt version to use. If not given, it is looked up
        with ``bioversions``.
    :returns: Whatever ``RAW_MODULE.ensure`` returns for the file (annotated as a path).
    """
    resolved_version = bioversions.get_version('uniprot') if version is None else version
    return RAW_MODULE.ensure(
        PREFIX,
        resolved_version,
        name='reviewed.tsv.gz',
        url=REVIEWED_URL,
    )
Ejemplo n.º 5
0
def get_chembl_xrefs_df(version: Optional[str] = None) -> pd.DataFrame:
    """Get all ChEMBL equivalences.

    The compound equivalences and the protein equivalences are concatenated,
    in that order, into one dataframe.

    :param version: The ChEMBL version to use. If not given, it is looked up
        with ``bioversions``.
    """
    chembl_version = version if version is not None else bioversions.get_version('chembl')
    compound_df = get_chembl_compound_equivalences(version=chembl_version)
    protein_df = get_chembl_protein_equivalences(version=chembl_version)
    return pd.concat([compound_df, protein_df])
Ejemplo n.º 6
0
def ensure(version: Optional[str] = None, force: bool = False) -> Path:
    """Ensure the reviewed UniProt names file (``reviewed.tsv.gz``) is available.

    :param version: The UniProt version to use. If not given, it is looked up
        with ``bioversions``.
    :param force: Passed through unchanged to ``RAW_MODULE.ensure``.
    :returns: Whatever ``RAW_MODULE.ensure`` returns for the file (annotated as a path).
    """
    resolved_version = bioversions.get_version("uniprot") if version is None else version
    return RAW_MODULE.ensure(
        PREFIX,
        resolved_version,
        name="reviewed.tsv.gz",
        url=REVIEWED_URL,
        force=force,
    )
Ejemplo n.º 7
0
 def test_get(self):
     """Test that a string version can be looked up for each prefix."""
     for prefix in ("reactome", "kegg"):
         # One sub-test per prefix so a failure names the prefix involved
         with self.subTest(prefix=prefix):
             version = bioversions.get_version(prefix)
             self.assertIsInstance(version, str)
Ejemplo n.º 8
0
def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame:
    """Get PubChem Compound-MeSH xrefs.

    :param version: The PubChem version to use. If not given, it is looked up
        with ``bioversions``.
    :returns: A dataframe with the columns in ``XREF_COLUMNS`` and one row per
        entry in the PubChem-to-MeSH identifier mapping.
    """
    pubchem_version = bioversions.get_version("pubchem") if version is None else version
    # The URL is only recorded as the last field of every row
    source_url = _get_pubchem_extras_url(pubchem_version, "CID-MeSH")
    cid_to_mesh = get_pubchem_id_to_mesh_id(version=pubchem_version)
    rows = []
    for compound_id, mesh_id in cid_to_mesh.items():
        rows.append(("pubchem.compound", compound_id, "mesh", mesh_id, source_url))
    return pd.DataFrame(rows, columns=XREF_COLUMNS)
Ejemplo n.º 9
0
def get_obo() -> Obo:
    """Get miRBase mature as OBO, using the miRBase version reported by ``bioversions``."""
    mirbase_version = bioversions.get_version("mirbase")
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {"version": mirbase_version}
    return Obo(
        ontology=PREFIX,
        name="miRBase Mature",
        data_version=mirbase_version,
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
        auto_generated_by=f"bio2obo:{PREFIX}",
    )
Ejemplo n.º 10
0
def get_obo() -> Obo:
    """Get MeSH as OBO, using the MeSH version reported by ``bioversions``."""
    mesh_version = bioversions.get_version("mesh")
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {"version": mesh_version}
    return Obo(
        ontology=PREFIX,
        name="Medical Subject Headings",
        data_version=mesh_version,
        auto_generated_by=f"bio2obo:{PREFIX}",
        iter_terms=get_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 11
0
def get_obo() -> Obo:
    """Return ChEMBL compounds as OBO, using the ChEMBL version reported by ``bioversions``."""
    chembl_version = bioversions.get_version("chembl")
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {"version": chembl_version}
    return Obo(
        ontology="chembl.compound",
        name="ChEMBL",
        auto_generated_by=f"bio2obo:{PREFIX}",
        data_version=chembl_version,
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 12
0
def get_obo() -> Obo:
    """Get DrugBank Salts as OBO, using the DrugBank version reported by ``bioversions``."""
    drugbank_version = bioversions.get_version('drugbank')
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {'version': drugbank_version}
    return Obo(
        ontology=PREFIX,
        name='DrugBank Salts',
        data_version=drugbank_version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 13
0
def get_obo() -> Obo:
    """Get DrugCentral as OBO, using the version ``bioversions`` reports for ``PREFIX``."""
    drugcentral_version = bioversions.get_version(PREFIX)
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {'version': drugcentral_version}
    return Obo(
        ontology=PREFIX,
        name='DrugCentral',
        auto_generated_by=f'bio2obo:{PREFIX}',
        data_version=drugcentral_version,
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 14
0
def get_obo() -> Obo:
    """Get miRBase families as OBO, using the miRBase version reported by ``bioversions``."""
    mirbase_version = bioversions.get_version('mirbase')
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {'version': mirbase_version}
    return Obo(
        ontology=PREFIX,
        name='miRBase Families',
        data_version=mirbase_version,
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
        auto_generated_by=f'bio2obo:{PREFIX}',
    )
Ejemplo n.º 15
0
def get_obo() -> Obo:
    """Get PFAM as OBO, using the PFAM version reported by ``bioversions``."""
    pfam_version = bioversions.get_version("pfam")
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {"version": pfam_version}
    return Obo(
        ontology=PREFIX,
        name="PFAM",
        auto_generated_by=f"bio2obo:{PREFIX}",
        data_version=pfam_version,
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 16
0
def get_obo() -> Obo:
    """Get PFAM Clans as OBO, using the PFAM version reported by ``bioversions``."""
    pfam_version = bioversions.get_version('pfam')
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {'version': pfam_version}
    return Obo(
        ontology=PREFIX,
        name='PFAM Clans',
        auto_generated_by=f'bio2obo:{PREFIX}',
        data_version=pfam_version,
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 17
0
def get_obo() -> Obo:
    """Get KEGG Genome as OBO, using the KEGG version reported by ``bioversions``."""
    kegg_version = bioversions.get_version("kegg")
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {"version": kegg_version}
    return Obo(
        ontology=KEGG_GENOME_PREFIX,
        name="KEGG Genome",
        data_version=kegg_version,
        auto_generated_by=f"bio2obo:{KEGG_GENOME_PREFIX}",
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 18
0
def get_obo() -> Obo:
    """Return ChEMBL compounds as OBO, using the ChEMBL version reported by ``bioversions``."""
    chembl_version = bioversions.get_version('chembl')
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {'version': chembl_version}
    return Obo(
        ontology='chembl.compound',
        name='ChEMBL',
        auto_generated_by=f'bio2obo:{PREFIX}',
        data_version=chembl_version,
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 19
0
def get_obo() -> Obo:
    """Get NPASS as OBO, using the NPASS version reported by ``bioversions``.

    The resolved version is forwarded to the term iterator and also recorded
    as the ontology's ``data_version``, so the exported ontology states which
    NPASS release it was built from.
    """
    version = bioversions.get_version('npass')
    return Obo(
        ontology=PREFIX,
        name='Natural Products Activity and Species Source Database',
        iter_terms=iter_terms,
        iter_terms_kwargs=dict(version=version),
        # Previously omitted: without it the version used to build the terms
        # was not recorded on the ontology itself.
        data_version=version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        pattern=r'NPC\d+',
    )
Ejemplo n.º 20
0
def get_obo() -> Obo:
    """Get ExPASy as OBO, using the ExPASy version reported by ``bioversions``."""
    expasy_version = bioversions.get_version("expasy")
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {"version": expasy_version}
    return Obo(
        ontology=PREFIX,
        name="ExPASy Enzyme Nomenclature",
        data_version=expasy_version,
        auto_generated_by=f"bio2obo:{PREFIX}",
        typedefs=[has_member, has_molecular_function],
        iter_terms=get_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 21
0
def get_obo() -> Obo:
    """Get Rhea as OBO, using the version ``bioversions`` reports for ``PREFIX``."""
    rhea_version = bioversions.get_version(PREFIX)
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {'version': rhea_version}
    return Obo(
        ontology=PREFIX,
        name='Rhea',
        data_version=rhea_version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        typedefs=[has_lr, has_bi, has_rl],
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 22
0
def get_obo(force: bool = False) -> Obo:
    """Get DrugBank as OBO, using the DrugBank version reported by ``bioversions``.

    :param force: Forwarded unchanged to the term iterator alongside the version.
    """
    drugbank_version = bioversions.get_version('drugbank')
    terms_kwargs = {'version': drugbank_version, 'force': force}
    return Obo(
        ontology=PREFIX,
        name='DrugBank',
        auto_generated_by=f'bio2obo:{PREFIX}',
        data_version=drugbank_version,
        typedefs=[has_salt],
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 23
0
def get_obo(skip_missing: bool = True) -> Obo:
    """Get KEGG Pathways as OBO, using the KEGG version reported by ``bioversions``.

    :param skip_missing: Forwarded unchanged to the term iterator alongside the version.
    """
    kegg_version = bioversions.get_version('kegg')
    terms_kwargs = {'skip_missing': skip_missing, 'version': kegg_version}
    return Obo(
        ontology=KEGG_PATHWAY_PREFIX,
        name='KEGG Pathways',
        data_version=kegg_version,
        auto_generated_by=f'bio2obo:{KEGG_PATHWAY_PREFIX}',
        typedefs=[from_kegg_species, from_species, species_specific, has_part],
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 24
0
def get_obo() -> Obo:
    """Get miRBase as OBO, using the version ``bioversions`` reports for ``PREFIX``."""
    mirbase_version = bioversions.get_version(PREFIX)
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {"version": mirbase_version}
    return Obo(
        ontology=PREFIX,
        name="miRBase",
        data_version=mirbase_version,
        auto_generated_by=f"bio2obo:{PREFIX}",
        typedefs=[from_species, has_mature],
        iter_terms=get_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 25
0
def get_obo() -> Obo:
    """Get KEGG Genes as OBO, using the KEGG version reported by ``bioversions``."""
    kegg_version = bioversions.get_version("kegg")
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {"version": kegg_version}
    return Obo(
        ontology=KEGG_GENES_PREFIX,
        name="KEGG Genes",
        data_version=kegg_version,
        auto_generated_by=f"bio2obo:{KEGG_GENES_PREFIX}",
        typedefs=[from_species, from_kegg_species, has_gene_product],
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 26
0
def get_obo() -> Obo:
    """Get MSigDB as OBO, using the version ``bioversions`` reports for ``PREFIX``."""
    msig_version = bioversions.get_version(PREFIX)
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {'version': msig_version}
    return Obo(
        ontology=PREFIX,
        name='Molecular Signatures Database',
        data_version=msig_version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        typedefs=[has_part],
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 27
0
def get_obo() -> Obo:
    """Get Reactome as OBO, using the Reactome version reported by ``bioversions``."""
    reactome_version = bioversions.get_version('reactome')
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {'version': reactome_version}
    return Obo(
        ontology=PREFIX,
        name='Reactome',
        data_version=reactome_version,
        auto_generated_by=f'bio2obo:{PREFIX}',
        typedefs=[from_species, has_part],
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )
Ejemplo n.º 28
0
def get_obo() -> Obo:
    """Get InterPro as OBO, using the version ``bioversions`` reports for ``PREFIX``."""
    interpro_version = bioversions.get_version(PREFIX)
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {"version": interpro_version}
    return Obo(
        ontology=PREFIX,
        name="InterPro",
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
        data_version=interpro_version,
        auto_generated_by=f"bio2obo:{PREFIX}",
    )
Ejemplo n.º 29
0
def _ensure_cid_name_path(*, version: Optional[str] = None, force: bool = False) -> str:
    """Ensure the PubChem ``CID-Title.gz`` file and return what ``ensure_path`` gives back for it.

    :param version: The PubChem version to use. If not given, it is looked up
        with ``bioversions``.
    :param force: Passed through unchanged to ``ensure_path``.
    """
    pubchem_version = bioversions.get_version("pubchem") if version is None else version
    # 2 tab-separated columns: compound_id, name
    url = _get_pubchem_extras_url(pubchem_version, "CID-Title.gz")
    return ensure_path(PREFIX, url=url, version=pubchem_version, force=force)
Ejemplo n.º 30
0
def get_obo() -> Obo:
    """Get UniProt as OBO, using the UniProt version reported by ``bioversions``."""
    uniprot_version = bioversions.get_version('uniprot')
    # The same version is recorded on the ontology and forwarded to the term iterator
    terms_kwargs = {'version': uniprot_version}
    return Obo(
        ontology=PREFIX,
        name='UniProt',
        auto_generated_by=f'bio2obo:{PREFIX}',
        data_version=uniprot_version,
        typedefs=[from_species],
        iter_terms=iter_terms,
        iter_terms_kwargs=terms_kwargs,
    )