def download(
    self,
    record_id: Union[int, str],
    name: str,
    *,
    force: bool = False,
    parts: PartsHint = None,
) -> Path:
    """Download a single named file from a Zenodo record.

    :param record_id: The Zenodo record id
    :param name: The key (file name) of the file inside the Zenodo record
    :param force: Forwarded to :func:`pystow.ensure`; should the file be
        re-downloaded if it is already cached? Defaults to false.
    :param parts: Optional location to store the file, forwarded to
        :func:`pystow.ensure` as its positional parts.

        - If none is given, the parts are ``zenodo``, the concept record ID,
          and the version, in that order. The concept record ID is the
          identifier shared by all versions of the same record.
        - If a callable is given, it is called with three positional
          arguments (concept record id, record id as a string, and version)
          and must return the sequence of parts to use.
        - Otherwise the given sequence is used as is.

        The file name is passed separately as ``name`` and does not need to
        be part of the sequence.
    :returns: the path returned by :func:`pystow.ensure` for the file.
    :raises FileNotFoundError: If the Zenodo record doesn't have a file
        with the given name

    For example, to download ``triples.tsv`` from record ``4574555``:

    >>> path = Zenodo().download('4574555', 'triples.tsv')
    """
    record = self.get_record(record_id).json()
    # conceptrecid is the consistent record ID for all versions of the same record
    concept_id = record["conceptrecid"]
    # FIXME send error report to zenodo about this - shouldn't version be required?
    version = record["metadata"].get("version", "v1")
    logger.debug("version for zenodo.record:%s is %s", record_id, version)

    # Pick the first file entry whose key matches the requested name
    entry = next(
        (candidate for candidate in record["files"] if candidate["key"] == name),
        None,
    )
    if entry is None:
        raise FileNotFoundError(
            f"zenodo.record:{record_id} does not have a file with key {name}"
        )
    url = entry["links"]["self"]

    if callable(parts):
        parts = parts(concept_id, str(record_id), version)
    elif parts is None:
        parts = ["zenodo", concept_id, version]
    return pystow.ensure(*parts, name=name, url=url, force=force)
def get_n2t(force: bool = True):
    """Get the N2T registry.

    The source file is ensured with :func:`pystow.ensure`, parsed with
    ``_parse``, grouped by the ``namespace`` field, and each group is cleaned
    with ``_clean_providers``. The resulting mapping is dumped as YAML to
    ``PATH`` before being returned.

    :param force: Forwarded to :func:`pystow.ensure` as ``force``.
    :returns: A dictionary from namespace to the cleaned providers of that
        namespace.
    """
    by_namespace = itemgetter('namespace')
    source_path = pystow.ensure('bioregistry', url=URL, force=force)

    # they give malformed YAML so time to write a new parser
    with open(source_path) as source:
        # groupby only merges adjacent records, so sort by the same key first
        records = sorted(_parse(source), key=by_namespace)

    registry = {}
    for namespace, group in itt.groupby(records, key=by_namespace):
        registry[namespace] = _clean_providers(group)

    with PATH.open('w') as target:
        yaml.dump(registry, target)
    return registry
def ensure_agrovoc_graph(version: str) -> Graph:
    """Download and parse the given version of AGROVOC.

    :param version: The AGROVOC release identifier, interpolated into both
        the download URL and the name of the file inside the zip archive.
    :returns: The graph read from the archive, with the ``skosxl``, ``skos``,
        ``dcterms``, and ``agrovoc`` prefixes bound.
    """
    archive_path = pystow.ensure(
        "bio",
        "agrovoc",
        version,
        url=f"https://agrovoc.fao.org/agrovocReleases/agrovoc_{version}_core.nt.zip",
        name="core.nt.zip",
    )
    graph = read_zipfile_rdf(
        archive_path,
        inner_path=f"agrovoc_{version}_core.nt",
        format="nt",
    )

    # Bound in this fixed order, the same as binding each one by hand
    bindings = (
        ("skosxl", "http://www.w3.org/2008/05/skos-xl#"),
        ("skos", SKOS),
        ("dcterms", DCTERMS),
        ("agrovoc", "http://aims.fao.org/aos/agrontology#"),
    )
    for prefix, namespace in bindings:
        graph.bind(prefix, namespace)
    return graph
def download_drugbank(
    username: Optional[str] = None,
    password: Optional[str] = None,
    version: Optional[str] = None,
    prefix: Union[None, str, Sequence[str]] = None,
    force: bool = False,
) -> Path:
    """Download the given version of DrugBank.

    :param username: The DrugBank username. If not passed, it is looked up
        with ``get_config("drugbank", "username", raise_on_missing=True)``
        (documented upstream as the ``DRUGBANK_USERNAME`` environment
        variable, raising a ValueError if not found).
    :param password: The DrugBank password. If not passed, it is looked up
        with ``get_config("drugbank", "password", raise_on_missing=True)``
        (documented upstream as the ``DRUGBANK_PASSWORD`` environment
        variable, raising a ValueError if not found).
    :param version: The DrugBank version. If not passed, uses
        :mod:`bioversions` to look up the most recent version.
    :param prefix: The prefix and subkeys passed to :func:`pystow.ensure` to
        specify a non-default location to download the data to. A single
        string is treated as a one-element sequence; the default is
        ``["drugbank"]``. The version is always appended after the prefix.
    :param force: Should the data be re-downloaded, even if it exists?
    :returns: The path to the local DrugBank file after it's been downloaded
    :raises ImportError: If no version is specified and :mod:`bioversions`
        is not installed. The original import failure is attached as the
        exception's ``__cause__``.
    """
    if version is None:
        try:
            import bioversions
        except ImportError as err:
            # Chain the original failure so the traceback shows why the import broke
            raise ImportError(
                "must first `pip install bioversions` to get latest DrugBank version automatically"
            ) from err
        else:
            version = bioversions.get_version("drugbank")

    # Release URLs use dashes where the version string uses dots
    url = (
        f'https://go.drugbank.com/releases/{version.replace(".", "-")}/downloads/all-full-database'
    )

    if prefix is None:
        prefix = ["drugbank"]
    elif isinstance(prefix, str):
        # A bare string must be wrapped, otherwise ``*prefix`` would splat its characters
        prefix = [prefix]

    username = get_config("drugbank", "username", passthrough=username, raise_on_missing=True)
    password = get_config("drugbank", "password", passthrough=password, raise_on_missing=True)

    return ensure(
        *prefix,
        version,
        url=url,
        name="full database.xml.zip",
        download_kwargs=dict(
            backend="requests",
            stream=True,
            auth=(username, password),
        ),
        force=force,
    )
def ensure(version: Optional[str] = None, **kwargs) -> Path:
    """Ensure the given version is downloaded.

    :param version: The version to download. If not given, the result of
        ``get_version()`` is used.
    :param kwargs: Extra keyword arguments forwarded to :func:`pystow.ensure`.
    :returns: The path returned by :func:`pystow.ensure`.
    """
    resolved_version = get_version() if version is None else version
    source_url = get_url(version=resolved_version)
    return pystow.ensure(
        "pyobo",
        "raw",
        PREFIX,
        resolved_version,
        url=source_url,
        **kwargs,
    )
def get_hetionet() -> BELGraph:
    """Get Hetionet as a BEL graph.

    The file at ``JSON_BZ2_URL`` is ensured with :func:`pystow.ensure` under
    ``bio2bel/hetionet``, and its POSIX path is handed to
    ``from_hetionet_gz`` for conversion.
    """
    return from_hetionet_gz(
        pystow.ensure('bio2bel', 'hetionet', url=JSON_BZ2_URL).as_posix()
    )