コード例 #1
0
ファイル: api.py プロジェクト: cthoyt/zenodo-client
    def download(self,
                 record_id: Union[int, str],
                 name: str,
                 *,
                 force: bool = False,
                 parts: PartsHint = None) -> Path:
        """Fetch one named file from a Zenodo record via :func:`pystow.ensure`.

        The record's JSON is obtained through :meth:`get_record`. From it, the
        concept record id, the version (falling back to ``"v1"`` when the
        metadata has none), and the download link of the requested file are
        extracted, and the actual download/caching is delegated to
        :func:`pystow.ensure`.

        :param record_id: The Zenodo record id
        :param name: The name (``key``) of the file in the Zenodo record
        :param parts: Optional arguments on where to store with :func:`pystow.ensure`. If none given, goes in
            ``<PYSTOW_HOME>/zenodo/<CONCEPT_RECORD_ID>/<VERSION>/<NAME>``, where ``CONCEPT_RECORD_ID`` is the
            consistent concept record ID for all versions of the same record. If a function is given, it is
            called with 3 positional arguments: concept record id, record id (as a string), and version, and
            should return a sequence for PyStow. Any other value is used as the sequence directly. The name
            of the file is passed separately as the file name.
        :param force: Should the file be re-downloaded if it already is cached? Defaults to false.
        :returns: the path to the downloaded file.
        :raises FileNotFoundError: If the Zenodo record doesn't have a file with the given name

        For example, to download the ``triples.tsv`` file of NSoC-KG, you can
        use the following command:

        >>> path = Zenodo().download('4574555', 'triples.tsv')

        The default file path uses :mod:`pystow` under the ``zenodo`` module and uses the
        "concept record ID" as a submodule since that is the consistent identifier
        between different records that are versions of the same data.
        """
        record = self.get_record(record_id).json()
        # conceptrecid is the consistent record ID for all versions of the same record
        concept_record_id = record["conceptrecid"]
        # FIXME send error report to zenodo about this - shouldn't version be required?
        version = record["metadata"].get("version", "v1")
        logger.debug("version for zenodo.record:%s is %s", record_id, version)

        # Lazily scan the file listing and stop at the first entry whose key matches.
        entry = next(
            (candidate for candidate in record["files"] if candidate["key"] == name),
            None,
        )
        if entry is None:
            raise FileNotFoundError(
                f"zenodo.record:{record_id} does not have a file with key {name}"
            )
        url = entry["links"]["self"]

        if callable(parts):
            # Caller-supplied factory decides the storage location.
            location = parts(concept_record_id, str(record_id), version)
        elif parts is None:
            location = ["zenodo", concept_record_id, version]
        else:
            # An explicit sequence is used as given.
            location = parts
        return pystow.ensure(*location, name=name, url=url, force=force)
コード例 #2
0
def get_n2t(force: bool = True):
    """Download the N2T registry, regroup it by namespace, and dump it as YAML.

    :param force: Passed through to :func:`pystow.ensure` as ``force``.
    :returns: A dictionary from each namespace to the result of
        ``_clean_providers`` applied to that namespace's parsed records.
    """
    by_namespace = itemgetter('namespace')

    source_path = pystow.ensure('bioregistry', url=URL, force=force)
    # they give malformed YAML so time to write a new parser
    with open(source_path) as source:
        records = list(_parse(source))
    # groupby only merges adjacent items, so the records must be sorted by the same key first
    records.sort(key=by_namespace)

    registry = {}
    for namespace, group in itt.groupby(records, key=by_namespace):
        registry[namespace] = _clean_providers(group)

    with PATH.open('w') as target:
        yaml.dump(registry, target)
    return registry
コード例 #3
0
def ensure_agrovoc_graph(version: str) -> Graph:
    """Ensure the core N-Triples archive for an AGROVOC release and load it as a graph.

    :param version: The AGROVOC release identifier, interpolated into both
        the download URL and the name of the file inside the archive.
    :returns: The parsed graph with the ``skosxl``, ``skos``, ``dcterms``,
        and ``agrovoc`` prefixes bound.
    """
    release_url = f"https://agrovoc.fao.org/agrovocReleases/agrovoc_{version}_core.nt.zip"
    archive = pystow.ensure("bio", "agrovoc", version, url=release_url, name="core.nt.zip")

    member = f"agrovoc_{version}_core.nt"
    agrovoc = read_zipfile_rdf(archive, inner_path=member, format="nt")

    # Prefix bindings, applied in this order.
    bindings = (
        ("skosxl", "http://www.w3.org/2008/05/skos-xl#"),
        ("skos", SKOS),
        ("dcterms", DCTERMS),
        ("agrovoc", "http://aims.fao.org/aos/agrontology#"),
    )
    for prefix, namespace in bindings:
        agrovoc.bind(prefix, namespace)
    return agrovoc
コード例 #4
0
def download_drugbank(
    username: Optional[str] = None,
    password: Optional[str] = None,
    version: Optional[str] = None,
    prefix: Union[None, str, Sequence[str]] = None,
    force: bool = False,
) -> Path:
    """Download the given version of DrugBank.

    :param username:
        The DrugBank username. If not passed, looks up in the environment
        ``DRUGBANK_USERNAME``. If not found, raises a ValueError.
    :param password:
        The DrugBank password. If not passed, looks up in the environment
        ``DRUGBANK_PASSWORD``. If not found, raises a ValueError.
    :param version:
        The DrugBank version. If not passed, uses :mod:`bioversions` to
        look up the most recent version.
    :param prefix:
        The prefix and subkeys passed to :func:`pystow.ensure` to specify
        a non-default location to download the data to. A single string is
        treated as a one-element prefix; defaults to ``["drugbank"]``.
    :param force:
        Should the data be re-downloaded, even if it exists?
    :returns: The path to the local DrugBank file after it's been downloaded

    :raises ImportError: If no version is specified and :mod:`bioversions`
        is not installed. The original import failure is attached as the
        exception's ``__cause__``.
    """
    if version is None:
        # bioversions is an optional dependency, so it is imported lazily and
        # only when the caller did not pin a version.
        try:
            import bioversions
        except ImportError as err:
            # Chain explicitly so the traceback shows this as a direct
            # consequence of the failed import rather than an unrelated error
            # raised during exception handling.
            raise ImportError(
                "must first `pip install bioversions` to get latest DrugBank version automatically"
            ) from err
        else:
            version = bioversions.get_version("drugbank")

    # The release URL uses dashes where the version string has dots.
    url = (
        f'https://go.drugbank.com/releases/{version.replace(".", "-")}/downloads/all-full-database'
    )

    if prefix is None:
        prefix = ["drugbank"]
    elif isinstance(prefix, str):
        prefix = [prefix]

    # Explicit arguments are handed to get_config as the passthrough value;
    # raise_on_missing=True asks it to fail if no value can be resolved.
    username = get_config("drugbank",
                          "username",
                          passthrough=username,
                          raise_on_missing=True)
    password = get_config("drugbank",
                          "password",
                          passthrough=password,
                          raise_on_missing=True)

    return ensure(
        *prefix,
        version,
        url=url,
        name="full database.xml.zip",
        download_kwargs=dict(
            backend="requests",
            stream=True,
            auth=(username, password),
        ),
        force=force,
    )
コード例 #5
0
def ensure(version: Optional[str] = None, **kwargs) -> Path:
    """Ensure the file for a version is available through :func:`pystow.ensure`.

    :param version: The version to fetch. When ``None``, the result of
        ``get_version()`` is used instead.
    :param kwargs: Extra keyword arguments forwarded to :func:`pystow.ensure`.
    :returns: Whatever :func:`pystow.ensure` returns for the resolved version.
    """
    resolved = get_version() if version is None else version
    download_url = get_url(version=resolved)
    return pystow.ensure("pyobo", "raw", PREFIX, resolved, url=download_url, **kwargs)
コード例 #6
0
def get_hetionet() -> BELGraph:
    """Ensure the Hetionet file from ``JSON_BZ2_URL`` and convert it to BEL.

    :returns: The result of ``from_hetionet_gz`` applied to the POSIX-style
        path of the ensured file.
    """
    cached = pystow.ensure('bio2bel', 'hetionet', url=JSON_BZ2_URL)
    # The converter is given a string path rather than the path object.
    posix_path = cached.as_posix()
    return from_hetionet_gz(posix_path)