예제 #1
0
    def get_genome_download_link(self, name, mask="soft", **kwargs):
        """
        Build the NCBI download link for the top-level genome sequence.

        The assembly records are loaded through ``self._get_genomes()`` the
        first time they are needed, i.e. whenever ``self.genomes`` is empty.

        Parameters
        ----------
        name : str
            Genome name. It must equal the ``asm_name`` of a record exactly,
            either as stored or with its spaces replaced by underscores.
        mask : str, optional
            Has no effect on the returned link. Any value other than
            "soft" only produces a notice on stderr.
        **kwargs
            Accepted but not used by this method.

        Returns
        -------
        tuple
            (name, link) where name is the value that was passed in and
            link is the record's ``ftp_path`` (with ``ftp://`` rewritten to
            ``https://``) extended with the ``*_genomic.fna.gz`` file name.

        Raises
        ------
        exceptions.GenomeDownloadError
            If no record matches ``name``.
        """
        if mask != "soft":
            sys.stderr.write("ignoring mask parameter for NCBI at download.\n")

        if not self.genomes:
            self.genomes = self._get_genomes()

        for record in self.genomes:
            asm_name = record["asm_name"]
            accepted = (asm_name, asm_name.replace(" ", "_"))
            if name not in accepted:
                continue

            base_url = record["ftp_path"].replace("ftp://", "https://")
            # The sequence file is named after the last path component.
            assembly_dir = base_url.split("/")[-1]
            return name, base_url + "/" + assembly_dir + "_genomic.fna.gz"

        raise exceptions.GenomeDownloadError("Could not download genome from NCBI")
예제 #2
0
    def get_genome_download_link(self, name, mask="soft", version=None):
        """
        Build the NCBI ftp link for the top-level genome sequence.

        The assembly records are loaded through ``self._get_genomes()`` the
        first time they are needed, i.e. whenever ``self.genomes`` is empty.

        Parameters
        ----------
        name : str
            Genome name. It must equal the ``asm_name`` of a record exactly.
        mask : str, optional
            Has no effect on the returned link. Passing "hard" only
            produces a notice on stderr.
        version : optional
            Accepted but not used by this method.

        Returns
        -------
        tuple
            (name, link) where name is the matched NCBI asm_name and link
            is the record's ``ftp_path`` extended with the
            ``*_genomic.fna.gz`` file name.

        Raises
        ------
        exceptions.GenomeDownloadError
            If no record matches ``name``.
        """
        if mask == "hard":
            sys.stderr.write("ignoring mask parameter for NCBI at download.\n")

        if not self.genomes:
            self.genomes = self._get_genomes()

        for record in self.genomes:
            if record["asm_name"] != name:
                continue

            # Example ftp_path value:
            # ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/004/195/GCF_000004195.3_Xenopus_tropicalis_v9.1
            # The sequence file is named after the last path component.
            ftp_path = record["ftp_path"]
            assembly_dir = ftp_path.split("/")[-1]
            return name, ftp_path + "/" + assembly_dir + "_genomic.fna.gz"

        raise exceptions.GenomeDownloadError(
            "Could not download genome from NCBI")
예제 #3
0
    def get_genome_download_link(self, name, mask="soft", **kwargs):
        """
        Find a working UCSC http link to the genome sequence.

        Each candidate URL template is filled in with ``name`` and probed
        with an HTTP HEAD request; the first one answering with status 200
        is returned.

        Parameters
        ----------
        name : str
            Genome build name. It is inserted into the URL templates as
            given, so it has to be the exact build name.
        mask : str, optional
            "hard" selects the masked URL templates; every other value
            uses the regular ones. Values other than "soft" and "hard"
            additionally produce a notice on stderr.
        **kwargs
            Accepted but not used by this method.

        Returns
        -------
        tuple
            (name, link) where name is the genome build identifier and
            link is a str with the http download link.

        Raises
        ------
        exceptions.GenomeDownloadError
            If none of the candidate links answers with status 200.
        """
        if mask not in ["soft", "hard"]:
            sys.stderr.write("ignoring mask parameter for UCSC at download.\n")

        regular_templates = [self.ucsc_url, self.alt_ucsc_url]
        if mask == "hard":
            templates = [self.ucsc_url_masked, self.alt_ucsc_url_masked]
        else:
            templates = regular_templates

        for template in templates:
            link = template.format(name)
            # Probe with HEAD so nothing is downloaded at this point.
            if requests.head(link).status_code == 200:
                return name, link

        raise exceptions.GenomeDownloadError(
            "Could not download genome {} from UCSC".format(name)
        )
예제 #4
0
 def _get_genome_info(self, name):
     """Get genome_info from json request."""
     try:
         assembly_id = ""
         for genome in self.list_available_genomes(as_dict=True):
             if genome.get("assembly_name", "") == name:
                 assembly_id = genome.get("assembly_id", "")
                 break
         if assembly_id:
             ext = "info/genomes/assembly/" + assembly_id + "/?"
             genome_info = self.request_json(ext)
         else:
             raise exceptions.GenomeDownloadError(
                 "Could not download genome {} from Ensembl".format(name))
     except requests.exceptions.HTTPError as e:
         sys.stderr.write("Species not found: {}".format(e))
         raise exceptions.GenomeDownloadError(
             "Could not download genome {} from Ensembl".format(name))
     return genome_info