def get_genome_download_link(self, name, mask="soft", **kwargs):
    """
    Return NCBI https link to top-level genome sequence

    Parameters
    ----------
    name : str
        Genome name. Must match an assembly's ``asm_name`` exactly, either
        verbatim or with its spaces replaced by underscores.
    mask : str, optional
        Masking level. Not used to build the link; any value other than
        "soft" only triggers a notice on stderr.
    **kwargs
        Accepted and ignored.

    Returns
    ------
    tuple
        (name, link) where name is the genome name as passed in and link
        is a str with the https download link.

    Raises
    ------
    exceptions.GenomeDownloadError
        If no assembly matches ``name``.
    """
    if mask != "soft":
        sys.stderr.write("ignoring mask parameter for NCBI at download.\n")

    # Fetch the assembly list only when it has not been loaded yet.
    if not self.genomes:
        self.genomes = self._get_genomes()

    for genome in self.genomes:
        asm_name = genome["asm_name"]
        if name in (asm_name, asm_name.replace(" ", "_")):
            # Strip trailing slashes so the last path component (the
            # assembly directory, reused as the file prefix) is never empty.
            url = genome["ftp_path"].rstrip("/")
            url = url.replace("ftp://", "https://")
            url += "/" + url.split("/")[-1] + "_genomic.fna.gz"
            return name, url

    raise exceptions.GenomeDownloadError("Could not download genome from NCBI")
def get_genome_download_link(self, name, mask="soft", version=None):
    """
    Return NCBI ftp link to top-level genome sequence

    Parameters
    ----------
    name : str
        Genome name. Must be identical to an assembly's ``asm_name``;
        the lookup fails if the exact name is not found.
    mask : str, optional
        Masking level. Not used to build the link; "hard" only triggers
        a notice on stderr.
    version : optional
        Accepted and ignored.

    Returns
    ------
    tuple
        (name, link) where name is the NCBI asm_name identifier and link
        is a str with the download link built from the assembly's
        ``ftp_path``.

    Raises
    ------
    exceptions.GenomeDownloadError
        If no assembly matches ``name``.
    """
    if mask == "hard":
        sys.stderr.write("ignoring mask parameter for NCBI at download.\n")

    # Fetch the assembly list only when it has not been loaded yet.
    if not self.genomes:
        self.genomes = self._get_genomes()

    for genome in self.genomes:
        if genome["asm_name"] == name:
            # Example ftp_path:
            # 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/004/195/GCF_000004195.3_Xenopus_tropicalis_v9.1'
            # Strip trailing slashes so the last path component (the
            # assembly directory, reused as the file prefix) is never empty.
            url = genome["ftp_path"].rstrip("/")
            url += "/" + url.split("/")[-1] + "_genomic.fna.gz"
            return name, url

    raise exceptions.GenomeDownloadError(
        "Could not download genome from NCBI")
def get_genome_download_link(self, name, mask="soft", **kwargs):
    """
    Return UCSC http link to genome sequence

    Each candidate URL template is filled in with ``name`` and probed with
    an HTTP HEAD request; the first one answering with status 200 wins.

    Parameters
    ----------
    name : str
        Genome build name. The lookup fails if no candidate URL built
        from this exact name answers with status 200.
    mask : str, optional
        "hard" selects the masked URL templates; any other value selects
        the regular ones. Values other than "soft" and "hard" also
        trigger a notice on stderr.
    **kwargs
        Accepted and ignored.

    Returns
    ------
    tuple
        (name, link) where name is the genome build identifier and link
        is a str with the http download link.

    Raises
    ------
    exceptions.GenomeDownloadError
        If none of the candidate URLs answers with status 200.
    """
    if mask not in ("soft", "hard"):
        sys.stderr.write("ignoring mask parameter for UCSC at download.\n")

    regular_templates = (self.ucsc_url, self.alt_ucsc_url)
    if mask == "hard":
        templates = (self.ucsc_url_masked, self.alt_ucsc_url_masked)
    else:
        templates = regular_templates

    for template in templates:
        candidate = template.format(name)
        response = requests.head(candidate)
        if response.status_code != 200:
            continue
        return name, candidate

    raise exceptions.GenomeDownloadError(
        "Could not download genome {} from UCSC".format(name)
    )
def _get_genome_info(self, name):
    """
    Get genome_info from json request.

    The assembly id is taken from the first entry of
    ``self.list_available_genomes(as_dict=True)`` whose "assembly_name"
    equals ``name``; it is then used to query
    "info/genomes/assembly/<assembly_id>/?" through ``self.request_json``.

    Parameters
    ----------
    name : str
        Assembly name to look up.

    Returns
    ------
    The value returned by ``self.request_json`` for the assembly.

    Raises
    ------
    exceptions.GenomeDownloadError
        If no non-empty assembly id is found for ``name``, or if a
        ``requests.exceptions.HTTPError`` occurs during the lookup or
        the request.
    """
    failure = "Could not download genome {} from Ensembl"
    try:
        # First matching entry decides, even when its assembly_id is empty.
        assembly_id = next(
            (
                entry.get("assembly_id", "")
                for entry in self.list_available_genomes(as_dict=True)
                if entry.get("assembly_name", "") == name
            ),
            "",
        )
        if not assembly_id:
            raise exceptions.GenomeDownloadError(failure.format(name))
        return self.request_json("info/genomes/assembly/" + assembly_id + "/?")
    except requests.exceptions.HTTPError as e:
        sys.stderr.write("Species not found: {}".format(e))
        raise exceptions.GenomeDownloadError(failure.format(name))