def _ena_ftp_to_aspera_url(ftp_url: str) -> str:
    """Rewrite an ENA FTP URL into its Aspera (fasp) form.

    From: ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz
    To:   era-fasp@fasp.sra.ebi.ac.uk:/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz

    Every substitution is limited to its first occurrence so that only the
    scheme and host are touched; a remote path or file name that happens to
    contain 'ftp' or '.uk/' is left intact.
    """
    aspera_url = ftp_url.replace('ftp://', 'era-fasp@', 1)
    # 'era-fasp@' contains no 'ftp', so the first match is the host prefix.
    aspera_url = aspera_url.replace('ftp', 'fasp', 1)
    return aspera_url.replace('.uk/', '.uk:/', 1)


def _download_file(download_url: str,
                   downloader_job: DownloaderJob,
                   target_file_path: str,
                   force_ftp: bool = False) -> bool:
    """Download file dispatcher. Dispatches to the FTP or Aspera downloader.

    Args:
        download_url: URL of the file to fetch.
        downloader_job: The job record, passed through to the downloader.
        target_file_path: Local path handed to the chosen downloader.
        force_ftp: When True, skip both Aspera branches and use FTP.

    Returns:
        The result of the downloader that was selected.
    """
    # SRA files hosted at ENA have Aspera downloads.
    if 'ftp.sra.ebi.ac.uk' in download_url and not force_ftp:
        return _download_file_aspera(_ena_ftp_to_aspera_url(download_url),
                                     downloader_job,
                                     target_file_path,
                                     source="ENA")

    if "ncbi.nlm.nih.gov" in download_url and not force_ftp:
        # Try to convert old-style endpoints into new-style endpoints if possible
        try:
            if 'anonftp' in download_url:
                accession = download_url.split('/')[-1].split('.sra')[0]
                new_url = get_fasp_sra_download(accession)
                if new_url:
                    download_url = new_url
        except Exception:
            # Deliberately best-effort: if the lookup fails for any reason,
            # fall back to the URL we were originally given.
            pass
        return _download_file_aspera(download_url,
                                     downloader_job,
                                     target_file_path,
                                     source="NCBI")

    return _download_file_ftp(download_url, downloader_job, target_file_path)
def _build_ncbi_file_url(run_accession: str):
    """Build the URL of the hypothetical .sra file for a run accession.

    The FASP-specific endpoint from `get_fasp_sra_download` is used when it
    yields a value. Otherwise one of the two NCBI URL templates is chosen at
    random and filled in with the accession and its three- and six-character
    prefixes.
    """
    template_fields = {
        'first_three': run_accession[:3],
        'first_six': run_accession[:6],
        'accession': run_accession,
    }

    # Prefer the FASP-specific endpoints if possible..
    fasp_url = get_fasp_sra_download(run_accession)
    if fasp_url:
        return fasp_url

    # ..else, load balancing via coin flip between the two templates.
    chosen_template = random.choice(
        [NCBI_DOWNLOAD_URL_TEMPLATE, NCBI_PRIVATE_DOWNLOAD_URL_TEMPLATE])
    return chosen_template.format(**template_fields)