예제 #1
0
def _download_file(download_url: str,
                   downloader_job: DownloaderJob,
                   target_file_path: str,
                   force_ftp: bool = False) -> bool:
    """Dispatch a download to the Aspera or the FTP downloader.

    URLs containing ``ftp.sra.ebi.ac.uk`` (ENA) or ``ncbi.nlm.nih.gov``
    (NCBI) are handed to ``_download_file_aspera`` unless ``force_ftp`` is
    set. Every other URL, and every URL when ``force_ftp`` is True, goes to
    ``_download_file_ftp``.

    Args:
        download_url: Location of the remote file.
        downloader_job: Job record passed through to the chosen downloader.
        target_file_path: Local path the file should be written to.
        force_ftp: When True, skip the Aspera branches entirely.

    Returns:
        Whatever the selected downloader returns.
    """
    import logging

    # SRA files hosted by ENA have Aspera downloads.
    if 'ftp.sra.ebi.ac.uk' in download_url and not force_ftp:
        # From: ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz
        # To: era-fasp@fasp.sra.ebi.ac.uk:/vol1/fastq/ERR036/ERR036000/ERR036000_1.fastq.gz
        #
        # Every replacement is limited to its first occurrence and the host
        # rewrite targets the full host name, so an 'ftp' or '.uk/' substring
        # later in the path or file name is left untouched.
        download_url = download_url.replace('ftp://', 'era-fasp@', 1)
        download_url = download_url.replace('ftp.sra.ebi.ac.uk',
                                            'fasp.sra.ebi.ac.uk', 1)
        download_url = download_url.replace('.uk/', '.uk:/', 1)
        return _download_file_aspera(download_url,
                                     downloader_job,
                                     target_file_path,
                                     source="ENA")
    elif "ncbi.nlm.nih.gov" in download_url and not force_ftp:
        # Try to convert old-style endpoints into new-style endpoints if possible
        try:
            if 'anonftp' in download_url:
                # The accession is the final path component up to '.sra'.
                accession = download_url.split('/')[-1].split('.sra')[0]
                new_url = get_fasp_sra_download(accession)
                if new_url:
                    download_url = new_url
        except Exception:
            # Conversion is best-effort: keep the original URL, but leave a
            # trace instead of silently discarding the failure.
            logging.getLogger(__name__).warning(
                "Could not convert %s to a FASP endpoint; using the original URL.",
                download_url,
                exc_info=True)
        return _download_file_aspera(download_url,
                                     downloader_job,
                                     target_file_path,
                                     source="NCBI")
    else:
        return _download_file_ftp(download_url, downloader_job,
                                  target_file_path)
예제 #2
0
파일: sra.py 프로젝트: arjunkrish/refinebio
    def _build_ncbi_file_url(run_accession: str):
        """Return the download URL for the .sra file of ``run_accession``.

        The URL reported by ``get_fasp_sra_download`` is used when it is
        truthy. Otherwise one of the two NCBI URL templates is picked at
        random and filled in with the accession and its three- and
        six-character prefixes.
        """
        template_fields = {
            'first_three': run_accession[:3],
            'first_six': run_accession[:6],
            'accession': run_accession,
        }

        # The FASP-specific endpoint wins whenever one is available.
        fasp_url = get_fasp_sra_download(run_accession)
        if fasp_url:
            return fasp_url

        # No FASP endpoint: a coin flip spreads load over the two templates.
        use_public = random.choice([True, False])
        if use_public:
            url_template = NCBI_DOWNLOAD_URL_TEMPLATE
        else:
            url_template = NCBI_PRIVATE_DOWNLOAD_URL_TEMPLATE

        return url_template.format(**template_fields)