コード例 #1
0
ファイル: readGet.py プロジェクト: dorbarker/enaBrowserTools
def download_file(file_url, dest_dir, aspera):
    """Fetch ``file_url`` into ``dest_dir`` via Aspera or FTP.

    When ``aspera`` is truthy the file is requested once through
    ``utils.get_aspera_file``. Otherwise it is requested through
    ``utils.get_ftp_file`` (with an ``ftp://`` prefix added to the URL),
    and a falsy result triggers exactly one further FTP attempt.

    A failure message is printed when the final result is falsy.
    Nothing is returned.
    """
    print('Downloading file', file_url)
    if aspera:
        downloaded = utils.get_aspera_file(file_url, dest_dir)
    else:
        ftp_url = 'ftp://' + file_url
        # Short-circuit "or": the second call only runs when the first
        # attempt reported failure, giving a single retry.
        downloaded = (utils.get_ftp_file(ftp_url, dest_dir)
                      or utils.get_ftp_file(ftp_url, dest_dir))
    if not downloaded:
        print('Failed to download file')
コード例 #2
0
def download_versioned_wgs(dest_dir, accession, format):
    """Download the WGS set file for a versioned WGS accession.

    The first six characters of ``accession`` form the set prefix. The
    public set URL is tried first and the suppressed set URL second; the
    function stops at the first download that ``utils.get_ftp_file``
    reports as successful. If both fail, a message is printed.

    ``format`` defaults to ``utils.EMBL_FORMAT`` when ``None``.
    Nothing is returned.
    """
    set_prefix = accession[:6]
    if format is None:
        format = utils.EMBL_FORMAT
    # Both URLs are resolved up front, before any download is attempted.
    candidate_urls = (
        utils.get_wgs_ftp_url(set_prefix, utils.PUBLIC, format),
        utils.get_wgs_ftp_url(set_prefix, utils.SUPPRESSED, format),
    )
    for set_url in candidate_urls:
        if utils.get_ftp_file(set_url, dest_dir):
            return
    print('No WGS set file available for ' + accession + ', format ' + format)
    print('Please contact ENA ([email protected]) if you feel this set should be available')
コード例 #3
0
def download_unversioned_wgs(dest_dir, accession, format):
    """Download the WGS set file for an unversioned WGS accession.

    The first four characters of ``accession`` form the set prefix. The
    public set URL is looked up first; if that lookup yields ``None`` the
    suppressed set URL is looked up instead. When neither lookup yields a
    URL a message is printed; otherwise the URL found is handed to
    ``utils.get_ftp_file`` (whose result is not inspected).

    ``format`` defaults to ``utils.EMBL_FORMAT`` when ``None``.
    Nothing is returned.
    """
    set_prefix = accession[:4]
    if format is None:
        format = utils.EMBL_FORMAT
    wgs_url = utils.get_nonversioned_wgs_ftp_url(set_prefix, utils.PUBLIC, format)
    if wgs_url is None:
        # No public set: fall back to the suppressed set lookup.
        wgs_url = utils.get_nonversion_supp_wgs_ftp_url(set_prefix, format)
    if wgs_url is None:
        print('No WGS set file available for ' + accession + ', format ' + format)
        print('Please contact ENA ([email protected]) if you feel this set should be available')
        return
    utils.get_ftp_file(wgs_url, dest_dir)
コード例 #4
0
def download_versioned_wgs(dest_dir, accession, output_format):
    """Download the WGS set file for a versioned WGS accession.

    The first six characters of ``accession`` form the set prefix. The
    public set URL is tried first and the suppressed set URL second; the
    function stops at the first download that ``utils.get_ftp_file``
    reports as successful. If both fail, a message pointing at ENA
    support is printed. Nothing is returned.
    """
    set_prefix = accession[:6]
    # Both URLs are resolved up front, before any download is attempted.
    candidate_urls = [
        utils.get_wgs_ftp_url(set_prefix, visibility, output_format)
        for visibility in (utils.PUBLIC, utils.SUPPRESSED)
    ]
    for set_url in candidate_urls:
        if utils.get_ftp_file(set_url, dest_dir):
            return
    not_found = 'No WGS set file available for {0}, format {1}'.format(
        accession, output_format)
    print(not_found)
    print('Please contact ENA (https://www.ebi.ac.uk/ena/browser/support) if you feel this set should be available')
コード例 #5
0
def download_unversioned_wgs(dest_dir, accession, output_format):
    """Download the WGS set file for an unversioned WGS accession.

    The first four characters of ``accession`` are used as the set prefix.
    The public set URL is looked up first; when that lookup yields
    ``None`` the suppressed set URL is looked up instead. If neither
    lookup yields a URL, a message is printed.

    Args:
        dest_dir: Passed to ``utils.get_ftp_file`` as the destination.
        accession: WGS accession string.
        output_format: Requested format; ``None`` selects
            ``utils.EMBL_FORMAT``.

    Returns:
        None. The result of ``utils.get_ftp_file`` is not inspected.
    """
    prefix = accession[:4]
    if output_format is None:
        output_format = utils.EMBL_FORMAT
    public_set_url = utils.get_nonversioned_wgs_ftp_url(prefix, utils.PUBLIC, output_format)
    if public_set_url is not None:
        utils.get_ftp_file(public_set_url, dest_dir)
    else:
        supp_set_url = utils.get_nonversioned_wgs_ftp_url(prefix, utils.SUPPRESSED, output_format)
        if supp_set_url is not None:
            utils.get_ftp_file(supp_set_url, dest_dir)
        else:
            # Single-argument print(...) emits the same text on Python 2
            # and is valid syntax on Python 3 (the print statement is not).
            print('No WGS set file available for {0}, format {1}'.format(accession, output_format))
            print('Please contact ENA ([email protected]) if you feel this set should be available')
コード例 #6
0
def download_assembly(dest_dir, accession, output_format, fetch_wgs, extract_wgs, expanded, quiet=False):
    """Download an assembly record plus its sequences and WGS set.

    Steps, in order:
      1. Create ``<dest_dir>/<accession>`` and download the record XML
         into it.
      2. Parse the XML for the WGS set and the sequence report location.
      3. Download the sequence report (if one is listed) and, when that
         succeeds, download the sequences it describes.
      4. Download the WGS set when one is listed and fetching is enabled,
         then optionally extract the WGS scaffolds from it.

    Args:
        dest_dir: Parent directory for the per-assembly directory.
        accession: Assembly accession; also the per-assembly dir name.
        output_format: Sequence format; ``None`` selects
            ``utils.EMBL_FORMAT``.
        fetch_wgs: Whether to fetch the WGS set. Forced to ``True`` when
            the sequence report yields WGS scaffolds, or when no sequence
            report could be downloaded.
        extract_wgs: Whether to extract WGS scaffolds after fetching.
        expanded: Forwarded unchanged to ``download_sequences``.
        quiet: Suppress progress messages when truthy.

    Returns:
        None.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT
    assembly_dir = os.path.join(dest_dir, accession)
    utils.create_dir(assembly_dir)
    # download xml
    utils.download_record(assembly_dir, accession, utils.XML_FORMAT)
    local_xml = utils.get_destination_file(assembly_dir, accession, utils.XML_FORMAT)
    # get wgs and sequence report info
    wgs_set, sequence_report = parse_assembly_xml(local_xml)
    has_sequence_report = False
    # download sequence report
    if sequence_report is not None:
        has_sequence_report = utils.get_ftp_file(sequence_report, assembly_dir)
    # parse sequence report and download sequences
    wgs_scaffolds = []
    wgs_scaffold_cnt = 0
    if has_sequence_report:
        # The report was saved under its URL basename inside assembly_dir.
        wgs_scaffolds = download_sequences(sequence_report.split('/')[-1], assembly_dir, output_format, expanded, quiet)
        wgs_scaffold_cnt = len(wgs_scaffolds)
        if wgs_scaffold_cnt > 0:
            if not quiet:
                # Single-argument print(...) behaves the same on Python 2
                # and is valid on Python 3 (the print statement is not).
                print('Assembly contains {} WGS scaffolds, will fetch WGS set'.format(wgs_scaffold_cnt))
            fetch_wgs = True
    else:
        # Without a sequence report the WGS set is always requested.
        fetch_wgs = True
    # download wgs set if needed
    if wgs_set is not None and fetch_wgs:
        if not quiet:
            print('fetching wgs set')
        sequenceGet.download_wgs(assembly_dir, wgs_set, output_format)
        # extract wgs scaffolds from WGS file
        if wgs_scaffold_cnt > 0 and extract_wgs:
            extract_wgs_scaffolds(assembly_dir, wgs_scaffolds, wgs_set, output_format, quiet)
コード例 #7
0
def download_assembly(dest_dir,
                      accession,
                      output_format,
                      fetch_wgs,
                      quiet=False):
    """Download an assembly record, its WGS set and its sequences.

    Steps, in order:
      1. Create ``<dest_dir>/<accession>`` and download the record XML
         into it.
      2. Parse the XML for the WGS set and the sequence report location.
      3. Download the sequence report, if one is listed.
      4. Download the WGS set when one is listed and ``fetch_wgs`` is
         truthy.
      5. Download the sequences described by the sequence report, when
         the report download succeeded.

    Args:
        dest_dir: Parent directory for the per-assembly directory.
        accession: Assembly accession; also the per-assembly dir name.
        output_format: Sequence format; ``None`` selects
            ``utils.EMBL_FORMAT``.
        fetch_wgs: Whether to fetch the WGS set.
        quiet: Suppress the progress message when truthy.

    Returns:
        None.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT
    assembly_dir = os.path.join(dest_dir, accession)
    utils.create_dir(assembly_dir)
    # download xml
    utils.download_record(assembly_dir, accession, utils.XML_FORMAT)
    local_xml = utils.get_destination_file(assembly_dir, accession,
                                           utils.XML_FORMAT)
    # get wgs and sequence report info
    wgs_set, sequence_report = parse_assembly_xml(local_xml)
    has_sequence_report = False
    # download sequence report
    if sequence_report is not None:
        has_sequence_report = utils.get_ftp_file(sequence_report, assembly_dir)
    # download wgs set if needed
    if wgs_set is not None and fetch_wgs:
        if not quiet:
            # Single-argument print(...) behaves the same on Python 2 and
            # is valid on Python 3 (the print statement is not).
            print('fetching wgs set')
        sequenceGet.download_wgs(assembly_dir, wgs_set, output_format)
    # parse sequence report and download sequences
    if has_sequence_report:
        # The report was saved under its URL basename inside assembly_dir.
        download_sequences(
            sequence_report.split('/')[-1], assembly_dir, output_format, quiet)
コード例 #8
0
def attempt_file_download(file_url, dest_dir, md5, aspera):
    """Make one download attempt for ``file_url`` and return its result.

    The transport is Aspera when ``aspera`` is truthy, FTP otherwise (the
    FTP URL is ``file_url`` prefixed with ``ftp://``). When ``md5`` is not
    ``None`` the md5-checking variant of the ``utils`` helper is used and
    ``md5`` is passed through to it.

    Returns whatever the selected ``utils`` helper returns.
    """
    verify = md5 is not None
    if verify:
        print('Downloading file with md5 check:' + file_url)
    else:
        print('Downloading file:' + file_url)

    if aspera:
        if verify:
            return utils.get_aspera_file_with_md5_check(file_url, dest_dir, md5)
        return utils.get_aspera_file(file_url, dest_dir)

    ftp_url = 'ftp://' + file_url
    if verify:
        return utils.get_ftp_file_with_md5_check(ftp_url, dest_dir, md5)
    return utils.get_ftp_file(ftp_url, dest_dir)