def download_assembly(dest_dir, accession, output_format, fetch_wgs, extract_wgs, expanded, quiet=False):
    """Download an assembly's XML record and its associated sequence data.

    Creates ``<dest_dir>/<accession>``, downloads the assembly XML into it and
    reads the WGS set and sequence report locations from that XML via
    ``parse_assembly_xml``. When a sequence report is available it is fetched
    with ``utils.get_ftp_file`` and handed to ``download_sequences``. The WGS
    set is then downloaded if required, and the WGS scaffolds are optionally
    extracted from it.

    Args:
        dest_dir: Directory under which the per-assembly directory is created.
        accession: Assembly accession; also used as the sub-directory name.
        output_format: Format for sequence downloads; ``None`` selects
            ``utils.EMBL_FORMAT``.
        fetch_wgs: Whether to download the WGS set. Overridden to ``True``
            when ``download_sequences`` reports WGS scaffolds, or when no
            sequence report was fetched.
        extract_wgs: Whether to call ``extract_wgs_scaffolds`` when WGS
            scaffolds were reported.
        expanded: Passed through unchanged to ``download_sequences``.
        quiet: When true, progress messages are not printed.

    Returns:
        None.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT

    assembly_dir = os.path.join(dest_dir, accession)
    utils.create_dir(assembly_dir)

    # Download the assembly XML and read the WGS set / sequence report
    # locations from the local copy.
    utils.download_record(assembly_dir, accession, utils.XML_FORMAT)
    local_xml = utils.get_destination_file(assembly_dir, accession, utils.XML_FORMAT)
    wgs_set, sequence_report = parse_assembly_xml(local_xml)

    # Download the sequence report, if the XML referenced one.
    has_sequence_report = False
    if sequence_report is not None:
        has_sequence_report = utils.get_ftp_file(sequence_report, assembly_dir)

    # Parse the sequence report and download the sequences it lists.
    wgs_scaffolds = []
    wgs_scaffold_cnt = 0
    if has_sequence_report:
        report_file_name = sequence_report.split('/')[-1]
        wgs_scaffolds = download_sequences(
            report_file_name, assembly_dir, output_format, expanded, quiet)
        wgs_scaffold_cnt = len(wgs_scaffolds)
        if wgs_scaffold_cnt > 0:
            if not quiet:
                # Single-argument call form: valid on Python 2 and Python 3.
                print('Assembly contains {} WGS scaffolds, will fetch WGS set'.format(wgs_scaffold_cnt))
            fetch_wgs = True
    else:
        # NOTE(review): the original indentation was lost; this `else` is
        # assumed to pair with `if has_sequence_report` (no report means the
        # WGS set is always fetched) -- confirm against the upstream file.
        fetch_wgs = True

    # Download the WGS set if needed.
    if wgs_set is not None and fetch_wgs:
        if not quiet:
            print('fetching wgs set')
        sequenceGet.download_wgs(assembly_dir, wgs_set, output_format)

    # Extract the WGS scaffolds from the WGS file.
    if wgs_scaffold_cnt > 0 and extract_wgs:
        extract_wgs_scaffolds(assembly_dir, wgs_scaffolds, wgs_set, output_format, quiet)
def download_assembly(dest_dir, accession, output_format, fetch_wgs, quiet=False):
    """Download an assembly's XML record, WGS set and listed sequences.

    Creates ``<dest_dir>/<accession>``, downloads the assembly XML into it and
    reads the WGS set and sequence report locations from that XML via
    ``parse_assembly_xml``. The sequence report (if any) is fetched with
    ``utils.get_ftp_file``, the WGS set is downloaded when requested, and
    finally the sequence report is handed to ``download_sequences``.

    Args:
        dest_dir: Directory under which the per-assembly directory is created.
        accession: Assembly accession; also used as the sub-directory name.
        output_format: Format for sequence downloads; ``None`` selects
            ``utils.EMBL_FORMAT``.
        fetch_wgs: Whether to download the WGS set when the XML names one.
        quiet: When true, progress messages are not printed.

    Returns:
        None.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT

    assembly_dir = os.path.join(dest_dir, accession)
    utils.create_dir(assembly_dir)

    # Download the assembly XML and read the WGS set / sequence report
    # locations from the local copy.
    utils.download_record(assembly_dir, accession, utils.XML_FORMAT)
    local_xml = utils.get_destination_file(assembly_dir, accession, utils.XML_FORMAT)
    wgs_set, sequence_report = parse_assembly_xml(local_xml)

    # Download the sequence report, if the XML referenced one.
    has_sequence_report = False
    if sequence_report is not None:
        has_sequence_report = utils.get_ftp_file(sequence_report, assembly_dir)

    # Download the WGS set if needed.
    if wgs_set is not None and fetch_wgs:
        if not quiet:
            # Single-argument call form: valid on Python 2 and Python 3.
            print('fetching wgs set')
        sequenceGet.download_wgs(assembly_dir, wgs_set, output_format)

    # Parse the sequence report and download the sequences it lists.
    if has_sequence_report:
        report_file_name = sequence_report.split('/')[-1]
        download_sequences(report_file_name, assembly_dir, output_format, quiet)
def download_sequence(dest_dir, accession, output_format, expanded):
    """Download one sequence record and report whether it succeeded.

    Args:
        dest_dir: Destination directory passed to ``utils.download_record``.
        accession: Accession of the record to download.
        output_format: Requested format; ``None`` selects
            ``utils.EMBL_FORMAT``.
        expanded: Passed through unchanged to ``utils.download_record``.

    Returns:
        The value returned by ``utils.download_record``; a falsy value means
        the download failed and a message has been printed.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT

    success = utils.download_record(dest_dir, accession, output_format, expanded)
    if not success:
        # Single-argument call form: valid on Python 2 and Python 3.
        print('Unable to fetch file for {0}, format {1}'.format(accession, output_format))
    return success
def download_sequence(dest_dir, accession, format):
    """Download one sequence record and report whether it succeeded.

    Args:
        dest_dir: Destination directory passed to ``utils.download_record``.
        accession: Accession of the record to download.
        format: Requested format; ``None`` selects ``utils.EMBL_FORMAT``.
            (The name shadows the builtin but is kept because it is part of
            the function's interface.)

    Returns:
        The value returned by ``utils.download_record``; a falsy value means
        the download failed and a message has been printed.
    """
    if format is None:
        format = utils.EMBL_FORMAT

    success = utils.download_record(dest_dir, accession, format)
    if not success:
        # Single-argument call form: valid on Python 2 and Python 3.
        print('Unable to fetch file for ' + accession + ', format ' + format)
    return success
def download_meta(accession, dest_dir):
    """Download the XML-format record for ``accession`` into ``dest_dir``.

    Delegates to ``utils.download_record`` with ``utils.XML_FORMAT``; the
    result of that call is discarded and nothing is returned.
    """
    record_format = utils.XML_FORMAT
    utils.download_record(dest_dir, accession, record_format)
def download_sequence(dest_dir, accession, output_format, expanded):
    """Download one sequence record and report whether it succeeded.

    Args:
        dest_dir: Destination directory passed to ``utils.download_record``.
        accession: Accession of the record to download.
        output_format: Requested format, passed through unchanged.
        expanded: Passed through unchanged to ``utils.download_record``.

    Returns:
        The value returned by ``utils.download_record``; a falsy value means
        the download failed and a message has been printed.
    """
    success = utils.download_record(dest_dir, accession, output_format, expanded)
    if not success:
        print('Unable to fetch file for {0}, format {1}'.format(
            accession, output_format))
    # Hand the outcome back to the caller instead of dropping it, so failures
    # can be detected programmatically as well as from the printed message.
    return success