def download_assembly(dest_dir, accession, output_format, fetch_wgs, extract_wgs, expanded, quiet=False):
    """Download an assembly record and its associated data.

    Everything is written beneath ``os.path.join(dest_dir, accession)``.
    The assembly XML is downloaded and parsed for a WGS set and a sequence
    report location. When the sequence report can be fetched, its sequences
    are downloaded; the WGS set is then downloaded if requested (or forced,
    see below), and WGS scaffolds are extracted when ``extract_wgs`` is set.

    Args:
        dest_dir: Parent directory in which the assembly directory is created.
        accession: Assembly accession; also used as the directory name.
        output_format: Sequence format; ``utils.EMBL_FORMAT`` when None.
        fetch_wgs: Whether to download the WGS set. Forced to True when no
            sequence report was fetched, or when the report yielded at least
            one WGS scaffold.
        extract_wgs: Whether to extract the WGS scaffolds after download.
        expanded: Passed through unchanged to ``download_sequences``.
        quiet: Suppress the progress messages printed by this function.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT

    assembly_dir = os.path.join(dest_dir, accession)
    utils.create_dir(assembly_dir)

    # fetch the assembly XML, then read the wgs set / sequence report from it
    utils.download_record(assembly_dir, accession, utils.XML_FORMAT)
    xml_path = utils.get_destination_file(assembly_dir, accession, utils.XML_FORMAT)
    wgs_set, sequence_report = parse_assembly_xml(xml_path)

    # the report only counts as available if the FTP download reports success
    report_fetched = (sequence_report is not None
                      and utils.get_ftp_file(sequence_report, assembly_dir))

    # download the sequences listed in the report, collecting WGS scaffolds
    scaffolds = []
    scaffold_total = 0
    if report_fetched:
        report_name = sequence_report.rsplit('/', 1)[-1]
        scaffolds = download_sequences(report_name, assembly_dir, output_format,
                                       expanded, quiet)
        scaffold_total = len(scaffolds)

    if scaffold_total > 0 and not quiet:
        print('Assembly contains {} WGS scaffolds, will fetch WGS set'.format(scaffold_total))

    # the WGS set is mandatory when scaffolds were found or no report exists
    if scaffold_total > 0 or not report_fetched:
        fetch_wgs = True

    # download the wgs set if there is one and it is wanted
    if fetch_wgs and wgs_set is not None:
        if not quiet:
            print('fetching wgs set')
        sequenceGet.download_wgs(assembly_dir, wgs_set, output_format)

    # pull the individual scaffolds out of the WGS file
    if extract_wgs and scaffold_total > 0:
        extract_wgs_scaffolds(assembly_dir, scaffolds, wgs_set, output_format, quiet)
def download_assembly(dest_dir, accession, output_format, fetch_wgs, quiet=False):
    """Download an assembly record, its WGS set and its sequences.

    Everything is written beneath ``os.path.join(dest_dir, accession)``.
    The assembly XML is downloaded and parsed for a WGS set and a sequence
    report location; the WGS set is downloaded when requested, and the
    sequences listed in the report are downloaded when the report could be
    fetched.

    Args:
        dest_dir: Parent directory in which the assembly directory is created.
        accession: Assembly accession; also used as the directory name.
        output_format: Sequence format; ``utils.EMBL_FORMAT`` when None.
        fetch_wgs: Whether to download the WGS set (if the XML names one).
        quiet: Suppress the progress message printed by this function.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT

    assembly_dir = os.path.join(dest_dir, accession)
    utils.create_dir(assembly_dir)

    # fetch the assembly XML, then read the wgs set / sequence report from it
    utils.download_record(assembly_dir, accession, utils.XML_FORMAT)
    xml_path = utils.get_destination_file(assembly_dir, accession, utils.XML_FORMAT)
    wgs_set, sequence_report = parse_assembly_xml(xml_path)

    # the report only counts as available if the FTP download reports success
    report_fetched = (sequence_report is not None
                      and utils.get_ftp_file(sequence_report, assembly_dir))

    # download the wgs set if there is one and it is wanted
    if fetch_wgs and wgs_set is not None:
        if not quiet:
            print('fetching wgs set')
        sequenceGet.download_wgs(assembly_dir, wgs_set, output_format)

    # download the sequences listed in the report
    if report_fetched:
        report_name = sequence_report.rsplit('/', 1)[-1]
        download_sequences(report_name, assembly_dir, output_format, quiet)
def download_data(group, data_accession, output_format, group_dir, fetch_wgs, extract_wgs, expanded, fetch_meta, fetch_index, aspera):
    """Dispatch the download of one accession according to its group.

    Args:
        group: Result group; compared against ``utils.WGS``,
            ``utils.ASSEMBLY``, ``utils.READ`` and ``utils.ANALYSIS``.
            Any other value only prints the "Fetching" line.
        data_accession: Accession to download. For WGS only its first six
            characters are used.
        output_format: Format passed through to the group's downloader.
        group_dir: Destination directory for the downloaded data.
        fetch_wgs, extract_wgs, expanded: Passed to
            ``assemblyGet.download_assembly`` (assembly group only).
        fetch_meta, fetch_index, aspera: Passed to
            ``readGet.download_files`` (read/analysis groups only).
    """
    # WGS sets are addressed by the first six characters of the accession
    if group == utils.WGS:
        wgs_prefix = data_accession[:6]
        print('Fetching ' + wgs_prefix)
        sequenceGet.download_wgs(group_dir, wgs_prefix, output_format)
        return

    print('Fetching ' + data_accession)

    if group == utils.ASSEMBLY:
        # final positional argument is download_assembly's `quiet` flag
        assemblyGet.download_assembly(group_dir, data_accession, output_format,
                                      fetch_wgs, extract_wgs, expanded, True)
        return

    if group in (utils.READ, utils.ANALYSIS):
        readGet.download_files(data_accession, output_format, group_dir,
                               fetch_index, fetch_meta, aspera)
def download_data(group, data_accession, format, group_dir, fetch_wgs, fetch_meta, fetch_index, aspera):
    """Dispatch the download of one accession according to its group.

    WGS and assembly data are fetched without Aspera; when ``aspera`` is
    truthy for those groups a notice is printed before the download starts.

    Args:
        group: Result group; compared against ``utils.WGS``,
            ``utils.ASSEMBLY``, ``utils.READ`` and ``utils.ANALYSIS``.
            Any other value only prints the "Fetching" line.
        data_accession: Accession to download. For WGS only its first six
            characters are used.
        format: Format passed through to the group's downloader.
        group_dir: Destination directory for the downloaded data.
        fetch_wgs: Passed to ``assemblyGet.download_assembly``
            (assembly group only).
        fetch_meta, fetch_index: Passed to ``readGet.download_files``
            (read/analysis groups only).
        aspera: Aspera setting; forwarded to ``readGet.download_files`` and
            used only to trigger the notice for the other groups.
    """
    # WGS sets are addressed by the first six characters of the accession
    if group == utils.WGS:
        wgs_prefix = data_accession[:6]
        print('Fetching ' + wgs_prefix)
        if aspera:
            print('Aspera not supported for WGS data. Using FTP...')
        sequenceGet.download_wgs(group_dir, wgs_prefix, format)
        return

    print('Fetching ' + data_accession)

    if group == utils.ASSEMBLY:
        if aspera:
            print('Aspera not supported for assembly data. Using FTP...')
        # final positional argument is download_assembly's `quiet` flag
        assemblyGet.download_assembly(group_dir, data_accession, format,
                                      fetch_wgs, True)
        return

    if group in (utils.READ, utils.ANALYSIS):
        readGet.download_files(data_accession, format, group_dir,
                               fetch_index, fetch_meta, aspera)
fetch_wgs = args.wgs extract_wgs = args.extract_wgs expanded = args.expanded fetch_meta = args.meta fetch_index = args.index aspera = args.aspera aspera_settings = args.aspera_settings if aspera or aspera_settings is not None: aspera = utils.set_aspera(aspera_settings) try: if utils.is_wgs_set(accession): if output_format is not None: sequenceGet.check_format(output_format) sequenceGet.download_wgs(dest_dir, accession, output_format) elif not utils.is_available(accession): sys.stderr.write('ERROR: Record does not exist or is not available for accession provided\n') sys.exit(1) elif utils.is_sequence(accession): if output_format is not None: sequenceGet.check_format(output_format) sequenceGet.download_sequence(dest_dir, accession, output_format, expanded) elif utils.is_analysis(accession): if output_format is not None: readGet.check_read_format(output_format) readGet.download_files(accession, output_format, dest_dir, fetch_index, fetch_meta, aspera) elif utils.is_run(accession) or utils.is_experiment(accession): if output_format is not None: readGet.check_read_format(output_format) readGet.download_files(accession, output_format, dest_dir, fetch_index, fetch_meta, aspera)