def download_files(accession, output_format, dest_dir, fetch_index, fetch_meta, aspera):
    """Download the data files (and optionally metadata/index files) for an accession.

    A directory named after ``accession`` is created under ``dest_dir``. The
    file report for the accession is fetched from the portal into a temporary
    file, read, and removed; every line after the header line is then parsed
    and the files it lists are downloaded.

    Args:
        accession: Accession to download; checked with ``utils.is_experiment``
            and ``utils.is_run`` to decide which metadata to fetch and whether
            per-record sub-directories are created.
        output_format: File format passed to the report-line parser. ``None``
            only changes the wording of the "no files" message here.
        dest_dir: Directory under which the accession directory is created.
        fetch_index: When true, the index files listed for each record are
            downloaded as well.
        fetch_meta: When true, metadata is downloaded via ``download_meta`` /
            ``download_experiment_meta``.
        aspera: Passed through unchanged to the search-query builder and to
            ``download_file``.
    """
    accession_dir = os.path.join(dest_dir, accession)
    utils.create_dir(accession_dir)

    # download experiment xml
    is_experiment = utils.is_experiment(accession)
    if fetch_meta and is_experiment:
        download_meta(accession, accession_dir)
    if fetch_meta and utils.is_run(accession):
        download_experiment_meta(accession, accession_dir)

    # download data files
    search_url = utils.get_file_search_query(accession, aspera)
    temp_file = os.path.join(dest_dir, 'temp.txt')
    utils.download_report_from_portal(search_url, temp_file)
    # The context manager closes the handle even if reading fails.
    with open(temp_file) as report:
        lines = report.readlines()
    os.remove(temp_file)

    # The first line of the report is skipped (treated as a header).
    for line in lines[1:]:
        data_accession, filelist, md5list, indexlist = utils.parse_file_search_result_line(
            line, accession, output_format)

        # create run directory if downloading all data for an experiment
        if is_experiment:
            target_dir = os.path.join(accession_dir, data_accession)
            utils.create_dir(target_dir)
        else:
            target_dir = accession_dir

        # download run/analysis XML
        if fetch_meta:
            download_meta(data_accession, target_dir)

        if not filelist:
            # Single-argument print(...) emits the same text on Python 2 and 3.
            if output_format is None:
                print('No files available for {0}'.format(data_accession))
            else:
                print('No files of format {0} available for {1}'.format(
                    output_format, data_accession))
            continue

        # md5list is indexed in step with filelist; a shorter md5list still
        # raises IndexError, as with the original index loop.
        for i, file_url in enumerate(filelist):
            md5 = md5list[i]
            if file_url != '':
                download_file(file_url, target_dir, md5, aspera)

        if fetch_index:
            for index_file in indexlist:
                if index_file != '':
                    # Index files are downloaded without an md5 (None).
                    download_file(index_file, target_dir, None, aspera)

        # NOTE(review): placement of this cleanup inside the per-line loop is
        # reconstructed from flattened source -- confirm against the original.
        if utils.is_empty_dir(target_dir):
            print('Deleting directory ' + os.path.basename(target_dir))
            os.rmdir(target_dir)
def download_files(accession, format, dest_dir, fetch_index, fetch_meta, aspera):
    """Download the data files (and optionally metadata/index files) for an accession.

    A directory named after ``accession`` is created under ``dest_dir``. The
    file report for the accession is fetched from the portal into a temporary
    file, read, and removed; every line after the header line is then parsed
    and the files it lists are downloaded.

    Args:
        accession: Accession to download; checked with ``utils.is_experiment``
            and ``utils.is_run`` to decide which metadata to fetch and whether
            per-record sub-directories are created.
        format: Requested file format; ``None`` is replaced by
            ``utils.SUBMITTED_FORMAT``.
        dest_dir: Directory under which the accession directory is created.
        fetch_index: Passed to the search-query builder and the report-line
            parser.
        fetch_meta: When true, metadata is downloaded via ``download_meta`` /
            ``download_experiment_meta``.
        aspera: Passed through unchanged to the search-query builder and the
            download helpers.
    """
    if format is None:
        format = utils.SUBMITTED_FORMAT

    accession_dir = os.path.join(dest_dir, accession)
    utils.create_dir(accession_dir)

    # download experiment xml
    is_experiment = utils.is_experiment(accession)
    if fetch_meta and is_experiment:
        download_meta(accession, accession_dir)
    # TODO download experiment xml for run accession
    if fetch_meta and utils.is_run(accession):
        download_experiment_meta(accession, accession_dir)

    # download data files
    search_url = utils.get_file_search_query(accession, format, fetch_index, aspera)
    temp_file = os.path.join(dest_dir, 'temp.txt')
    utils.download_report_from_portal(search_url, temp_file)
    # The context manager closes the handle even if reading fails.
    with open(temp_file) as report:
        lines = report.readlines()
    os.remove(temp_file)

    # The first line of the report is skipped (treated as a header).
    for line in lines[1:]:
        data_accession, filelist, md5list, indexlist = utils.parse_file_search_result_line(
            line, accession, format, fetch_index)

        # create run directory if downloading all data for an experiment
        if is_experiment:
            target_dir = os.path.join(accession_dir, data_accession)
            utils.create_dir(target_dir)
        else:
            target_dir = accession_dir

        # download run/analysis XML
        if fetch_meta:
            download_meta(data_accession, target_dir)

        if not filelist:
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print('No files of format ' + format + ' for ' + data_accession)
            continue

        # md5list is indexed in step with filelist; a shorter md5list still
        # raises IndexError, as with the original index loop.
        for i, file_url in enumerate(filelist):
            md5 = md5list[i]
            if file_url != '':
                download_file_with_md5_check(file_url, target_dir, md5, aspera)

        # Index files are downloaded without an md5 check.
        for index_file in indexlist:
            if index_file != '':
                download_file(index_file, target_dir, aspera)
# NOTE(review): this loop uses indexlist, target_dir and aspera, none of which
# are defined in this view; it looks like the tail of a preceding function
# body whose header is not visible here -- confirm its indentation against
# the full file.
for index_file in indexlist:
    # Empty entries are skipped.
    if index_file != '':
        download_file(index_file, target_dir, aspera)


# Script entry point: parse the command line, validate the accession, then
# download its files.
if __name__ == '__main__':
    parser = set_parser()
    args = parser.parse_args()

    # Copy the parsed options into the names download_files() is called with.
    accession = args.accession
    format = args.format
    dest_dir = args.dest
    fetch_meta = args.meta
    fetch_index = args.index
    aspera = args.aspera

    # Only run and experiment accessions are accepted by this entry point.
    if not utils.is_run(accession) and not utils.is_experiment(accession):
        print 'Error: Invalid accession. An INSDC run or experiment accession must be provided'
        sys.exit(1)

    # Exit with status 1 if utils.is_available() reports the record as unavailable.
    if not utils.is_available(accession):
        print 'Record does not exist or is not available for accession provided'
        sys.exit(1)

    # Top-level boundary: any failure is reported through utils.print_error()
    # and turned into exit status 1.
    try:
        download_files(accession, format, dest_dir, fetch_index, fetch_meta, aspera)
        print 'Completed'
    except Exception:
        utils.print_error()
        sys.exit(1)
if utils.is_wgs_set(accession): if output_format is not None: sequenceGet.check_format(output_format) sequenceGet.download_wgs(dest_dir, accession, output_format) elif not utils.is_available(accession): sys.stderr.write('ERROR: Record does not exist or is not available for accession provided\n') sys.exit(1) elif utils.is_sequence(accession): if output_format is not None: sequenceGet.check_format(output_format) sequenceGet.download_sequence(dest_dir, accession, output_format, expanded) elif utils.is_analysis(accession): if output_format is not None: readGet.check_read_format(output_format) readGet.download_files(accession, output_format, dest_dir, fetch_index, fetch_meta, aspera) elif utils.is_run(accession) or utils.is_experiment(accession): if output_format is not None: readGet.check_read_format(output_format) readGet.download_files(accession, output_format, dest_dir, fetch_index, fetch_meta, aspera) elif utils.is_assembly(accession): if output_format is not None: assemblyGet.check_format(output_format) assemblyGet.download_assembly(dest_dir, accession, output_format, fetch_wgs, extract_wgs, expanded) else: sys.stderr.write('ERROR: Invalid accession provided\n') sys.exit(1) print 'Completed' except Exception: traceback.print_exc() utils.print_error() sys.exit(1)