Ejemplo n.º 1
0
def download_files(accession, output_format, dest_dir, fetch_index, fetch_meta,
                   aspera):
    """Download the data files (and optionally metadata) for an accession.

    A directory named after ``accession`` is created under ``dest_dir``. A
    file report is fetched from the portal into a temporary file, and every
    line after the first is parsed into a data accession with its file, md5
    and index lists. For an experiment accession each data accession gets its
    own sub-directory; otherwise files go straight into the accession
    directory.

    Args:
        accession: accession to download; classified with
            ``utils.is_experiment`` and ``utils.is_run``.
        output_format: requested file format, forwarded to the report line
            parser; ``None`` selects the generic "no files" message.
        dest_dir: directory under which the accession directory is created.
        fetch_index: when true, also download the entries of the index list.
        fetch_meta: when true, also download the metadata XML.
        aspera: forwarded unchanged to the search-query builder and to
            ``download_file``.
    """
    accession_dir = os.path.join(dest_dir, accession)
    utils.create_dir(accession_dir)
    # download experiment xml
    is_experiment = utils.is_experiment(accession)
    if fetch_meta and is_experiment:
        download_meta(accession, accession_dir)
    if fetch_meta and utils.is_run(accession):
        download_experiment_meta(accession, accession_dir)
    # download data files
    search_url = utils.get_file_search_query(accession, aspera)
    temp_file = os.path.join(dest_dir, 'temp.txt')
    try:
        utils.download_report_from_portal(search_url, temp_file)
        # The context manager closes the handle even if reading fails.
        with open(temp_file) as report:
            lines = report.readlines()
    finally:
        # Never leave the temporary report behind, whatever happened above.
        if os.path.exists(temp_file):
            os.remove(temp_file)
    # The first line is skipped -- presumably a header row; confirm against
    # the portal report format.
    for line in lines[1:]:
        data_accession, filelist, md5list, indexlist = utils.parse_file_search_result_line(
            line, accession, output_format)
        # create run directory if downloading all data for an experiment
        if is_experiment:
            target_dir = os.path.join(accession_dir, data_accession)
            utils.create_dir(target_dir)
        else:
            target_dir = accession_dir
        # download run/analysis XML
        if fetch_meta:
            download_meta(data_accession, target_dir)
        if not filelist:
            if output_format is None:
                print('No files available for {0}'.format(data_accession))
            else:
                print('No files of format {0} available for {1}'.format(
                    output_format, data_accession))
            # NOTE(review): this skips the empty-directory cleanup at the end
            # of the loop body -- confirm whether that is intended.
            continue
        for i, file_url in enumerate(filelist):
            # Index md5list explicitly (rather than zip) so a list that is
            # shorter than filelist still fails loudly instead of silently
            # skipping files.
            md5 = md5list[i]
            if file_url != '':
                download_file(file_url, target_dir, md5, aspera)
        if fetch_index:
            for index_file in indexlist:
                if index_file != '':
                    download_file(index_file, target_dir, None, aspera)
        if utils.is_empty_dir(target_dir):
            print('Deleting directory ' + os.path.basename(target_dir))
            os.rmdir(target_dir)
Ejemplo n.º 2
0
def download_files(accession, format, dest_dir, fetch_index, fetch_meta, aspera):
    """Download the data files (and optionally metadata) for an accession.

    A directory named after ``accession`` is created under ``dest_dir``. A
    file report is fetched from the portal into a temporary file, and every
    line after the first is parsed into a data accession with its file, md5
    and index lists. For an experiment accession each data accession gets its
    own sub-directory; otherwise files go straight into the accession
    directory.

    Args:
        accession: accession to download; classified with
            ``utils.is_experiment`` and ``utils.is_run``.
        format: requested file format; ``None`` is replaced by
            ``utils.SUBMITTED_FORMAT``.
        dest_dir: directory under which the accession directory is created.
        fetch_index: forwarded to the search-query builder and the report
            line parser; every non-empty entry of the returned index list is
            downloaded.
        fetch_meta: when true, also download the metadata XML.
        aspera: forwarded unchanged to the search-query builder and the
            download helpers.
    """
    if format is None:
        format = utils.SUBMITTED_FORMAT
    accession_dir = os.path.join(dest_dir, accession)
    utils.create_dir(accession_dir)
    # download experiment xml
    is_experiment = utils.is_experiment(accession)
    if fetch_meta and is_experiment:
        download_meta(accession, accession_dir)
    # TODO download experiment xml for run accession
    # NOTE(review): the call below looks like it already addresses this TODO
    # -- confirm and drop the TODO if so.
    if fetch_meta and utils.is_run(accession):
        download_experiment_meta(accession, accession_dir)
    # download data files
    search_url = utils.get_file_search_query(accession, format, fetch_index, aspera)
    temp_file = os.path.join(dest_dir, 'temp.txt')
    try:
        utils.download_report_from_portal(search_url, temp_file)
        # The context manager closes the handle even if reading fails.
        with open(temp_file) as report:
            lines = report.readlines()
    finally:
        # Never leave the temporary report behind, whatever happened above.
        if os.path.exists(temp_file):
            os.remove(temp_file)
    # The first line is skipped -- presumably a header row; confirm against
    # the portal report format.
    for line in lines[1:]:
        data_accession, filelist, md5list, indexlist = utils.parse_file_search_result_line(
            line, accession, format, fetch_index)
        # create run directory if downloading all data for an experiment
        if is_experiment:
            target_dir = os.path.join(accession_dir, data_accession)
            utils.create_dir(target_dir)
        else:
            target_dir = accession_dir
        # download run/analysis XML
        if fetch_meta:
            download_meta(data_accession, target_dir)
        if not filelist:
            print('No files of format ' + format + ' for ' + data_accession)
            continue
        for i, file_url in enumerate(filelist):
            # Index md5list explicitly (rather than zip) so a list that is
            # shorter than filelist still fails loudly instead of silently
            # skipping files.
            md5 = md5list[i]
            if file_url != '':
                download_file_with_md5_check(file_url, target_dir, md5, aspera)
        for index_file in indexlist:
            if index_file != '':
                download_file(index_file, target_dir, aspera)
Ejemplo n.º 3
0
        for index_file in indexlist:
            if index_file != '':
                download_file(index_file, target_dir, aspera)


if __name__ == '__main__':
    # Script entry point: read the command-line options, validate the
    # accession, then run the download.
    parser = set_parser()
    args = parser.parse_args()

    accession, format, dest_dir = args.accession, args.format, args.dest
    fetch_meta, fetch_index, aspera = args.meta, args.index, args.aspera

    # Only run and experiment accessions are accepted here.
    if not (utils.is_run(accession) or utils.is_experiment(accession)):
        print('Error: Invalid accession. An INSDC run or experiment accession must be provided')
        sys.exit(1)

    if not utils.is_available(accession):
        print('Record does not exist or is not available for accession provided')
        sys.exit(1)

    try:
        download_files(accession, format, dest_dir, fetch_index, fetch_meta, aspera)
        print('Completed')
    except Exception:
        # Any failure is reported via utils.print_error and mapped to exit
        # status 1.
        utils.print_error()
        sys.exit(1)
Ejemplo n.º 4
0
     if utils.is_wgs_set(accession):
         if output_format is not None:
             sequenceGet.check_format(output_format)
         sequenceGet.download_wgs(dest_dir, accession, output_format)
     elif not utils.is_available(accession):
         sys.stderr.write('ERROR: Record does not exist or is not available for accession provided\n')
         sys.exit(1)
     elif utils.is_sequence(accession):
         if output_format is not None:
             sequenceGet.check_format(output_format)
         sequenceGet.download_sequence(dest_dir, accession, output_format, expanded)
     elif utils.is_analysis(accession):
         if output_format is not None:
             readGet.check_read_format(output_format)
         readGet.download_files(accession, output_format, dest_dir, fetch_index, fetch_meta, aspera)
     elif utils.is_run(accession) or utils.is_experiment(accession):
         if output_format is not None:
             readGet.check_read_format(output_format)
         readGet.download_files(accession, output_format, dest_dir, fetch_index, fetch_meta, aspera)
     elif utils.is_assembly(accession):
         if output_format is not None:
             assemblyGet.check_format(output_format)
         assemblyGet.download_assembly(dest_dir, accession, output_format, fetch_wgs, extract_wgs, expanded)
     else:
         sys.stderr.write('ERROR: Invalid accession provided\n')
         sys.exit(1)
     print 'Completed'
 except Exception:
     traceback.print_exc()
     utils.print_error()
     sys.exit(1)