예제 #1
0
파일: run_kraken.py 프로젝트: ImerM/sag_aws
def main():
    """Command-line entry point for the Kraken wrapper.

    Parses ``--input_files`` (required), ``--cmd_args``, ``--working_dir``
    and ``--results_path``, then: creates a working directory, builds the
    FASTQ input string, runs Kraken, uploads the folder returned by
    ``run_kraken`` to ``--results_path`` and deletes the working directory.
    """
    parser = ArgumentParser()

    paths = parser.add_argument_group(title='File paths')
    paths.add_argument(
        '--input_files',
        type=str,
        required=True,
        help='S3 paths for input files',
    )

    run_opts = parser.add_argument_group(title='Run command args')
    run_opts.add_argument(
        '--cmd_args',
        type=str,
        default=' ',
        help='Arguments for preprocessing',
    )

    # Both directory options share the same type and default.
    for flag in ('--working_dir', '--results_path'):
        parser.add_argument(flag, type=str, default='/scratch')

    args = parser.parse_args()
    scratch = generate_working_dir(args.working_dir)

    # presumably generate_input_string fetches the S3 inputs into the
    # working directory -- confirm against its definition
    print('Downloading FASTQs')
    inputs = generate_input_string(args.input_files, scratch)

    print('Running Kraken')
    output_folder = run_kraken(args.cmd_args, inputs, scratch)

    print('Uploading results to %s' % args.results_path)
    upload_folder(args.results_path, output_folder)

    print('Cleaning up working dir')
    delete_working_dir(scratch)
    print('Completed')
예제 #2
0
def main():
    """Command-line entry point for the snpEff wrapper.

    Downloads the VCF named by ``--vcf_s3_path`` into a fresh working
    directory, runs ``run_snpeff`` on it with ``--cmd_args``, uploads the
    resulting file to ``--annotated_vcf_s3_path`` and removes the working
    directory.
    """
    parser = ArgumentParser()

    # The two S3 locations are mandatory and declared identically.
    for flag, description in (
            ('--vcf_s3_path', 'VCF s3 path'),
            ('--annotated_vcf_s3_path', 'Annotated vcf s3 path'),
    ):
        parser.add_argument(flag, type=str, help=description, required=True)

    parser.add_argument('--working_dir', type=str, default='/scratch')
    parser.add_argument(
        '--cmd_args',
        type=str,
        default='-t',
        help='arguments/options for snpeff',
    )

    args = parser.parse_args()
    scratch = generate_working_dir(args.working_dir)

    print('Downloading vcf')
    vcf_local = download_file(args.vcf_s3_path, scratch)

    print('Running snpeff')
    annotated_local = run_snpeff(vcf_local, args.cmd_args, scratch)

    destination = args.annotated_vcf_s3_path
    print('Uploading %s to %s' % (annotated_local, destination))
    upload_file(destination, annotated_local)

    print('Cleaning up working dir')
    delete_working_dir(scratch)
    print('Completed')
예제 #3
0
def main():
    """Command-line entry point for the Strelka wrapper.

    Downloads a BAM, its index, a reference and the reference index from
    S3 into a fresh working directory, runs ``run_strelka`` and uploads
    the path it returns to ``--vcf_s3_path`` with ``upload_folder``. The
    working directory is deleted afterwards.

    Command-line options:
        --bam_s3_path, --bai_s3_path, --vcf_s3_path, --reference_s3_path,
        --reference_index_s3_path: required S3 locations.
        --memory: memory in GB handed to ``run_strelka`` (string).
        --cmd_args: extra arguments forwarded to ``run_strelka``.
        --working_dir: parent directory for the working directory.
    """
    argparser = ArgumentParser()

    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--bam_s3_path',
                                 type=str,
                                 help='BAM s3 path',
                                 required=True)
    file_path_group.add_argument('--bai_s3_path',
                                 type=str,
                                 help='BAM Index s3 path',
                                 required=True)
    file_path_group.add_argument('--vcf_s3_path',
                                 type=str,
                                 help='VCF s3 path',
                                 required=True)
    file_path_group.add_argument('--reference_s3_path',
                                 type=str,
                                 help='Reference file s3 path',
                                 required=True)
    file_path_group.add_argument('--reference_index_s3_path',
                                 type=str,
                                 help='Reference file index s3 path',
                                 required=True)

    run_group = argparser.add_argument_group(title='Run command args')
    # argparse only applies ``type`` to string defaults, so the default is
    # given as a string: run_strelka then always receives a str, whether
    # or not --memory was passed on the command line.
    run_group.add_argument('--memory',
                           type=str,
                           help='Memory (in GB) for strelka to use',
                           default='28')
    run_group.add_argument('--cmd_args',
                           type=str,
                           help='Additional arguments for strelka',
                           default='')

    argparser.add_argument('--working_dir', type=str, default='/scratch')

    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    print("Downloading bam")
    local_bam_path = download_file(args.bam_s3_path, working_dir)
    # The index is not passed to run_strelka; it only needs to be present
    # in the working directory (presumably next to the BAM -- confirm).
    local_bai_path = download_file(args.bai_s3_path, working_dir)
    print("BAM and index downloaded to %s and %s" %
          (local_bam_path, local_bai_path))

    print("Downloading reference")
    local_reference_path = download_file(args.reference_s3_path, working_dir)
    local_reference_index_path = download_file(args.reference_index_s3_path,
                                               working_dir)
    print("Reference downloaded to %s. Index to %s" %
          (local_reference_path, local_reference_index_path))

    print("Running Strelka")
    local_vcf_path = run_strelka(local_bam_path, local_reference_path,
                                 args.memory, args.cmd_args, working_dir)

    print("Uploading %s to %s" % (local_vcf_path, args.vcf_s3_path))
    upload_folder(args.vcf_s3_path, local_vcf_path)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print("Completed")
예제 #4
0
def main():
    """Command-line entry point for the BUSCO wrapper.

    Fetches ``--input_file`` with ``download_fastq_file`` into a fresh
    working directory, runs ``run_busco`` with ``--cmd_args``, uploads
    the folder it returns to ``--results_path`` via ``upload_results``
    and deletes the working directory.
    """
    parser = ArgumentParser()

    paths = parser.add_argument_group(title='File paths')
    paths.add_argument(
        '--input_file',
        type=str,
        required=True,
        help='s3 path',
    )

    run_opts = parser.add_argument_group(title='Run command args')
    run_opts.add_argument(
        '--cmd_args',
        type=str,
        default=' ',
        help='Arguments for preprocessing',
    )

    # Note the differing defaults: the working dir lives on the docker
    # share while results default to /scratch.
    parser.add_argument('--working_dir', type=str, default='/docker_share')
    parser.add_argument('--results_path', type=str, default='/scratch')

    args = parser.parse_args()
    scratch = generate_working_dir(args.working_dir)

    print('Downloading FASTQs')
    local_input = download_fastq_file(args.input_file, scratch)

    print('Running busco')
    output_folder = run_busco(args.cmd_args, local_input, scratch)

    print('Uploading results to %s' % args.results_path)
    upload_results(args.results_path, output_folder)

    print('Cleaning up working dir')
    delete_working_dir(scratch)
    print('Completed')
예제 #5
0
def main():
    """Command-line entry point for the Isaac aligner wrapper.

    Downloads the two FASTQ files and the reference into a fresh working
    directory, runs ``run_isaac`` with the requested memory and extra
    arguments, uploads the folder it returns to ``--bam_s3_folder_path``
    via ``upload_bam`` and deletes the working directory.
    """
    parser = ArgumentParser()

    # All S3 locations are required strings; only flag and help differ.
    paths = parser.add_argument_group(title='File paths')
    required_paths = (
        ('--bam_s3_folder_path', 'BAM s3 path'),
        ('--fastq1_s3_path', 'FASTQ1 s3 path'),
        ('--fastq2_s3_path', 'FASTQ2 s3  path'),
        ('--reference_s3_path', 'reference file'),
    )
    for flag, description in required_paths:
        paths.add_argument(flag, type=str, help=description, required=True)

    run_opts = parser.add_argument_group(title='Run command args')
    run_opts.add_argument(
        '--memory',
        type=str,
        default='76',
        help='Memory for Isaac in GB',
    )
    run_opts.add_argument(
        '--cmd_args',
        type=str,
        default=' ',
        help='Arguments for Isaac',
    )

    parser.add_argument('--working_dir', type=str, default='/scratch')

    args = parser.parse_args()
    scratch = generate_working_dir(args.working_dir)

    print('Downloading FASTQs')
    fastq_dir = download_fastq_files(
        args.fastq1_s3_path, args.fastq2_s3_path, scratch)

    print('Downloading Reference')
    reference_dir = download_reference(args.reference_s3_path, scratch)

    print('Running Isaac')
    bam_dir = run_isaac(
        reference_dir, fastq_dir, args.memory, args.cmd_args, scratch)

    print('Uploading results to %s' % args.bam_s3_folder_path)
    upload_bam(args.bam_s3_folder_path, bam_dir)

    print('Cleaning up working dir')
    delete_working_dir(scratch)
    print('Completed')
예제 #6
0
def main():
    """Command-line entry point for the SPAdes wrapper.

    Builds the SPAdes input arguments from ``--input_files`` and
    ``--input_flags`` with ``generate_input_string``, runs ``run_spades``
    and uploads the folder it returns to ``--results_path``. The working
    directory is deleted after a successful run.

    Raises:
        Exception: whatever ``run_spades`` raises is propagated after the
            whole working directory has been uploaded to
            ``--results_path`` for inspection. The working directory is
            not deleted in that case.
    """
    argparser = ArgumentParser()

    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument(
        '--input_flags',
        type=str,
        help='All the flags for the files, in correct order',
        required=True)
    file_path_group.add_argument(
        '--input_files',
        type=str,
        help='All the S3 file locations, in correct order',
        required=True)

    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--cmd_args',
                           type=str,
                           help='Arguments for preprocessing',
                           default=' ')

    argparser.add_argument('--working_dir', type=str, default='/scratch')
    argparser.add_argument('--results_path', type=str, default='/scratch')

    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    print('Downloading files and forming input parameters')
    print('input flags', args.input_flags)
    print('input_files', args.input_files)

    input_arguments = generate_input_string(args.input_files, args.input_flags,
                                            working_dir)

    try:
        print('Running SPAdes')
        results_folder_path = run_spades(input_arguments, args.cmd_args,
                                         working_dir)
    except Exception:
        # Keep whatever SPAdes left behind available for debugging, then
        # propagate the real error. Falling through would hit the upload
        # below with ``results_folder_path`` unbound and raise a
        # misleading NameError instead.
        upload_results(args.results_path, working_dir)
        raise

    upload_results(args.results_path, results_folder_path)
    print('Cleaning the working dir')
    delete_working_dir(working_dir)
    print('Completed')
    """
예제 #7
0
def main():
    """Command-line entry point for the samtools-stats wrapper.

    Downloads the BAM and the reference into a fresh working directory,
    runs ``run_samtools_stats`` with ``--cmd_args``, uploads the file it
    returns to ``--bam_stats_s3_path`` and deletes the working directory.
    """
    parser = ArgumentParser()

    # Required S3 locations: identical declarations apart from flag/help.
    paths = parser.add_argument_group(title='File paths')
    for flag, description in (
            ('--bam_s3_path', 'BAM s3 path'),
            ('--reference_s3_path', 'reference file'),
            ('--bam_stats_s3_path', 'S3 Path to upload stats'),
    ):
        paths.add_argument(flag, type=str, help=description, required=True)

    run_opts = parser.add_argument_group(title='Run command args')
    run_opts.add_argument(
        '--cmd_args',
        type=str,
        default='',
        help='Arguments for platypus',
    )

    parser.add_argument('--working_dir', type=str, default='/scratch')

    args = parser.parse_args()
    scratch = generate_working_dir(args.working_dir)

    print("Downloading bam")
    bam_local = download_file(args.bam_s3_path, scratch)
    print("BAM downloaded to %s" % bam_local)

    print("Downloading reference")
    reference_local = download_file(args.reference_s3_path, scratch)
    print("Reference downloaded to %s." % reference_local)

    print("Running samtools stats")
    stats_local = run_samtools_stats(
        bam_local, reference_local, args.cmd_args, scratch)

    stats_remote = args.bam_stats_s3_path
    print("Uploading %s to %s" % (stats_local, stats_remote))
    upload_file(stats_remote, stats_local)

    print('Cleaning up working dir')
    delete_working_dir(scratch)
    print("Completed")
예제 #8
0
def main():
    """Command-line entry point for the trim_galore wrapper.

    Downloads both FASTQ files into a fresh working directory, joins the
    two local paths with a single space, runs ``run_trim_galore`` on that
    string, uploads the folder it returns to ``--results_path`` via
    ``upload_bam`` and deletes the working directory.
    """
    parser = ArgumentParser()

    paths = parser.add_argument_group(title='File paths')
    for flag, description in (
            ('--fastq1_s3_path', 'FASTQ1 s3 path'),
            ('--fastq2_s3_path', 'FASTQ2 s3  path'),
    ):
        paths.add_argument(flag, type=str, help=description, required=True)

    run_opts = parser.add_argument_group(title='Run command args')
    run_opts.add_argument(
        '--cmd_args',
        type=str,
        default=' ',
        help='Arguments for preprocessing',
    )

    # Both directory options share the same type and default.
    for flag in ('--working_dir', '--results_path'):
        parser.add_argument(flag, type=str, default='/scratch')

    args = parser.parse_args()
    scratch = generate_working_dir(args.working_dir)

    print('Downloading FASTQs')
    # Read 1 is fetched before read 2, and appears first in the string.
    first_read = download_fastq_file(args.fastq1_s3_path, scratch)
    second_read = download_fastq_file(args.fastq2_s3_path, scratch)
    paired_reads = '{0} {1}'.format(first_read, second_read)

    print('Running trim_galore')
    output_folder = run_trim_galore(args.cmd_args, paired_reads, scratch)

    print('Uploading results to %s' % args.results_path)
    upload_bam(args.results_path, output_folder)

    print('Cleaning up working dir')
    delete_working_dir(scratch)
    print('Completed')
    """
예제 #9
0
def main():
    """Command-line entry point for the cohort-matcher wrapper.

    Parses arguments with ``parseArguments``, configures logging,
    downloads the two BAM sheets into a fresh working directory, fetches
    and unpacks the reference bundle for every genome build named by
    either ``set1_reference`` or ``set2_reference``, runs
    ``run_cohort_matcher`` and finally deletes the working directory.

    When ``args.max_jobs`` is ``None`` the job count defaults to
    ``multiprocessing.cpu_count()``.
    """
    args = parseArguments()
    logging.basicConfig(level=args.log_level)
    logger.info("Run cohort-matcher Docker CLI v%s", __version__)
    logger.info(args)

    working_dir = generate_working_dir(args.working_dir)

    logger.info('Downloading bam sheets')
    set1_bamsheet = download_file(args.set1_s3_path, working_dir)
    set2_bamsheet = download_file(args.set2_s3_path, working_dir)

    # Download each reference bundle needed by EITHER sample set. Handling
    # both builds through one loop keeps the set1/set2 check and the
    # uncompress(archive, destination) call identical for hg19 and GRCh37.
    requested_references = (args.set1_reference, args.set2_reference)
    for build in ('hg19', 'GRCh37'):
        if build not in requested_references:
            continue
        bundle_name = '%s-cohort-matcher.tar.bz2' % build
        logger.info("Downloading %s reference bundle", build)
        download_file('s3://bmsrd-ngs-repo/reference/' + bundle_name,
                      working_dir)
        logger.info("Uncompressing %s reference bundle", build)
        uncompress(os.path.join(working_dir, bundle_name), working_dir)

    # Run cohort-matcher
    logger.info('Running cohort-matcher')
    if args.max_jobs is None:
        max_jobs = multiprocessing.cpu_count()
    else:
        max_jobs = args.max_jobs
    output_folder_path = run_cohort_matcher(args.log_level, set1_bamsheet, set2_bamsheet,
                                            args.set1_reference, args.set2_reference,
                                            working_dir, args.output_prefix, max_jobs)
    logger.info('Uploading results to %s', args.s3_output_folder_path)
    # TODO(review): the upload step is disabled, so output_folder_path is
    # currently never uploaded even though the message above says so:
    #upload_bam(args.bam_s3_folder_path, bam_folder_path)
    logger.info('Cleaning up working dir')
    delete_working_dir(working_dir)
    logger.info('Completed')
예제 #10
0
def main():
    """Command-line entry point for the verifyBamID wrapper.

    Sums the sizes of the VCF, BAM and BAI objects (plus 2e9 of slack),
    writes the total to ``/TOTAL_SIZE`` and blocks until ``/scratch``
    exists and is a mount point. It then downloads the three inputs,
    runs ``run_verifybamid_basic`` and uploads every
    ``.selfSM``/``.bestSM``/``.depthSM``/``.log`` output that exists
    before deleting the working directory.
    """
    parser = ArgumentParser()

    paths = parser.add_argument_group(title='File paths')
    for flag, description in (
            ('--vcf_s3_path', 'VCF s3 path'),
            ('--bam_s3_path', 'BAM s3 path'),
            ('--bai_s3_path', 'BAI s3 path'),
            ('--results_s3_path', 'S3 Path to upload stats'),
    ):
        paths.add_argument(flag, type=str, help=description, required=True)

    run_opts = parser.add_argument_group(title='Run command args')
    # Zero or more extra arguments, collected as a list under opt_list.
    run_opts.add_argument(
        '--cmd_args',
        type=str,
        help='Additional Arguments',
        default=None,
        nargs='*',
        action='store',
        dest='opt_list',
    )

    args = parser.parse_args()

    # Input footprint plus headroom ("formatting loss"), as an integer.
    s3_inputs = [args.vcf_s3_path, args.bam_s3_path, args.bai_s3_path]
    required_bytes = int(sum(get_size(s3_obj) for s3_obj in s3_inputs) + 2e9)

    print("Total Size := {0}".format(required_bytes))

    # Declare expected disk usage, triggers host's EBS script (ecs-ebs-manager)
    with open("/TOTAL_SIZE", "w") as size_file:
        size_file.write("{0}".format(required_bytes))

    print("Waiting EBS")

    scratch_root = '/scratch'
    # First wait for the directory to appear ...
    while not os.path.isdir(scratch_root):
        time.sleep(5)
    # ... then for it to actually be a mount point.
    while not os.path.ismount(scratch_root):
        time.sleep(1)

    scratch = generate_working_dir(scratch_root)

    print("Downloading vcf")
    vcf_local = download_file(args.vcf_s3_path, scratch)
    print("VCF downloaded to %s" % vcf_local)

    print("Downloading bam")
    bam_local = download_file(args.bam_s3_path, scratch)
    print("BAM downloaded to %s" % bam_local)

    print("Downloading bam index")
    bai_local = download_file(args.bai_s3_path, scratch)
    print("BAM index downloaded to %s" % bai_local)

    print("Running verifybamid")
    stats_prefix = run_verifybamid_basic(
        vcf_local, bam_local, bai_local, args.opt_list, scratch)

    # Only upload the output files that were actually produced.
    for suffix in ['.selfSM', '.bestSM', '.depthSM', '.log']:
        produced = stats_prefix + suffix
        if not os.path.exists(produced):
            continue
        target = args.results_s3_path + suffix
        print("Uploading %s to %s" % (produced, target))
        upload_file(target, produced)

    print('Cleaning up working dir')
    delete_working_dir(scratch)

    print("Completed")
예제 #11
0
파일: run_preQC.py 프로젝트: ImerM/sag_aws
def main():
    """Command-line entry point for the preQC wrapper.

    Downloads both FASTQ files into a fresh working directory and runs
    three stages in order -- ``run_preprocess``, ``run_index`` and
    ``run_preqc`` -- sleeping 10 seconds after each of the first two.
    The whole working directory is then uploaded to ``--results_folder``
    with ``upload_results`` and deleted.
    """
    parser = ArgumentParser()

    paths = parser.add_argument_group(title='File paths')
    for flag, description in (
            ('--fastq1_s3_path', 'FASTQ1 s3 path'),
            ('--fastq2_s3_path', 'FASTQ2 s3  path'),
    ):
        paths.add_argument(flag, type=str, help=description, required=True)

    # One pass-through argument string per stage; all are declared alike.
    run_opts = parser.add_argument_group(title='Run command args')
    for flag in ('--preproc_cmd_args', '--index_cmd_args',
                 '--preqc_cmd_args', '--preqc_report_cmd_args'):
        run_opts.add_argument(
            flag,
            type=str,
            help='Arguments for preprocessing',
            default=' ',
        )

    parser.add_argument('--working_dir', type=str, default='/scratch')
    parser.add_argument('--results_folder', type=str)

    args = parser.parse_args()
    scratch = generate_working_dir(args.working_dir)

    print('Downloading FASTQs')
    # Read 1 is fetched before read 2, and appears first in the string.
    first_read = download_fastq_file(args.fastq1_s3_path, scratch)
    second_read = download_fastq_file(args.fastq2_s3_path, scratch)
    paired_reads = '{0} {1}'.format(first_read, second_read)

    print('Running preprocess')
    preprocessed = run_preprocess(args.preproc_cmd_args, paired_reads, scratch)
    time.sleep(10)

    print('Running index')
    indexed = run_index(preprocessed, args.index_cmd_args, scratch)
    time.sleep(10)

    print('Running preqc')
    # The return value is not needed: the entire working dir is uploaded.
    run_preqc(indexed, args.preqc_cmd_args, args.preqc_report_cmd_args,
              scratch)

    print('Uploading results to %s' % args.results_folder)
    upload_results(args.results_folder, scratch)

    print('Cleaning up working dir')
    delete_working_dir(scratch)
    print('Completed')
    """