def main():
    argparser = ArgumentParser()
    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--input_files', type=str, help='S3 paths for input files', required=True)
    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--cmd_args', type=str, help='Arguments for Kraken', default=' ')
    argparser.add_argument('--working_dir', type=str, default='/scratch')
    argparser.add_argument('--results_path', type=str, default='/scratch')
    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    # Download the FASTQ input files
    print('Downloading FASTQs')
    fastq_files = generate_input_string(args.input_files, working_dir)

    print('Running Kraken')
    kraken_folder = run_kraken(args.cmd_args, fastq_files, working_dir)

    print('Uploading results to %s' % args.results_path)
    upload_folder(args.results_path, kraken_folder)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print('Completed')
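# Every entry point in this collection relies on the shared generate_working_dir
# and delete_working_dir helpers. A minimal sketch of what they might look like,
# assuming a uuid-named subdirectory under the scratch mount (the real helpers
# may differ):
import os
import shutil
import uuid


def generate_working_dir(working_dir_base):
    """Create and return a unique working directory under working_dir_base."""
    working_dir = os.path.join(working_dir_base, str(uuid.uuid4()))
    try:
        os.makedirs(working_dir)
    except OSError:
        # Fall back to the base directory if the subdirectory can't be created
        return working_dir_base
    return working_dir


def delete_working_dir(working_dir):
    """Remove the working directory and everything in it."""
    try:
        shutil.rmtree(working_dir)
    except OSError:
        print("Can't delete %s" % working_dir)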
def main():
    argparser = ArgumentParser()
    argparser.add_argument('--vcf_s3_path', type=str, help='VCF s3 path', required=True)
    argparser.add_argument('--annotated_vcf_s3_path', type=str, help='Annotated VCF s3 path', required=True)
    argparser.add_argument('--working_dir', type=str, default='/scratch')
    argparser.add_argument('--cmd_args', type=str, help='Arguments/options for snpEff', default='-t')
    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    print('Downloading vcf')
    local_vcf_path = download_file(args.vcf_s3_path, working_dir)

    print('Running snpeff')
    annotated_vcf_path = run_snpeff(local_vcf_path, args.cmd_args, working_dir)

    print('Uploading %s to %s' % (annotated_vcf_path, args.annotated_vcf_s3_path))
    upload_file(args.annotated_vcf_s3_path, annotated_vcf_path)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print('Completed')
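# download_file and upload_file are the single-object S3 transfer helpers these
# scripts share. A minimal boto3-based sketch, assuming s3://bucket/key style
# paths (the real helpers may shell out to the AWS CLI instead):
import os
import boto3


def _split_s3_path(s3_path):
    """Split s3://bucket/key into (bucket, key)."""
    bucket, _, key = s3_path.replace('s3://', '').partition('/')
    return bucket, key


def download_file(s3_path, working_dir):
    """Download an S3 object into working_dir and return the local path."""
    bucket, key = _split_s3_path(s3_path)
    local_path = os.path.join(working_dir, os.path.basename(key))
    boto3.client('s3').download_file(bucket, key, local_path)
    return local_path


def upload_file(s3_path, local_path):
    """Upload a local file to the given S3 path."""
    bucket, key = _split_s3_path(s3_path)
    boto3.client('s3').upload_file(local_path, bucket, key)
    return s3_path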
def main():
    argparser = ArgumentParser()
    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--bam_s3_path', type=str, help='BAM s3 path', required=True)
    file_path_group.add_argument('--bai_s3_path', type=str, help='BAM index s3 path', required=True)
    file_path_group.add_argument('--vcf_s3_path', type=str, help='VCF s3 path', required=True)
    file_path_group.add_argument('--reference_s3_path', type=str, help='Reference file s3 path', required=True)
    file_path_group.add_argument('--reference_index_s3_path', type=str, help='Reference file index s3 path', required=True)
    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--memory', type=str, help='Memory (in GB) for strelka to use', default='28')
    run_group.add_argument('--cmd_args', type=str, help='Additional arguments for strelka', default='')
    argparser.add_argument('--working_dir', type=str, default='/scratch')
    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    print("Downloading bam")
    local_bam_path = download_file(args.bam_s3_path, working_dir)
    local_bai_path = download_file(args.bai_s3_path, working_dir)
    print("BAM and index downloaded to %s and %s" % (local_bam_path, local_bai_path))

    print("Downloading reference")
    local_reference_path = download_file(args.reference_s3_path, working_dir)
    local_reference_index_path = download_file(args.reference_index_s3_path, working_dir)
    print("Reference downloaded to %s. Index to %s" % (local_reference_path, local_reference_index_path))

    print("Running Strelka")
    local_vcf_path = run_strelka(local_bam_path, local_reference_path, args.memory, args.cmd_args, working_dir)

    print("Uploading %s to %s" % (local_vcf_path, args.vcf_s3_path))
    upload_folder(args.vcf_s3_path, local_vcf_path)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print("Completed")
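# upload_folder pushes a whole output directory (e.g. the Strelka VCF folder
# above) to an S3 prefix. A hedged boto3 sketch; the real helper may instead
# run 'aws s3 cp --recursive':
import os
import boto3


def upload_folder(s3_folder_path, local_folder):
    """Recursively upload local_folder to the given S3 folder path."""
    bucket, _, prefix = s3_folder_path.replace('s3://', '').partition('/')
    s3 = boto3.client('s3')
    for root, _, files in os.walk(local_folder):
        for name in files:
            local_path = os.path.join(root, name)
            rel_path = os.path.relpath(local_path, local_folder)
            s3.upload_file(local_path, bucket, prefix.rstrip('/') + '/' + rel_path)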
def main():
    argparser = ArgumentParser()
    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--input_file', type=str, help='Input file s3 path', required=True)
    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--cmd_args', type=str, help='Arguments for BUSCO', default=' ')
    argparser.add_argument('--working_dir', type=str, default='/docker_share')
    argparser.add_argument('--results_path', type=str, default='/scratch')
    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    # Download the input FASTQ
    print('Downloading input FASTQ')
    input_file = download_fastq_file(args.input_file, working_dir)

    print('Running busco')
    busco_folder = run_busco(args.cmd_args, input_file, working_dir)

    print('Uploading results to %s' % args.results_path)
    upload_results(args.results_path, busco_folder)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print('Completed')
def main():
    argparser = ArgumentParser()
    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--bam_s3_folder_path', type=str, help='BAM s3 path', required=True)
    file_path_group.add_argument('--fastq1_s3_path', type=str, help='FASTQ1 s3 path', required=True)
    file_path_group.add_argument('--fastq2_s3_path', type=str, help='FASTQ2 s3 path', required=True)
    file_path_group.add_argument('--reference_s3_path', type=str, help='Reference file s3 path', required=True)
    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--memory', type=str, help='Memory for Isaac in GB', default='76')
    run_group.add_argument('--cmd_args', type=str, help='Arguments for Isaac', default=' ')
    argparser.add_argument('--working_dir', type=str, default='/scratch')
    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    # Download fastq files and reference files
    print('Downloading FASTQs')
    fastq_folder_path = download_fastq_files(args.fastq1_s3_path, args.fastq2_s3_path, working_dir)

    print('Downloading Reference')
    reference_folder_path = download_reference(args.reference_s3_path, working_dir)

    print('Running Isaac')
    bam_folder_path = run_isaac(reference_folder_path, fastq_folder_path, args.memory, args.cmd_args, working_dir)

    print('Uploading results to %s' % args.bam_s3_folder_path)
    upload_bam(args.bam_s3_folder_path, bam_folder_path)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print('Completed')
def main():
    argparser = ArgumentParser()
    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--input_flags', type=str,
                                 help='All the flags for the files, in correct order', required=True)
    file_path_group.add_argument('--input_files', type=str,
                                 help='All the S3 file locations, in correct order', required=True)
    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--cmd_args', type=str, help='Arguments for SPAdes', default=' ')
    argparser.add_argument('--working_dir', type=str, default='/scratch')
    argparser.add_argument('--results_path', type=str, default='/scratch')
    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    # Download the input files into the container and build the input string
    print('Downloading files and forming input parameters')
    print('input flags', args.input_flags)
    print('input_files', args.input_files)
    input_arguments = generate_input_string(args.input_files, args.input_flags, working_dir)

    try:
        print('Running SPAdes')
        results_folder_path = run_spades(input_arguments, args.cmd_args, working_dir)
    except Exception:
        # On failure, upload the working dir for debugging, then re-raise so we
        # don't fall through to the undefined results_folder_path below.
        upload_results(args.results_path, working_dir)
        raise

    upload_results(args.results_path, results_folder_path)

    print('Cleaning the working dir')
    delete_working_dir(working_dir)
    print('Completed')
def main():
    argparser = ArgumentParser()
    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--bam_s3_path', type=str, help='BAM s3 path', required=True)
    file_path_group.add_argument('--reference_s3_path', type=str, help='Reference file s3 path', required=True)
    file_path_group.add_argument('--bam_stats_s3_path', type=str, help='S3 path to upload stats', required=True)
    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--cmd_args', type=str, help='Arguments for samtools stats', default='')
    argparser.add_argument('--working_dir', type=str, default='/scratch')
    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    print("Downloading bam")
    local_bam_path = download_file(args.bam_s3_path, working_dir)
    print("BAM downloaded to %s" % local_bam_path)

    print("Downloading reference")
    local_reference_path = download_file(args.reference_s3_path, working_dir)
    print("Reference downloaded to %s." % local_reference_path)

    print("Running samtools stats")
    local_stats_path = run_samtools_stats(local_bam_path, local_reference_path, args.cmd_args, working_dir)

    print("Uploading %s to %s" % (local_stats_path, args.bam_stats_s3_path))
    upload_file(args.bam_stats_s3_path, local_stats_path)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print("Completed")
def main():
    argparser = ArgumentParser()
    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--fastq1_s3_path', type=str, help='FASTQ1 s3 path', required=True)
    file_path_group.add_argument('--fastq2_s3_path', type=str, help='FASTQ2 s3 path', required=True)
    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--cmd_args', type=str, help='Arguments for Trim Galore', default=' ')
    argparser.add_argument('--working_dir', type=str, default='/scratch')
    argparser.add_argument('--results_path', type=str, default='/scratch')
    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    # Download the FASTQ pair
    print('Downloading FASTQs')
    fastq_files = '{0} {1}'.format(
        download_fastq_file(args.fastq1_s3_path, working_dir),
        download_fastq_file(args.fastq2_s3_path, working_dir))

    print('Running trim_galore')
    trimmed_folder = run_trim_galore(args.cmd_args, fastq_files, working_dir)

    print('Uploading results to %s' % args.results_path)
    upload_bam(args.results_path, trimmed_folder)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print('Completed')
def main():
    args = parseArguments()
    logging.basicConfig(level=args.log_level)
    logger.info("Run cohort-matcher Docker CLI v%s", __version__)
    logger.info(args)

    working_dir = generate_working_dir(args.working_dir)

    # Download the bam sheets for each sample set
    logger.info('Downloading bam sheets')
    set1_bamsheet = download_file(args.set1_s3_path, working_dir)
    set2_bamsheet = download_file(args.set2_s3_path, working_dir)

    # Download reference bundles
    if args.set1_reference == 'hg19' or args.set2_reference == 'hg19':
        logger.info("Downloading hg19 reference bundle")
        download_file('s3://bmsrd-ngs-repo/reference/hg19-cohort-matcher.tar.bz2', working_dir)
        logger.info("Uncompressing hg19 reference bundle")
        uncompress(os.path.join(working_dir, 'hg19-cohort-matcher.tar.bz2'), working_dir)
    if args.set1_reference == 'GRCh37' or args.set2_reference == 'GRCh37':
        logger.info("Downloading GRCh37 reference bundle")
        download_file('s3://bmsrd-ngs-repo/reference/GRCh37-cohort-matcher.tar.bz2', working_dir)
        logger.info("Uncompressing GRCh37 reference bundle")
        uncompress(os.path.join(working_dir, 'GRCh37-cohort-matcher.tar.bz2'), working_dir)

    # Run cohort-matcher
    logger.info('Running cohort-matcher')
    if args.max_jobs is None:
        max_jobs = multiprocessing.cpu_count()
    else:
        max_jobs = args.max_jobs
    output_folder_path = run_cohort_matcher(args.log_level, set1_bamsheet, set2_bamsheet,
                                            args.set1_reference, args.set2_reference,
                                            working_dir, args.output_prefix, max_jobs)

    logger.info('Uploading results to %s', args.s3_output_folder_path)
    # Assumes the shared upload_folder S3 helper is available in this script
    upload_folder(args.s3_output_folder_path, output_folder_path)

    logger.info('Cleaning up working dir')
    delete_working_dir(working_dir)
    logger.info('Completed')
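# uncompress unpacks the .tar.bz2 reference bundles fetched above. A minimal
# sketch using the standard-library tarfile module (assumed; the real helper
# may shell out to tar):
import tarfile


def uncompress(tarball_path, dest_dir):
    """Extract a .tar.bz2 archive into dest_dir."""
    with tarfile.open(tarball_path, 'r:bz2') as tar:
        tar.extractall(path=dest_dir)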
def main():
    argparser = ArgumentParser()
    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--vcf_s3_path', type=str, help='VCF s3 path', required=True)
    file_path_group.add_argument('--bam_s3_path', type=str, help='BAM s3 path', required=True)
    file_path_group.add_argument('--bai_s3_path', type=str, help='BAI s3 path', required=True)
    file_path_group.add_argument('--results_s3_path', type=str, help='S3 path to upload stats', required=True)
    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--cmd_args', type=str, help='Additional arguments', default=None,
                           nargs='*', action='store', dest='opt_list')
    #argparser.add_argument('--working_dir', type=str, default='/scratch')
    args = argparser.parse_args()

    # Sum the sizes of the S3 inputs; add headroom for filesystem formatting loss
    total_size = 0
    for obj in [args.vcf_s3_path, args.bam_s3_path, args.bai_s3_path]:
        total_size += get_size(obj)
    total_size += 2e9
    total_size = int(total_size)
    print("Total Size := {0}".format(total_size))

    # Declare expected disk usage; this triggers the host's EBS script (ecs-ebs-manager)
    with open("/TOTAL_SIZE", "w") as text_file:
        text_file.write("{0}".format(total_size))

    print("Waiting EBS")
    # Wait for the EBS volume to appear
    while not os.path.isdir('/scratch'):
        time.sleep(5)
    # Wait for mount verification
    while not os.path.ismount('/scratch'):
        time.sleep(1)

    working_dir = generate_working_dir('/scratch')

    print("Downloading vcf")
    local_vcf_path = download_file(args.vcf_s3_path, working_dir)
    print("VCF downloaded to %s" % local_vcf_path)

    print("Downloading bam")
    local_bam_path = download_file(args.bam_s3_path, working_dir)
    print("BAM downloaded to %s" % local_bam_path)

    print("Downloading bam index")
    local_bam_index_path = download_file(args.bai_s3_path, working_dir)
    print("BAM index downloaded to %s" % local_bam_index_path)

    print("Running verifybamid")
    local_stats_path = run_verifybamid_basic(local_vcf_path, local_bam_path,
                                             local_bam_index_path, args.opt_list, working_dir)

    # Upload whichever verifyBamID output files were produced
    for ext in ['.selfSM', '.bestSM', '.depthSM', '.log']:
        if os.path.exists(local_stats_path + ext):
            print("Uploading %s to %s" % (local_stats_path + ext, args.results_s3_path + ext))
            upload_file(args.results_s3_path + ext, local_stats_path + ext)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print("Completed")
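# get_size is used above to pre-compute the EBS capacity to request from the
# sizes of the S3 inputs. A sketch assuming one HEAD request per object via
# boto3:
import boto3


def get_size(s3_path):
    """Return the size in bytes of an S3 object."""
    bucket, _, key = s3_path.replace('s3://', '').partition('/')
    response = boto3.client('s3').head_object(Bucket=bucket, Key=key)
    return response['ContentLength']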
def main():
    argparser = ArgumentParser()
    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--fastq1_s3_path', type=str, help='FASTQ1 s3 path', required=True)
    file_path_group.add_argument('--fastq2_s3_path', type=str, help='FASTQ2 s3 path', required=True)
    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--preproc_cmd_args', type=str, help='Arguments for preprocessing', default=' ')
    run_group.add_argument('--index_cmd_args', type=str, help='Arguments for indexing', default=' ')
    run_group.add_argument('--preqc_cmd_args', type=str, help='Arguments for preqc', default=' ')
    run_group.add_argument('--preqc_report_cmd_args', type=str, help='Arguments for the preqc report', default=' ')
    argparser.add_argument('--working_dir', type=str, default='/scratch')
    argparser.add_argument('--results_folder', type=str)
    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    # Download the FASTQ pair
    print('Downloading FASTQs')
    fastq_files = '{0} {1}'.format(
        download_fastq_file(args.fastq1_s3_path, working_dir),
        download_fastq_file(args.fastq2_s3_path, working_dir))

    print('Running preprocess')
    preprocessed_file = run_preprocess(args.preproc_cmd_args, fastq_files, working_dir)
    time.sleep(10)

    print('Running index')
    indexed_file = run_index(preprocessed_file, args.index_cmd_args, working_dir)
    time.sleep(10)

    print('Running preqc')
    preqc_file = run_preqc(indexed_file, args.preqc_cmd_args, args.preqc_report_cmd_args, working_dir)

    print('Uploading results to %s' % args.results_folder)
    upload_results(args.results_folder, working_dir)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)
    print('Completed')