def main():
    """Entry point: fetch FASTQ inputs, run Kraken, upload results, clean up."""
    argparser = ArgumentParser()

    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--input_files', type=str,
                                 help='S3 paths for input files', required=True)

    run_group = argparser.add_argument_group(title='Run command args')
    run_group.add_argument('--cmd_args', type=str,
                           help='Arguments for preprocessing', default=' ')

    argparser.add_argument('--working_dir', type=str, default='/scratch')
    argparser.add_argument('--results_path', type=str, default='/scratch')

    args = argparser.parse_args()

    scratch_dir = generate_working_dir(args.working_dir)

    # Download fastq files and reference files
    print('Downloading FASTQs')
    fastq_files = generate_input_string(args.input_files, scratch_dir)

    print('Running Kraken')
    kraken_folder = run_kraken(args.cmd_args, fastq_files, scratch_dir)

    print('Uploading results to %s' % args.results_path)
    upload_folder(args.results_path, kraken_folder)

    print('Cleaning up working dir')
    delete_working_dir(scratch_dir)

    print('Completed')
def main():
    """Entry point: run vcf2tiledb for one array-job shard and upload results.

    The shard index comes from ``--index`` when given; otherwise it falls
    back to the AWS_BATCH_JOB_ARRAY_INDEX environment variable that AWS
    Batch sets on array jobs.
    """
    argparser = parse4vcf2tiledb()
    args, extr = argparser.parse_known_args()
    print(args)

    # Prefer an explicit --index; otherwise use the Batch array index.
    # (Fixed `== None` -> `is None`; int() will still raise if neither
    # the flag nor the environment variable is set.)
    if args.index is None:
        idx = int(os.getenv('AWS_BATCH_JOB_ARRAY_INDEX'))
    else:
        idx = args.index

    loader_path, callset_path, vid_path = download_required_files(
        args.loader_s3_path, args.callset_s3_path, args.vid_s3_path)

    # GETEBS requests dedicated scratch storage; otherwise make sure the
    # local working directory exists.
    if os.getenv('GETEBS'):
        initEBS(WORKDIR)
    else:
        if not os.path.exists(WORKDIR):
            os.mkdir(WORKDIR)

    # Output directory for the produced VCF data — needed on both branches.
    # NOTE(review): the original source was collapsed to one line, so the
    # nesting of this mkdir was ambiguous; confirm it should be unconditional.
    if not os.path.exists(PVCFDIR):
        os.mkdir(PVCFDIR)

    # Run program
    run_vcf2tiledb_no_s3(WORKDIR, idx, loader_path, callset_path, vid_path, args.chr)

    if not os.getenv('SKIP_UPLOAD'):
        print("Uploading to %s" % (args.results_s3_path))
        upload_folder(args.results_s3_path, PVCFDIR)

    print ("Completed vcf2tiledb")
def main():
    """Entry point: download BAM + reference, run Strelka, upload the VCF output."""
    argparser = ArgumentParser()

    file_path_group = argparser.add_argument_group(title='File paths')
    file_path_group.add_argument('--bam_s3_path', type=str,
                                 help='BAM s3 path', required=True)
    file_path_group.add_argument('--bai_s3_path', type=str,
                                 help='BAM Index s3 path', required=True)
    file_path_group.add_argument('--vcf_s3_path', type=str,
                                 help='VCF s3 path', required=True)
    file_path_group.add_argument('--reference_s3_path', type=str,
                                 help='Reference file s3 path', required=True)
    file_path_group.add_argument('--reference_index_s3_path', type=str,
                                 help='Reference file index s3 path', required=True)

    run_group = argparser.add_argument_group(title='Run command args')
    # NOTE(review): default is the int 28 while type=str, so the value is a
    # str when supplied on the command line but an int when defaulted —
    # confirm run_strelka accepts both before normalizing.
    run_group.add_argument('--memory', type=str,
                           help='Memory (in GB) for strelka to use', default=28)
    run_group.add_argument('--cmd_args', type=str,
                           help='Additional arguments for platypus', default='')

    argparser.add_argument('--working_dir', type=str, default='/scratch')

    args = argparser.parse_args()

    working_dir = generate_working_dir(args.working_dir)

    print("Downloading bam")
    local_bam_path = download_file(args.bam_s3_path, working_dir)
    local_bai_path = download_file(args.bai_s3_path, working_dir)
    # Fixed typo in the original message ("donwloaded").
    print("BAM and index downloaded to %s and %s" % (local_bam_path, local_bai_path))

    print("Downloading reference")
    local_reference_path = download_file(args.reference_s3_path, working_dir)
    local_reference_index_path = download_file(args.reference_index_s3_path, working_dir)
    print("Reference downloaded to %s. Index to %s"
          % (local_reference_path, local_reference_index_path))

    print("Running Strelka")
    local_vcf_path = run_strelka(local_bam_path, local_reference_path,
                                 args.memory, args.cmd_args, working_dir)

    print("Uploading %s to %s" % (local_vcf_path, args.vcf_s3_path))
    upload_folder(args.vcf_s3_path, local_vcf_path)

    print('Cleaning up working dir')
    delete_working_dir(working_dir)

    print("Completed")
def upload_bam(bam_s3_path, local_folder_path):
    """Upload the folder holding the bam file (and associated output).

    :param bam_s3_path: S3 path to upload the alignment results to
    :param local_folder_path: local path containing the alignment results
    """
    upload_folder(bam_s3_path, local_folder_path)
def upload_results(folder_s3_path, local_folder_path):
    """Upload the results folder (and associated output) to S3.

    :param folder_s3_path: S3 path to upload the assembly results to
    :param local_folder_path: local path containing the assembly results
    """
    upload_folder(folder_s3_path, local_folder_path)
def upload_results(results_path, working_dir):
    """Upload the 'preqc' and 'fragsizes' result folders to S3.

    The original docstring was copy-pasted from upload_bam and documented
    parameters this function does not have; corrected here.

    :param results_path: S3 path to upload the results to
    :param working_dir: local working directory containing the 'preqc'
        and 'fragsizes' subdirectories
    """
    upload_folder(results_path, os.path.join(working_dir, 'preqc'))
    upload_folder(results_path, os.path.join(working_dir, 'fragsizes'))
def main():
    """Entry point: download reference and sample, run xatlas, upload results."""
    argparser = parse_args()
    args, extr = argparser.parse_known_args()
    print(args)

    ref_idx = args.ref_s3_path + '.fai'

    # Choose the index-file extension matching the alignment file type.
    sample_ext = args.sample_s3_path.rpartition('.')[-1]
    if sample_ext == 'bam':
        input_idx = args.sample_s3_path + '.bai'
    elif sample_ext == 'cram':
        input_idx = args.sample_s3_path + '.crai'
    else:
        raise Exception("Unknown input type")

    # GETEBS requests dedicated scratch storage; otherwise ensure the
    # local working directory exists.
    if os.getenv('GETEBS'):
        initEBS(WORKDIR)
    elif not os.path.exists(WORKDIR):
        os.mkdir(WORKDIR)

    ref_path, ref_idx_path, input_path, idx_path, region_path = download_required_files(
        WORKDIR, args.ref_s3_path, ref_idx, args.sample_s3_path, input_idx,
        args.regions)

    output_dir = WORKDIR + '/results'
    os.mkdir(output_dir)
    output_prefix = output_dir + '/{SAMPLE}.hg38.realign.bqsr'.format(
        SAMPLE=args.sample_name)

    # Run program
    run_xatlas_basic(args.sample_name, input_path, ref_path, args.threads,
                     region_path, output_prefix, output_dir)

    if not os.getenv('SKIP_UPLOAD'):
        print("Uploading to %s" % (args.results_s3_path))
        upload_folder(args.results_s3_path, output_dir)

    print("Completed xatlas for %s" % args.sample_name)
# Sanity check: confirm the reference transcriptome directory is present.
cmd = "ls -l refdata-cellranger-GRCh38-1.2.0"
subprocess.check_call(shlex.split(cmd))

# Run Cellranger MKFASTQ and COUNT ############################################

# cellranger mkfastq
cmd = 'cellranger mkfastq --id=tiny-bcl-output --run=tiny-bcl/cellranger-tiny-bcl-1.2.0/ --csv=tiny-bcl/cellranger-tiny-bcl-samplesheet-1.2.0.csv'
subprocess.check_call(shlex.split(cmd))

# use full path for reference transcriptome
# cellranger count
cmd = 'cellranger count --id=test_sample --fastqs=tiny-bcl-output/outs/fastq_path/p1/s1 --sample=test_sample --expect-cells=1000 --localmem=3 --chemistry=SC3Pv2 --transcriptome=refdata-cellranger-GRCh38-1.2.0'
subprocess.check_call(shlex.split(cmd))

# Copy data back to S3 ###########

# copy mkfastq outputs
s3_path = 's3://' + inst_bucket + '/tiny-bcl-output'
upload_folder(s3_path, 'tiny-bcl-output')

# copy count outputs
s3_path = 's3://' + inst_bucket + '/test_sample'
upload_folder(s3_path, 'test_sample')