def main(args, outs):
    """Run the preflight checks for demultiplexing an Illumina run folder.

    The checks run in the order below; whenever a check reports failure its
    message is handed to ``martian.exit``:

    1. ``tk_preflight.check_rta_complete`` on ``args.run_path``.
    2. ``tk_preflight.check_runinfo_xml`` on ``args.run_path``; unless
       ``args.allow_no_barcodes`` is set, the result is validated with
       ``check_reads``.
    3. ``tk_preflight.check_ld_library_path``.
    4. No ``SampleSheet.csv`` may exist at the top of the run folder or in
       ``Data/Intensities/BaseCalls`` (not enforced when the ``kitten``
       module is importable).
    5. When ``args.check_executables`` is set, the RTA version of the run is
       read and a compatible bcl2fastq is looked up.
    6. ``tk_preflight.check_open_fh``.

    Args:
        args: Stage arguments; reads ``run_path``, ``allow_no_barcodes`` and
            ``check_executables``.
        outs: Stage outputs; not used by this function.
    """
    hostname = socket.gethostname()

    # Single-argument print calls are parenthesised so they behave the same
    # as the Python 2 print statement and also parse on Python 3.
    print("Checking run folder...")
    tk_preflight.check_rta_complete(args.run_path)

    print("Checking RunInfo.xml...")
    runinfo = tk_preflight.check_runinfo_xml(args.run_path)

    if not args.allow_no_barcodes:
        ok, msg = check_reads(runinfo)
        if not ok:
            martian.exit(msg)

    print("Checking system environment...")
    ok, msg = tk_preflight.check_ld_library_path()
    if not ok:
        martian.exit(msg)

    # The sample sheet check only applies when the `kitten` module is not
    # importable. The probe does not depend on the directory being inspected,
    # so it is done once instead of once per loop iteration.
    try:
        import kitten  # noqa: F401 -- imported only to probe availability
        external = False
    except ImportError:
        external = True

    # Presence of SampleSheet.csv interferes with demux.
    # Ask customer to move it. Under older RTA, bcl2fastq looks for it
    # in Data/Intensities/BaseCalls while under newer RTA, it looks for it
    # at the top of the run folder.
    bc_dir = os.path.join(args.run_path, "Data", "Intensities", "BaseCalls")
    for ss_dir in [args.run_path, bc_dir]:
        ilmn_sample_sheet = os.path.join(ss_dir, "SampleSheet.csv")
        if external and os.path.exists(ilmn_sample_sheet):
            martian.exit(
                "On machine: %s, SampleSheet.csv found in run folder that would interfere with demux:\n%s\nPlease move, rename, or delete the file and run demux again."
                % (hostname, ilmn_sample_sheet))

    if args.check_executables:
        print("Checking bcl2fastq...")
        # Determine the RTA version of the run and whether this instrument
        # requires i2 to be RC'd (the RC flag is not needed by this check).
        (rta_version, _rc_i2_read, bcl_params) = tk_bcl.get_rta_version(args.run_path)
        martian.log_info("RTA Version: %s" % rta_version)
        martian.log_info("BCL Params: %s" % str(bcl_params))

        # Determine the best available bcl2fastq version to use
        # Will call martian.exit() with an error message if there isn't
        # a compatible version available
        (major_ver, full_ver) = tk_bcl.check_bcl2fastq(hostname, rta_version)
        martian.log_info("Running bcl2fastq mode: %s. Version: %s" % (major_ver, full_ver))

    ok, msg = tk_preflight.check_open_fh()
    if not ok:
        martian.exit(msg)
def main(args, outs):
    """Run the mkfastq preflight checks for an Illumina run folder.

    Steps, in order: run folder check, RunInfo.xml check, LD_LIBRARY_PATH
    check, barcode whitelist check, optional bcl2fastq lookup (when
    ``args.check_executables`` is set), rejection of the
    ``--no-lane-splitting`` bcl2fastq option, diagnostic logging, read
    parameter / samplesheet spec / dual-index checks, and finally the open
    file handle check. Failed (ok, message) checks end in ``martian.exit``.

    Args:
        args: Stage arguments; reads ``run_path``, ``barcode_whitelist``,
            ``check_executables`` and ``bcl2fastq2_args`` directly and is
            also passed on to the helper checks.
        outs: Stage outputs; not used by this function.
    """
    host = socket.gethostname()
    run_dir = args.run_path

    print("Checking run folder...")
    tk_preflight.check_rta_complete(run_dir)

    print("Checking RunInfo.xml...")
    runinfo = tk_preflight.check_runinfo_xml(run_dir)

    print("Checking system environment...")
    env_ok, env_msg = tk_preflight.check_ld_library_path()
    if not env_ok:
        martian.exit(env_msg)

    print("Checking barcode whitelist...")
    tk_preflight.check_barcode_whitelist(args.barcode_whitelist)

    if args.check_executables:
        print("Checking bcl2fastq...")
        rta_info = tk_bcl.get_rta_version(run_dir)
        # get_rta_version yields (rta_version, rc_i2_read, bcl_params);
        # the middle element is not needed here.
        rta_version, _unused_rc_i2, bcl_params = rta_info
        martian.log_info("RTA Version: %s" % rta_version)
        martian.log_info("BCL Params: %s" % str(bcl_params))

        versions = tk_bcl.check_bcl2fastq(host, rta_version)
        major_ver, full_ver = versions
        martian.log_info("Running bcl2fastq mode: %s. Version: %s" % (major_ver, full_ver))

    # Lane splitting must stay enabled, so the opt-out flag is refused.
    if '--no-lane-splitting' in args.bcl2fastq2_args:
        martian.exit("The --no-lane-splitting option is not supported.")

    print("Emitting run information...")
    martian.log_info("-------mkfastq diagnostic start-------")
    emit_info(args)

    print("Checking read specification...")
    check_read_params(args, runinfo)
    martian.log_info("-------mkfastq diagnostic end-------")

    print("Checking samplesheet specs...")
    check_specs(args)

    print("Checking for dual index flowcell...")
    check_dual_index(args, runinfo)

    fh_ok, fh_msg = tk_preflight.check_open_fh()
    if not fh_ok:
        martian.exit(fh_msg)
def main(args, outs):
    """Run preflight checks, including output folder and thread validation.

    Steps, in order: run folder check, RunInfo.xml check, LD_LIBRARY_PATH
    check, barcode whitelist check, optional bcl2fastq lookup (when
    ``args.check_executables`` is set), open file handle check, a
    check-or-create of each requested output folder, and a check that at
    least one bcl2fastq thread is allowed. Failed (ok, message) checks and an
    invalid thread count end in ``martian.exit``.

    Args:
        args: Stage arguments; reads ``run_path``, ``barcode_whitelist``,
            ``check_executables``, ``output_path``, ``interop_output_path``
            and ``max_bcl2fastq_threads``.
        outs: Stage outputs; not used by this function.
    """
    host = socket.gethostname()
    run_dir = args.run_path

    print("Checking run folder...")
    tk_preflight.check_rta_complete(run_dir)

    print("Checking RunInfo.xml...")
    tk_preflight.check_runinfo_xml(run_dir)

    print("Checking system environment...")
    env_ok, env_msg = tk_preflight.check_ld_library_path()
    if not env_ok:
        martian.exit(env_msg)

    print("Checking barcode whitelist...")
    tk_preflight.check_barcode_whitelist(args.barcode_whitelist)

    if args.check_executables:
        print("Checking bcl2fastq...")
        # get_rta_version yields (rta_version, rc_i2_read, bcl_params);
        # the middle element is not needed here.
        rta_version, _unused_rc_i2, bcl_params = tk_bcl.get_rta_version(run_dir)
        martian.log_info("RTA Version: %s" % rta_version)
        martian.log_info("BCL Params: %s" % str(bcl_params))

        major_ver, full_ver = tk_bcl.check_bcl2fastq(host, rta_version)
        martian.log_info("Running bcl2fastq mode: %s. Version: %s" % (major_ver, full_ver))

    fh_ok, fh_msg = tk_preflight.check_open_fh()
    if not fh_ok:
        martian.exit(fh_msg)

    # Each optional destination is checked (or created) with write+execute
    # permission. Attributes are read lazily, one at a time, in the same
    # order as the command-line flags listed here.
    needed_access = os.W_OK | os.X_OK
    for flag, attr_name in (("--output-dir", "output_path"),
                            ("--interop-dir", "interop_output_path")):
        folder = getattr(args, attr_name)
        if folder is not None:
            tk_preflight.check_folder_or_create(flag, folder, host, permission=needed_access)

    if args.max_bcl2fastq_threads < 1:
        martian.exit("Cannot run bcl2fastq with zero threads.")
def run_bcl2fastq(args, outs):
    """Run bcl2fastq on a run folder using the provided sample sheet.

    Resolves the FASTQ/InterOp output locations, derives the bases mask
    (unless ``args.bases_mask`` is given), picks the bcl2fastq version
    compatible with the run's RTA version and invokes it with
    ``LD_LIBRARY_PATH`` restored from ``_TENX_LD_LIBRARY_PATH``.

    Args:
        args: Stage arguments; reads ``run_path``, ``output_path``,
            ``interop_output_path``, ``bases_mask``, ``si_read_type``,
            ``dual_indexed_samplesheet``, ``ignore_dual_index``,
            ``samplesheet_path``, ``bcl2fastq1_args``, ``bcl2fastq2_args``
            and ``num_threads``.
        outs: Stage outputs; ``fastq_path`` / ``interop_path`` are
            overwritten when the matching ``args`` value is set, and
            ``file_read_types_map``, ``rc_i2_read``, ``bcl2fastq_version``
            and ``bcl2fastq_args`` are written.

    Raises:
        KeyError: if ``_TENX_LD_LIBRARY_PATH`` is missing from the
            environment.

    Failures of the external tools are reported through ``martian.throw`` /
    ``martian.exit``.
    """
    input_dir = os.path.join(args.run_path, "Data", "Intensities", "BaseCalls")

    # Explicit destinations from args take precedence over the stage defaults.
    if args.output_path:
        outs.fastq_path = args.output_path
    output_dir = outs.fastq_path

    if args.interop_output_path:
        outs.interop_path = args.interop_output_path
    interop_dir = outs.interop_path

    martian.log_info("Running bcl2fastq on run: %s" % args.run_path)
    martian.log_info("FASTQ output dir: %s" % output_dir)

    run_info_xml = os.path.join(args.run_path, "RunInfo.xml")
    # The flowcell id returned alongside the read info is not needed here.
    read_info, _flowcell = tk_bcl.load_run_info(run_info_xml)

    if not args.bases_mask:
        use_bases_mask_val = tk_bcl.make_bases_mask_val(
            read_info,
            sample_index_read=args.si_read_type,
            dual_indexed=args.dual_indexed_samplesheet,
            ignore_dual_index=args.ignore_dual_index)
    else:
        use_bases_mask_val = args.bases_mask

    outs.file_read_types_map = tk_bcl.get_bcl2fastq_read_type_map(
        read_info,
        sample_index_read=args.si_read_type,
        dual_indexed=args.dual_indexed_samplesheet,
        ignore_dual_index=args.ignore_dual_index)

    # Determine the RTA version of the run and whether this instrument
    # requires i2 to be RC'd
    (rta_version, rc_i2_read, bcl_params) = tk_bcl.get_rta_version(args.run_path)
    outs.rc_i2_read = rc_i2_read
    martian.log_info("BCL folder RTA Version: %s" % rta_version)
    martian.log_info("BCL params: %s" % str(bcl_params))

    # Determine the best available bcl2fastq version to use
    # Will call martian.exit() with an error message if there isn't
    # a compatible version available
    hostname = socket.gethostname()
    (major_ver, full_ver) = tk_bcl.check_bcl2fastq(hostname, rta_version)
    outs.bcl2fastq_version = full_ver
    martian.log_info("Using bcl2fastq version: %s" % full_ver)
    martian.log_info("RC'ing i2 read: %s" % str(rc_i2_read))

    # Restore the LD_LIBRARY_PATH set aside by sourceme.bash/shell10x.
    # Only do this for the environment in which BCL2FASTQ will run.
    new_environ = dict(os.environ)
    new_environ['LD_LIBRARY_PATH'] = os.environ['_TENX_LD_LIBRARY_PATH']

    if major_ver == tk_bcl.BCL2FASTQ_V1:
        martian.exit(
            "bcl2fastq 1.8.4 is not currently supported. Please install bcl2fastq2, or use the 10x 'demux' pipeline instead."
        )
        # NOTE(review): if martian.exit() terminates the stage, the rest of
        # this branch is never reached -- confirm before removing it.

        # configure
        cmd = [
            "configureBclToFastq.pl",
            "--use-bases-mask=" + use_bases_mask_val,
            "--fastq-cluster-count", "20000000",
            "--input-dir=" + input_dir,
            "--output-dir=" + output_dir,
            "--no-eamss",
            "--ignore-missing-bcl",
            "--ignore-missing-control",
            "--ignore-missing-stats",
            "--sample-sheet=" + args.samplesheet_path,
        ]
        cmd += remove_deprecated_args(args.bcl2fastq1_args, major_ver, full_ver)

        martian.log_info("Running bcl2fastq v1 setup command:")
        martian.log_info(" ".join(cmd))
        outs.bcl2fastq_args = " ".join(cmd)

        try:
            ret = tk_proc.call(cmd, env=new_environ)
        except OSError:
            martian.throw(
                "configureBclToFastq.pl not found on path -- make sure you've added it to your environment"
            )

        if ret != 0:
            martian.throw("configureBclToFastq.pl failed. Exiting.")

        # Run the actual makefiles
        makefile = os.path.join(output_dir, "Makefile")
        if not os.path.exists(makefile):
            martian.throw("BclToFastq Makefile not found where expected: %s" % makefile)

        martian.log_info("Running Makefile...")
        mk_cmd = ["make", "-C", output_dir, "-j", str(args.num_threads)]
        martian.log_info(" ".join(mk_cmd))
        ret = tk_proc.call(mk_cmd, env=new_environ)

        if ret > 0:
            martian.throw(
                "Running the BclToFastq Makefile failed with code: %d. Exiting" % ret)
        elif ret < 0:
            # A negative return code -N means "terminated by signal N", so
            # the sign is flipped to report the actual signal number.
            martian.throw("Bcl2Fastq was killed with signal %d." % -ret)

    elif major_ver == tk_bcl.BCL2FASTQ_V2:
        if not os.path.exists(outs.interop_path):
            os.makedirs(outs.interop_path)
        if not os.path.exists(outs.fastq_path):
            os.makedirs(outs.fastq_path)

        # minimum-trimmed-read-length and mask-short-adapter-reads must be our call (SIs, UMIs)
        # Capped at 8 to ensure min is at sample-index, if extra base grabbed
        # for QC purposes (I8n, for example).
        min_read_length = min(min(x["read_length"] for x in read_info), 8)

        # Thread-count flags (-r/-w) are not passed from here.
        cmd = [
            "bcl2fastq",
            "--minimum-trimmed-read-length", str(min_read_length),
            "--mask-short-adapter-reads", str(min_read_length),
            "--create-fastq-for-index-reads",
            "--ignore-missing-positions",
            "--ignore-missing-filter",
            "--ignore-missing-bcls",
            "--use-bases-mask=" + use_bases_mask_val,
            "-R", args.run_path,
            "--output-dir=" + output_dir,
            "--interop-dir=" + interop_dir,
            "--sample-sheet=" + args.samplesheet_path,
        ]
        cmd += remove_deprecated_args(args.bcl2fastq2_args, major_ver, full_ver)
        outs.bcl2fastq_args = " ".join(cmd)

        martian.log_info("Running bcl2fastq2: %s" % (" ".join(cmd)))

        try:
            ret = tk_proc.call(cmd, env=new_environ)
        except OSError:
            martian.throw(
                "bcl2fastq not found on PATH -- make sure you've added it to your environment"
            )

        if ret > 0:
            # Point the user at the _stderr file two directory levels above
            # the path martian.make_path('_stderr') resolves to.
            files_path = os.path.abspath(martian.make_path('_stderr'))
            enclosing_path = os.path.dirname(os.path.dirname(files_path))
            stderr_path = os.path.join(enclosing_path, '_stderr')
            martian.exit(
                "bcl2fastq exited with an error. You may have specified an invalid command-line option. See the full error here:\n%s"
                % stderr_path)
        elif ret < 0:
            # subprocess.call returns negative code (on UNIX): bcl2fastq killed by external signal.
            # The code is -N for signal N, so report N.
            martian.exit("bcl2fastq was killed with signal %d." % -ret)
def process_raw_ilmn_data(args, outs):
    """Convert an Illumina run folder to FASTQs and rename them.

    run_path must be the top-level Illumina run directory.

    Picks the bcl2fastq version compatible with the run's RTA version, runs
    it (optionally restricted to the tiles matching ``args.tile_suffix``)
    with ``LD_LIBRARY_PATH`` restored from ``_TENX_LD_LIBRARY_PATH``, then
    globs the produced FASTQ files and hands them to ``rename_fastq_files``.

    Args:
        args: Stage arguments; reads ``run_path``, ``tile_suffix`` (``'*'``
            disables tile splitting), ``num_threads`` and ``__threads``.
        outs: Stage outputs; reads ``raw_fastq_path`` as the output folder.

    Raises:
        KeyError: if ``_TENX_LD_LIBRARY_PATH`` is missing from the
            environment.

    Failures of the external tools are reported through ``martian.throw`` /
    ``martian.exit``.
    """
    input_dir = os.path.join(args.run_path, "Data", "Intensities", "BaseCalls")
    output_dir = outs.raw_fastq_path

    martian.log_info("Running bcl2fastq on run: %s" % args.run_path)
    martian.log_info("FASTQ output dir: %s" % output_dir)

    if not os.path.exists(args.run_path):
        martian.throw("Run directory does not exist: %s" % args.run_path)

    run_info_xml = os.path.join(args.run_path, "RunInfo.xml")
    read_info, flowcell = tk_bcl.load_run_info(run_info_xml)
    use_bases_mask_val = tk_bcl.make_bases_mask_val(read_info)

    # Determine the RTA version of the run and whether this instrument
    # requires i2 to be RC'd (the RC flag is not needed in this function).
    (rta_version, _rc_i2_read, bcl_params) = tk_bcl.get_rta_version(args.run_path)
    martian.log_info("BCL folder RTA Version: %s" % rta_version)
    martian.log_info("BCL params: %s" % str(bcl_params))

    # Determine the best available bcl2fastq version to use
    # Will call martian.exit() with an error message if there isn't
    # a compatible version available
    hostname = socket.gethostname()
    (major_ver, full_ver) = tk_bcl.check_bcl2fastq(hostname, rta_version)
    martian.log_info("Using bcl2fastq version: %s" % full_ver)

    tile_split = args.tile_suffix != '*'

    # Internal use only. Move aside Illumina sample sheet so
    # bcl2fastq doesn't use it. For customers, there is a pre-flight
    # check to make sure there is no sample sheet in the places
    # bcl2fastq looks for it.
    try:
        import kitten  # noqa: F401 -- imported only to probe availability
    except ImportError:
        pass
    else:
        # Older RTA put sheet into Data/Intensities/BaseCalls while
        # newer RTA put sheet at top of the BCL folder. Check both.
        for ss_dir in [args.run_path, input_dir]:
            ilmn_sample_sheet = os.path.join(ss_dir, "SampleSheet.csv")
            mv_sample_sheet = os.path.join(ss_dir, "IlluminaSampleSheet.csv")
            if os.path.exists(ilmn_sample_sheet):
                martian.log_info("Renaming the Illumina sample sheet")
                os.rename(ilmn_sample_sheet, mv_sample_sheet)

    # Restore the LD_LIBRARY_PATH set aside by sourceme.bash/shell10x.
    # Only do this for the environment in which BCL2FASTQ will run.
    new_environ = dict(os.environ)
    new_environ['LD_LIBRARY_PATH'] = os.environ['_TENX_LD_LIBRARY_PATH']

    if major_ver == tk_bcl.BCL2FASTQ_V1:
        if tile_split:
            martian.throw(
                "Cannot support NovaSeq demux scheme on bcl2fastq v1. Exiting."
            )

        # configure
        # write bigger fastq chunks to avoid blow-up of chunks
        cmd = [
            "configureBclToFastq.pl",
            "--fastq-cluster-count", "20000000",
            "--no-eamss",
            "--use-bases-mask=" + use_bases_mask_val,
            "--input-dir=" + input_dir,
            "--output-dir=" + output_dir,
        ]

        martian.log_info("Running bcl2fastq setup command:")
        martian.log_info(" ".join(cmd))

        try:
            ret = tenkit.log_subprocess.call(cmd, env=new_environ)
        except OSError:
            martian.throw(
                "configureBclToFastq.pl not found on path -- make sure you've added it to your environment"
            )

        if ret != 0:
            martian.throw("configureBclToFastq.pl failed. Exiting.")

        # Run the actual makefiles
        makefile = os.path.join(output_dir, "Makefile")
        if not os.path.exists(makefile):
            martian.throw("BclToFastq Makefile not found where expected: %s" % makefile)

        martian.log_info("Running Makefile...")
        mk_cmd = ["make", "-C", output_dir, "-j", str(args.num_threads)]
        martian.log_info(" ".join(mk_cmd))
        ret = tenkit.log_subprocess.call(mk_cmd, env=new_environ)

        if ret > 0:
            martian.throw(
                "running the BclToFastq Makefile failed with code: %d. Exiting" % ret)
        elif ret < 0:
            # A negative return code -N means "terminated by signal N", so
            # the sign is flipped to report the actual signal number.
            martian.throw("Bcl2Fastq was killed with signal %d." % -ret)

    elif major_ver == tk_bcl.BCL2FASTQ_V2:
        if tile_split:
            proj_output_dir = os.path.join(output_dir,
                                           "Tile%s" % args.tile_suffix,
                                           "Project_%s" % flowcell)
        else:
            proj_output_dir = os.path.join(output_dir, "Project_%s" % flowcell)

        fastq_output_dir = os.path.join(proj_output_dir, "fastq")
        interop_output_dir = os.path.join(proj_output_dir, "interop")

        if not os.path.exists(fastq_output_dir):
            os.makedirs(fastq_output_dir)

        if not os.path.exists(interop_output_dir):
            os.makedirs(interop_output_dir)

        min_read_length = min(x["read_length"] for x in read_info)

        # When splitting by tile, restrict bcl2fastq to tile names that end
        # in the requested suffix; the leading digits are wildcarded.
        tile_args = []
        if tile_split:
            flowcell_info = tk_lane.get_flowcell_layout(run_info_xml)
            if flowcell_info.tile_length is None:
                martian.throw(
                    "Cannot determine tile name length from RunInfo.xml")

            tiles_regex_prefix = "[0-9]" * (flowcell_info.tile_length - 1)
            tiles_regex = "%s%s" % (tiles_regex_prefix, args.tile_suffix)
            tile_args = ["--tiles=" + tiles_regex]

        # One command list serves both modes; the optional --tiles argument
        # stays last, exactly where the tile-split variant placed it.
        cmd = [
            "bcl2fastq",
            "--minimum-trimmed-read-length", str(min_read_length),
            # PIPELINES-1140 - required in bcl2fastq 2.17 to generate correct index read fastqs
            "--mask-short-adapter-reads", str(min_read_length),
            # LONGRANGER-121 - ignore missing bcl data
            "--ignore-missing-bcls",
            "--ignore-missing-filter",
            "--ignore-missing-positions",
            "--ignore-missing-controls",
            '-r', str(args.__threads),
            '-w', str(args.__threads),
            # TENKIT-72 avoid CPU oversubscription
            '-p', str(args.__threads),
            "--use-bases-mask=" + use_bases_mask_val,
            "-R", args.run_path,
            "--output-dir=" + fastq_output_dir,
            "--interop-dir=" + interop_output_dir,
        ] + tile_args

        martian.log_info("Running bcl2fastq 2: %s" % (" ".join(cmd)))

        try:
            ret = tenkit.log_subprocess.call(cmd, env=new_environ)
        except OSError:
            martian.throw(
                "bcl2fastq not found on PATH -- make sure you've added it to your environment"
            )

        if ret > 0:
            martian.exit("bcl2fastq failed. Exiting.")
        elif ret < 0:
            # Return code is -N when the process was killed by signal N.
            martian.exit("bcl2fastq was killed with signal %d." % -ret)

    # Glob over all lanes - demultiplex handles whether to collapse them
    if tile_split:
        fastq_glob = os.path.join(output_dir, "Tile*", "Project_" + flowcell, "*", "*.fastq*")
    else:
        fastq_glob = os.path.join(output_dir, "Project_" + flowcell, "*", "*.fastq*")
    start_fastq_files = glob.glob(fastq_glob)

    # File renaming -- bcl2fastq names the reads R1, R2, R3, R4
    # Use our conventions to make them R1, I1, I2, R2, as the case may be.
    rename_fastq_files(read_info, start_fastq_files)