def main(self, args):
    """
    Internal: provides mock bcl2fastq2 functionality

    Parses a bcl2fastq2-style command line, reports the
    settings it will use on stdout, and writes a mock output
    directory structure to the requested location.

    Arguments:
      args (list): command line arguments, excluding the
        program name. Only --runfolder-dir, --output-dir,
        --sample-sheet, --use-bases-mask and
        --no-lane-splitting influence the result; the other
        recognised options are parsed and then ignored.

    Returns:
      Integer: the instance's configured exit code when
        '--version' is requested or when the mock output is
        produced; 1 if no run folder was given or it has no
        'RunInfo.xml' file.

    Raises:
      AssertionError: if the instance has an expected bases
        mask set and '--use-bases-mask' doesn't match it.
    """
    # Build generic header
    # NOTE(review): the banner text is reproduced exactly as it
    # appears in this source (a single line); confirm whether
    # it was originally split over several lines
    header = ("BCL to FASTQ file converter "
              "bcl2fastq v2.17.1.14 "
              "Copyright (c) 2007-2015 Illumina, Inc. "
              "2015-12-17 14:08:00 [7fa113f3f780] "
              "Command-line invocation: bcl2fastq %s"
              % ' '.join(args))
    # Handle version request
    if "--version" in args:
        print(header)
        return self._exit_code
    # Deal with arguments
    p = argparse.ArgumentParser()
    for option in ("--runfolder-dir",
                   "--output-dir",
                   "--sample-sheet",
                   "--use-bases-mask",
                   "--barcode-mismatches",
                   "--minimum-trimmed-read-length",
                   "--mask-short-adapter-reads"):
        p.add_argument(option, action="store")
    for flag in ("--ignore-missing-bcls", "--no-lane-splitting"):
        p.add_argument(flag, action="store_true")
    # Short options are accepted but their values are never used
    for option in ("-r", "-d", "-p", "-w"):
        p.add_argument(option, action="store")
    args = p.parse_args(args)
    # Check bases mask (only when an expected value was set)
    if self._assert_bases_mask:
        print("Checking bases mask: %s" % args.use_bases_mask)
        assert (args.use_bases_mask == self._assert_bases_mask)
    # Platform
    print("Platform (default): %s" % self._platform)
    # Run folder (input data)
    runfolder = args.runfolder_dir
    print("Runfolder dir: %s" % runfolder)
    if runfolder is None:
        return 1
    run_info_xml = os.path.join(runfolder, "RunInfo.xml")
    if not os.path.exists(run_info_xml):
        return 1
    # Determine if run is paired end: exactly two
    # non-index reads means paired end
    nreads = sum(1 for r in IlluminaRunInfo(run_info_xml).reads
                 if r['is_indexed_read'] == 'N')
    paired_end = (nreads == 2)
    print("Paired-end: %s" % paired_end)
    # Lanes
    lanes = IlluminaRun(runfolder, platform=self._platform).lanes
    print("Lanes: %s" % lanes)
    # Output folder
    output_dir = args.output_dir
    if output_dir is None:
        output_dir = "bcl2fastq"
    print("Output dir: %s" % output_dir)
    # Sample sheet: if not given explicitly then look in the
    # run folder and then in the base calls directory
    sample_sheet = args.sample_sheet
    if sample_sheet is None:
        for d in (runfolder,
                  os.path.join(runfolder,
                               "Data",
                               "Intensities",
                               "BaseCalls")):
            candidate = os.path.join(d, "SampleSheet.csv")
            if os.path.exists(candidate):
                sample_sheet = candidate
                break
    print("Sample sheet: %s" % sample_sheet)
    # Modifiers
    no_lane_splitting = bool(args.no_lane_splitting)
    print("No lane splitting: %s" % no_lane_splitting)
    # Generate mock output based on inputs
    # The output is staged under a uniquely named temporary
    # directory and moved into place at the end
    tmpname = "tmp.%s" % uuid.uuid4()
    output = MockIlluminaData(name=tmpname,
                              package="bcl2fastq2",
                              unaligned_dir="bcl2fastq")
    missing_fastqs = self._missing_fastqs
    # Add outputs from sample sheet (if supplied)
    if sample_sheet is not None:
        s = SampleSheetPredictor(sample_sheet_file=sample_sheet)
        s.set(paired_end=paired_end,
              no_lane_splitting=no_lane_splitting,
              lanes=lanes)
        for project in s.projects:
            print("Adding project: %s" % project.name)
            for sample in project.samples:
                # Fall back to the sample id if no name is set
                if sample.sample_name is None:
                    sample_name = sample.sample_id
                else:
                    sample_name = sample.sample_name
                for fq in sample.fastqs():
                    if missing_fastqs and (fq in missing_fastqs):
                        # Deliberately omitted from the output
                        continue
                    output.add_fastq(project.name, sample_name, fq)
    # Add undetermined fastqs
    # NB Would like to use the 'add_undetermined'
    # method but this doesn't play well with using
    # the predictor-based approach above
    if paired_end:
        reads = (1, 2)
    else:
        reads = (1, )
    if no_lane_splitting:
        lanes = None
    for r in reads:
        if lanes is None:
            output.add_fastq("Undetermined_indices",
                             "undetermined",
                             "Undetermined_S0_R%d_001.fastq.gz" % r)
        else:
            for lane in lanes:
                output.add_fastq(
                    "Undetermined_indices",
                    "undetermined",
                    "Undetermined_S0_L%03d_R%d_001.fastq.gz"
                    % (lane, r))
    try:
        # Build the output directory
        output.create()
        # Move to final location
        os.rename(os.path.join(tmpname, "bcl2fastq"), output_dir)
    except Exception:
        # Don't leave the staging directory behind on failure;
        # cleanup is best-effort so the original error is the
        # one that propagates
        shutil.rmtree(tmpname, ignore_errors=True)
        raise
    shutil.rmtree(tmpname)
    return self._exit_code