Ejemplo n.º 1
0
    def main(self, args):
        """
        Internal: provides mock bcl2fastq2 functionality
        """
        # Build generic header
        header = """BCL to FASTQ file converter
bcl2fastq v2.17.1.14
Copyright (c) 2007-2015 Illumina, Inc.

2015-12-17 14:08:00 [7fa113f3f780] Command-line invocation: bcl2fastq %s""" \
    % ' '.join(args)
        # Handle version request
        if "--version" in args:
            print header
            return self._exit_code
        # Deal with arguments
        p = argparse.ArgumentParser()
        p.add_argument("--runfolder-dir", action="store")
        p.add_argument("--output-dir", action="store")
        p.add_argument("--sample-sheet", action="store")
        p.add_argument("--use-bases-mask", action="store")
        p.add_argument("--barcode-mismatches", action="store")
        p.add_argument("--minimum-trimmed-read-length", action="store")
        p.add_argument("--mask-short-adapter-reads", action="store")
        p.add_argument("--ignore-missing-bcls", action="store_true")
        p.add_argument("--no-lane-splitting", action="store_true")
        p.add_argument("-r", action="store")
        p.add_argument("-d", action="store")
        p.add_argument("-p", action="store")
        p.add_argument("-w", action="store")
        args = p.parse_args(args)
        # Check bases mask
        if self._assert_bases_mask:
            print "Checking bases mask: %s" % args.use_bases_mask
            assert (args.use_bases_mask == self._assert_bases_mask)
        # Platform
        print "Platform (default): %s" % self._platform
        # Run folder (input data)
        runfolder = args.runfolder_dir
        print "Runfolder dir: %s" % runfolder
        if runfolder is None:
            return 1
        run_info_xml = os.path.join(runfolder, "RunInfo.xml")
        if not os.path.exists(run_info_xml):
            return 1
        # Determine if run is paired end
        nreads = 0
        for r in IlluminaRunInfo(run_info_xml).reads:
            if r['is_indexed_read'] == 'N':
                nreads += 1
        if nreads == 2:
            paired_end = True
        else:
            paired_end = False
        print "Paired-end: %s" % paired_end
        # Lanes
        lanes = IlluminaRun(runfolder, platform=self._platform).lanes
        print "Lanes: %s" % lanes
        # Output folder
        output_dir = args.output_dir
        if output_dir is None:
            output_dir = "bcl2fastq"
        print "Output dir: %s" % output_dir
        # Sample sheet
        sample_sheet = args.sample_sheet
        if sample_sheet is None:
            for d in (runfolder,
                      os.path.join(runfolder, "Data", "Intensities",
                                   "BaseCalls")):
                sample_sheet = os.path.join(d, "SampleSheet.csv")
                if os.path.exists(sample_sheet):
                    break
                sample_sheet = None
        print "Sample sheet: %s" % sample_sheet
        # Modifiers
        no_lane_splitting = bool(args.no_lane_splitting)
        print "No lane splitting: %s" % no_lane_splitting
        # Generate mock output based on inputs
        tmpname = "tmp.%s" % uuid.uuid4()
        output = MockIlluminaData(name=tmpname,
                                  package="bcl2fastq2",
                                  unaligned_dir="bcl2fastq")
        missing_fastqs = self._missing_fastqs
        # Add outputs from sample sheet (if supplied)
        if sample_sheet is not None:
            s = SampleSheetPredictor(sample_sheet_file=sample_sheet)
            s.set(paired_end=paired_end,
                  no_lane_splitting=no_lane_splitting,
                  lanes=lanes)
            for project in s.projects:
                print "Adding project: %s" % project.name
                for sample in project.samples:
                    for fq in sample.fastqs():
                        if missing_fastqs and (fq in missing_fastqs):
                            continue
                        if sample.sample_name is None:
                            sample_name = sample.sample_id
                        else:
                            sample_name = sample.sample_name
                        output.add_fastq(project.name, sample_name, fq)
        # Add undetermined fastqs
        # NB Would like to use the 'add_undetermined'
        # method but this doesn't play well with using
        # the predictor-based approach above
        if paired_end:
            reads = (1, 2)
        else:
            reads = (1, )
        if no_lane_splitting:
            lanes = None
        for r in reads:
            if lanes is None:
                output.add_fastq("Undetermined_indices", "undetermined",
                                 "Undetermined_S0_R%d_001.fastq.gz" % r)
            else:
                for lane in lanes:
                    output.add_fastq(
                        "Undetermined_indices", "undetermined",
                        "Undetermined_S0_L%03d_R%d_001.fastq.gz" % (lane, r))
        # Build the output directory
        output.create()
        # Move to final location
        os.rename(os.path.join(tmpname, "bcl2fastq"), output_dir)
        shutil.rmtree(tmpname)
        return self._exit_code