예제 #1
0
def main(args, outs):
    """Run the pre-flight checks for a demux of an Illumina run folder.

    The checks run in this order: tk_preflight.check_rta_complete,
    tk_preflight.check_runinfo_xml, check_reads (skipped when
    args.allow_no_barcodes is set), tk_preflight.check_ld_library_path,
    a search for a stray SampleSheet.csv, an optional bcl2fastq lookup
    (only when args.check_executables is set), and
    tk_preflight.check_open_fh. Every check that returns an ``(ok, msg)``
    pair hands ``msg`` to martian.exit() when ``ok`` is false.

    Args:
        args: stage arguments; run_path, allow_no_barcodes and
            check_executables are read here.
        outs: stage outputs; not touched by this function.
    """
    hostname = socket.gethostname()

    print("Checking run folder...")
    tk_preflight.check_rta_complete(args.run_path)

    print("Checking RunInfo.xml...")
    runinfo = tk_preflight.check_runinfo_xml(args.run_path)

    if not args.allow_no_barcodes:
        ok, msg = check_reads(runinfo)
        if not ok:
            martian.exit(msg)

    print("Checking system environment...")
    ok, msg = tk_preflight.check_ld_library_path()
    if not ok:
        martian.exit(msg)

    # The SampleSheet.csv check only applies when the `kitten` module cannot
    # be imported (presumably a marker for internal environments -- confirm).
    # The probe does not depend on the directory being inspected, so it is
    # done once up front instead of once per candidate directory.
    try:
        import kitten  # noqa: F401 -- imported only to test availability
        external = False
    except ImportError:
        external = True

    if external:
        # Presence of SampleSheet.csv interferes with demux.
        # Ask customer to move it. Under older RTA, bcl2fastq looks for it
        # in Data/Intensities/BaseCalls while under newer RTA, it looks for it
        # at the top of the run folder.
        bc_dir = os.path.join(args.run_path, "Data", "Intensities",
                              "BaseCalls")
        for ss_dir in (args.run_path, bc_dir):
            ilmn_sample_sheet = os.path.join(ss_dir, "SampleSheet.csv")
            if os.path.exists(ilmn_sample_sheet):
                martian.exit(
                    "On machine: %s, SampleSheet.csv found in run folder that would interfere with demux:\n%s\nPlease move, rename, or delete the file and run demux again."
                    % (hostname, ilmn_sample_sheet))

    if args.check_executables:
        print("Checking bcl2fastq...")
        # Determine the RTA version of the run and whether this instrument
        # requires i2 to be RC'd
        (rta_version, rc_i2_read,
         bcl_params) = tk_bcl.get_rta_version(args.run_path)
        martian.log_info("RTA Version: %s" % rta_version)
        martian.log_info("BCL Params: %s" % str(bcl_params))

        # Determine the best available bcl2fastq version to use
        # Will call martian.exit() with an error message if there isn't
        # a compatible version available
        (major_ver, full_ver) = tk_bcl.check_bcl2fastq(hostname, rta_version)
        martian.log_info("Running bcl2fastq mode: %s.  Version: %s" %
                         (major_ver, full_ver))

    ok, msg = tk_preflight.check_open_fh()
    if not ok:
        martian.exit(msg)
예제 #2
0
def main(args, outs):
    """Pre-flight stage for mkfastq: validate the run folder and arguments.

    Runs each check in a fixed order and stops through martian.exit() as
    soon as one of the ``(ok, msg)``-style checks reports a failure or the
    unsupported ``--no-lane-splitting`` option is found in
    args.bcl2fastq2_args.

    Args:
        args: stage arguments; run_path, barcode_whitelist,
            check_executables and bcl2fastq2_args are read here, and the
            whole object is forwarded to emit_info, check_read_params,
            check_specs and check_dual_index.
        outs: stage outputs; not touched by this function.
    """

    def exit_unless_ok(result):
        # `result` is the (ok, msg) pair returned by a tk_preflight check.
        passed, reason = result
        if not passed:
            martian.exit(reason)

    this_host = socket.gethostname()

    print("Checking run folder...")
    tk_preflight.check_rta_complete(args.run_path)

    print("Checking RunInfo.xml...")
    run_info = tk_preflight.check_runinfo_xml(args.run_path)

    print("Checking system environment...")
    exit_unless_ok(tk_preflight.check_ld_library_path())

    print("Checking barcode whitelist...")
    tk_preflight.check_barcode_whitelist(args.barcode_whitelist)

    if args.check_executables:
        print("Checking bcl2fastq...")
        rta_details = tk_bcl.get_rta_version(args.run_path)
        # The middle element (whether i2 is reverse-complemented) is not
        # needed during pre-flight.
        rta_version, _rc_i2_read, bcl_params = rta_details
        martian.log_info("RTA Version: %s" % rta_version)
        martian.log_info("BCL Params: %s" % str(bcl_params))

        mode, version = tk_bcl.check_bcl2fastq(this_host, rta_version)
        martian.log_info(
            "Running bcl2fastq mode: %s.  Version: %s" % (mode, version))

    if '--no-lane-splitting' in args.bcl2fastq2_args:
        martian.exit("The --no-lane-splitting option is not supported.")

    # The diagnostic markers bracket both the run-information dump and the
    # read-specification check.
    print("Emitting run information...")
    martian.log_info("-------mkfastq diagnostic start-------")
    emit_info(args)

    print("Checking read specification...")
    check_read_params(args, run_info)
    martian.log_info("-------mkfastq diagnostic end-------")

    print("Checking samplesheet specs...")
    check_specs(args)

    print("Checking for dual index flowcell...")
    check_dual_index(args, run_info)

    exit_unless_ok(tk_preflight.check_open_fh())
예제 #3
0
def main(args, outs):
    """Pre-flight stage: validate the run folder, tools and output folders.

    Runs each check in a fixed order and stops through martian.exit() when
    an ``(ok, msg)``-style check fails or when args.max_bcl2fastq_threads
    is below 1.

    Args:
        args: stage arguments; run_path, barcode_whitelist,
            check_executables, output_path, interop_output_path and
            max_bcl2fastq_threads are read here.
        outs: stage outputs; not touched by this function.
    """

    def exit_unless_ok(result):
        # `result` is the (ok, msg) pair returned by a tk_preflight check.
        passed, reason = result
        if not passed:
            martian.exit(reason)

    this_host = socket.gethostname()

    print("Checking run folder...")
    tk_preflight.check_rta_complete(args.run_path)

    print("Checking RunInfo.xml...")
    tk_preflight.check_runinfo_xml(args.run_path)

    print("Checking system environment...")
    exit_unless_ok(tk_preflight.check_ld_library_path())

    print("Checking barcode whitelist...")
    tk_preflight.check_barcode_whitelist(args.barcode_whitelist)

    if args.check_executables:
        print("Checking bcl2fastq...")
        rta_details = tk_bcl.get_rta_version(args.run_path)
        rta_version, _rc_i2_read, bcl_params = rta_details
        martian.log_info("RTA Version: %s" % rta_version)
        martian.log_info("BCL Params: %s" % str(bcl_params))

        mode, version = tk_bcl.check_bcl2fastq(this_host, rta_version)
        martian.log_info(
            "Running bcl2fastq mode: %s.  Version: %s" % (mode, version))

    exit_unless_ok(tk_preflight.check_open_fh())

    # Both optional folders are checked with the same write + execute mask.
    if args.output_path is not None:
        tk_preflight.check_folder_or_create(
            "--output-dir",
            args.output_path,
            this_host,
            permission=os.W_OK | os.X_OK)

    if args.interop_output_path is not None:
        tk_preflight.check_folder_or_create(
            "--interop-dir",
            args.interop_output_path,
            this_host,
            permission=os.W_OK | os.X_OK)

    if args.max_bcl2fastq_threads < 1:
        martian.exit("Cannot run bcl2fastq with zero threads.")
예제 #4
0
def run_bcl2fastq(args, outs):
    """Run bcl2fastq on a run folder using a caller-supplied sample sheet.

    Resolves the output locations, builds the bases mask (unless
    args.bases_mask is given), picks a bcl2fastq version via
    tk_bcl.check_bcl2fastq and launches it with LD_LIBRARY_PATH restored
    from _TENX_LD_LIBRARY_PATH.

    Args:
        args: stage arguments; reads run_path, output_path,
            interop_output_path, bases_mask, si_read_type,
            dual_indexed_samplesheet, ignore_dual_index, samplesheet_path,
            bcl2fastq1_args, bcl2fastq2_args and num_threads.
        outs: stage outputs; fastq_path and interop_path are overwritten
            when the matching args path is set, and file_read_types_map,
            rc_i2_read, bcl2fastq_version and bcl2fastq_args are written.

    Raises:
        KeyError: if _TENX_LD_LIBRARY_PATH is not in the environment.

    Failures of the external tools are reported through martian.exit() or
    martian.throw().
    """
    input_dir = os.path.join(args.run_path, "Data", "Intensities", "BaseCalls")

    # Explicit paths from the caller replace whatever is already on outs.
    if args.output_path:
        outs.fastq_path = args.output_path
    output_dir = outs.fastq_path

    if args.interop_output_path:
        outs.interop_path = args.interop_output_path
    interop_dir = outs.interop_path

    martian.log_info("Running bcl2fastq on run: %s" % args.run_path)
    martian.log_info("FASTQ output dir: %s" % output_dir)

    run_info_xml = os.path.join(args.run_path, "RunInfo.xml")
    read_info, flowcell = tk_bcl.load_run_info(run_info_xml)
    if not args.bases_mask:
        use_bases_mask_val = tk_bcl.make_bases_mask_val(
            read_info,
            sample_index_read=args.si_read_type,
            dual_indexed=args.dual_indexed_samplesheet,
            ignore_dual_index=args.ignore_dual_index)
    else:
        use_bases_mask_val = args.bases_mask

    outs.file_read_types_map = tk_bcl.get_bcl2fastq_read_type_map(
        read_info,
        sample_index_read=args.si_read_type,
        dual_indexed=args.dual_indexed_samplesheet,
        ignore_dual_index=args.ignore_dual_index)

    # Determine the RTA version of the run and whether this instrument
    # requires i2 to be RC'd
    (rta_version, rc_i2_read,
     bcl_params) = tk_bcl.get_rta_version(args.run_path)
    outs.rc_i2_read = rc_i2_read
    martian.log_info("BCL folder RTA Version: %s" % rta_version)
    martian.log_info("BCL params: %s" % str(bcl_params))

    # Determine the best available bcl2fastq version to use
    # Will call martian.exit() with an error message if there isn't
    # a compatible version available
    hostname = socket.gethostname()
    (major_ver, full_ver) = tk_bcl.check_bcl2fastq(hostname, rta_version)
    outs.bcl2fastq_version = full_ver

    martian.log_info("Using bcl2fastq version: %s" % full_ver)
    martian.log_info("RC'ing i2 read: %s" % str(rc_i2_read))

    # Restore the LD_LIBRARY_PATH set aside by sourceme.bash/shell10x.
    # Only do this for the environment in which BCL2FASTQ will run.
    new_environ = dict(os.environ)
    new_environ['LD_LIBRARY_PATH'] = os.environ['_TENX_LD_LIBRARY_PATH']

    if major_ver == tk_bcl.BCL2FASTQ_V1:
        martian.exit(
            "bcl2fastq 1.8.4 is not currently supported. Please install bcl2fastq2, or use the 10x 'demux' pipeline instead."
        )

        # NOTE(review): if martian.exit() terminates the stage, the rest of
        # this branch is unreachable -- confirm and consider deleting it.

        # configure
        cmd = [
            "configureBclToFastq.pl", "--use-bases-mask=" + use_bases_mask_val,
            "--fastq-cluster-count", "20000000", "--input-dir=" + input_dir,
            "--output-dir=" + output_dir, "--no-eamss", "--ignore-missing-bcl",
            "--ignore-missing-control", "--ignore-missing-stats",
            "--sample-sheet=" + args.samplesheet_path
        ]
        cmd += remove_deprecated_args(args.bcl2fastq1_args, major_ver,
                                      full_ver)

        martian.log_info("Running bcl2fastq v1 setup command:")
        martian.log_info(" ".join(cmd))

        outs.bcl2fastq_args = " ".join(cmd)

        try:
            ret = tk_proc.call(cmd, env=new_environ)
        except OSError:
            martian.throw(
                "configureBclToFastq.pl not found on path -- make sure you've added it to your environment"
            )

        if ret != 0:
            martian.throw("configureBclToFastq.pl failed. Exiting.")

        # Run the actual makefiles
        makefile = os.path.join(output_dir, "Makefile")
        if not os.path.exists(makefile):
            martian.throw("BclToFastq Makefile not found where expected: %s" %
                          makefile)

        martian.log_info("Running Makefile...")
        mk_cmd = ["make", "-C", output_dir, "-j", str(args.num_threads)]
        martian.log_info(" ".join(mk_cmd))
        ret = tk_proc.call(mk_cmd, env=new_environ)

        if ret > 0:
            martian.throw(
                "Running the BclToFastq Makefile failed with code: %d. Exiting"
                % ret)
        elif ret < 0:
            # A negative return code -N means "terminated by signal N", so
            # negate it to report the real signal number.
            martian.throw("Bcl2Fastq was killed with signal %d." % -ret)

    elif major_ver == tk_bcl.BCL2FASTQ_V2:
        if not os.path.exists(interop_dir):
            os.makedirs(interop_dir)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # minimum-trimmed-read-length and mask-short-adapter-reads must be our call (SIs, UMIs)
        # Capped at 8 to ensure min is at sample-index, if extra base grabbed
        # for QC purposes (I8n, for example)
        min_read_length = min(
            min(x["read_length"] for x in read_info), 8)

        cmd = [
            "bcl2fastq",
            "--minimum-trimmed-read-length",
            str(min_read_length),
            "--mask-short-adapter-reads",
            str(min_read_length),
            "--create-fastq-for-index-reads",
            "--ignore-missing-positions",
            "--ignore-missing-filter",
            "--ignore-missing-bcls",
            "--use-bases-mask=" + use_bases_mask_val,
            "-R",
            args.run_path,
            "--output-dir=" + output_dir,
            "--interop-dir=" + interop_dir,
            "--sample-sheet=" + args.samplesheet_path
        ]
        cmd += remove_deprecated_args(args.bcl2fastq2_args, major_ver,
                                      full_ver)
        outs.bcl2fastq_args = " ".join(cmd)

        martian.log_info("Running bcl2fastq2: %s" % (" ".join(cmd)))

        try:
            ret = tk_proc.call(cmd, env=new_environ)
        except OSError:
            martian.throw(
                "bcl2fastq not found on PATH -- make sure you've added it to your environment"
            )

        if ret > 0:
            # Point the user at the `_stderr` file two directory levels above
            # the path martian.make_path() returns.
            files_path = os.path.abspath(martian.make_path('_stderr'))
            enclosing_path = os.path.dirname(os.path.dirname(files_path))
            stderr_path = os.path.join(enclosing_path, '_stderr')
            martian.exit(
                "bcl2fastq exited with an error. You may have specified an invalid command-line option. See the full error here:\n%s"
                % stderr_path)
        elif ret < 0:
            # subprocess.call returns negative code (on UNIX): bcl2fastq killed by external signal.
            # The code is -N for signal N, so negate it for the message.
            martian.exit("bcl2fastq was killed with signal %d." % -ret)
예제 #5
0
def process_raw_ilmn_data(args, outs):
    """Convert a raw Illumina run folder to FASTQ files and rename them.

    run_path must be the top-level Illumina run directory.

    Picks a bcl2fastq version via tk_bcl.check_bcl2fastq, runs it with
    LD_LIBRARY_PATH restored from _TENX_LD_LIBRARY_PATH, then globs the
    produced FASTQ files and passes them to rename_fastq_files. When
    args.tile_suffix is not '*', only tiles whose name ends with that
    suffix are processed and output goes under a ``Tile<suffix>``
    sub-directory (bcl2fastq v2 only).

    Args:
        args: stage arguments; reads run_path, tile_suffix, num_threads
            and __threads.
        outs: stage outputs; raw_fastq_path is read as the output root.

    Raises:
        KeyError: if _TENX_LD_LIBRARY_PATH is not in the environment.

    Other failures are reported through martian.throw() or martian.exit().
    """
    input_dir = os.path.join(args.run_path, "Data", "Intensities", "BaseCalls")
    output_dir = outs.raw_fastq_path

    martian.log_info("Running bcl2fastq on run: %s" % args.run_path)
    martian.log_info("FASTQ output dir: %s" % output_dir)

    if not os.path.exists(args.run_path):
        martian.throw("Run directory does not exist: %s" % args.run_path)

    run_info_xml = os.path.join(args.run_path, "RunInfo.xml")
    read_info, flowcell = tk_bcl.load_run_info(run_info_xml)
    use_bases_mask_val = tk_bcl.make_bases_mask_val(read_info)

    # Determine the RTA version of the run and whether this instrument
    # requires i2 to be RC'd
    (rta_version, rc_i2_read,
     bcl_params) = tk_bcl.get_rta_version(args.run_path)
    martian.log_info("BCL folder RTA Version: %s" % rta_version)
    martian.log_info("BCL params: %s" % str(bcl_params))

    # Determine the best available bcl2fastq version to use
    # Will call martian.exit() with an error message if there isn't
    # a compatible version available
    hostname = socket.gethostname()
    (major_ver, full_ver) = tk_bcl.check_bcl2fastq(hostname, rta_version)

    martian.log_info("Using bcl2fastq version: %s" % full_ver)

    tile_split = args.tile_suffix != '*'

    try:
        # Internal use only. Move aside Illumina sample sheet so
        # bcl2fastq doesn't use it. For customers, there is a pre-flight
        # check to make sure there is no sample sheet in the places
        # bcl2fastq looks for it.
        import kitten  # noqa: F401 -- imported only to test availability

        # Older RTA put sheet into Data/Intensities/BaseCalls while
        # newer RTA put sheet at top of the BCL folder. Check both.
        for ss_dir in [args.run_path, input_dir]:
            ilmn_sample_sheet = os.path.join(ss_dir, "SampleSheet.csv")
            mv_sample_sheet = os.path.join(ss_dir, "IlluminaSampleSheet.csv")
            if os.path.exists(ilmn_sample_sheet):
                martian.log_info("Renaming the Illumina sample sheet")
                os.rename(ilmn_sample_sheet, mv_sample_sheet)
    except ImportError:
        pass

    # Restore the LD_LIBRARY_PATH set aside by sourceme.bash/shell10x.
    # Only do this for the environment in which BCL2FASTQ will run.
    new_environ = dict(os.environ)
    new_environ['LD_LIBRARY_PATH'] = os.environ['_TENX_LD_LIBRARY_PATH']

    if major_ver == tk_bcl.BCL2FASTQ_V1:
        if tile_split:
            martian.throw(
                "Cannot support NovaSeq demux scheme on bcl2fastq v1.  Exiting."
            )

        # configure
        # write bigger fastq chunks to avoid blow-up of chunks
        cmd = [
            "configureBclToFastq.pl", "--fastq-cluster-count", "20000000",
            "--no-eamss", "--use-bases-mask=" + use_bases_mask_val,
            "--input-dir=" + input_dir, "--output-dir=" + output_dir
        ]

        martian.log_info("Running bcl2fastq setup command:")
        martian.log_info(" ".join(cmd))

        try:
            ret = tenkit.log_subprocess.call(cmd, env=new_environ)
        except OSError:
            martian.throw(
                "configureBclToFastq.pl not found on path -- make sure you've added it to your environment"
            )

        if ret != 0:
            martian.throw("configureBclToFastq.pl failed. Exiting.")

        # Run the actual makefiles
        makefile = os.path.join(output_dir, "Makefile")
        if not os.path.exists(makefile):
            martian.throw("BclToFastq Makefile not found where expected: %s" %
                          makefile)

        martian.log_info("Running Makefile...")
        mk_cmd = ["make", "-C", output_dir, "-j", str(args.num_threads)]
        martian.log_info(" ".join(mk_cmd))
        ret = tenkit.log_subprocess.call(mk_cmd, env=new_environ)

        if ret > 0:
            martian.throw(
                "running the BclToFastq Makefile failed with code: %d. Exiting"
                % ret)
        elif ret < 0:
            # A negative return code -N means "terminated by signal N", so
            # negate it to report the real signal number.
            martian.throw("Bcl2Fastq was killed with signal %d." % -ret)

    elif major_ver == tk_bcl.BCL2FASTQ_V2:
        if tile_split:
            proj_output_dir = os.path.join(output_dir,
                                           "Tile%s" % args.tile_suffix,
                                           "Project_%s" % flowcell)
        else:
            proj_output_dir = os.path.join(output_dir, "Project_%s" % flowcell)

        fastq_output_dir = os.path.join(proj_output_dir, "fastq")
        interop_output_dir = os.path.join(proj_output_dir, "interop")

        if not os.path.exists(fastq_output_dir):
            os.makedirs(fastq_output_dir)

        if not os.path.exists(interop_output_dir):
            os.makedirs(interop_output_dir)

        min_read_length = min([x["read_length"] for x in read_info])

        # In tile-split mode, restrict bcl2fastq to tiles whose name ends
        # with the requested suffix; every other option is shared.
        tiles_regex = None
        if tile_split:
            flowcell_info = tk_lane.get_flowcell_layout(run_info_xml)
            if flowcell_info.tile_length is None:
                martian.throw(
                    "Cannot determine tile name length from RunInfo.xml")

            tiles_regex_prefix = "[0-9]" * (flowcell_info.tile_length - 1)
            tiles_regex = "%s%s" % (tiles_regex_prefix, args.tile_suffix)

        threads = str(args.__threads)
        cmd = [
            "bcl2fastq",
            "--minimum-trimmed-read-length",
            str(min_read_length),
            # PIPELINES-1140 - required in bcl2fastq 2.17 to generate correct index read fastqs
            "--mask-short-adapter-reads",
            str(min_read_length),
            # LONGRANGER-121 - ignore missing bcl data
            "--ignore-missing-bcls",
            "--ignore-missing-filter",
            "--ignore-missing-positions",
            "--ignore-missing-controls",
            '-r',
            threads,
            '-w',
            threads,
            # TENKIT-72 avoid CPU oversubscription
            '-p',
            threads,
            "--use-bases-mask=" + use_bases_mask_val,
            "-R",
            args.run_path,
            "--output-dir=" + fastq_output_dir,
            "--interop-dir=" + interop_output_dir
        ]
        if tiles_regex is not None:
            cmd.append("--tiles=" + tiles_regex)

        martian.log_info("Running bcl2fastq 2: %s" % (" ".join(cmd)))

        try:
            ret = tenkit.log_subprocess.call(cmd, env=new_environ)
        except OSError:
            martian.throw(
                "bcl2fastq not found on PATH -- make sure you've added it to your environment"
            )

        if ret > 0:
            martian.exit("bcl2fastq failed. Exiting.")
        elif ret < 0:
            # Return code is -N for signal N; negate it for the message.
            martian.exit("bcl2fastq was killed with signal %d." % -ret)

    # Glob over all lanes - demultiplex handles whether to collapse them
    if tile_split:
        fastq_glob = os.path.join(output_dir, "Tile*", "Project_" + flowcell,
                                  "*", "*.fastq*")
    else:
        fastq_glob = os.path.join(output_dir, "Project_" + flowcell, "*",
                                  "*.fastq*")
    start_fastq_files = glob.glob(fastq_glob)

    # File renaming -- bcl2fastq names the reads R1, R2, R3, R4
    # Use our conventions to make them R1, I1, I2, R2, as the case may be.
    rename_fastq_files(read_info, start_fastq_files)