Exemple #1
0
def check_chemistry(name, custom_def, allowed_chems):
    """Validate the chemistry configuration, handing each result to ``check``.

    Three validations are run in order:
      1. the chemistry definitions known to ``cr_chem``;
      2. the chemistry ``name`` against ``allowed_chems``;
      3. ``custom_def`` - only when ``name`` is the custom chemistry name.

    Args:
        name: Chemistry name to validate.
        custom_def: Custom chemistry definition; only inspected when ``name``
            equals ``cr_chem.CUSTOM_CHEMISTRY_NAME``.
        allowed_chems: Forwarded to ``cr_chem.check_chemistry_arg`` together
            with ``name``.

    NOTE(review): ``check`` is defined elsewhere; presumably it aborts or
    raises when handed a failed validation result - confirm against its
    definition.
    """
    defs_result = cr_chem.check_chemistry_defs()
    check(defs_result)

    arg_result = cr_chem.check_chemistry_arg(name, allowed_chems)
    check(arg_result)

    uses_custom_chemistry = name == cr_chem.CUSTOM_CHEMISTRY_NAME
    if uses_custom_chemistry:
        custom_result = cr_chem.check_chemistry_def(custom_def)
        check(custom_result)
Exemple #2
0
def main(args, outs):
    """Run the preflight validations, reporting any failure via martian.exit.

    Checks, in order: the gem groups of the sample defs; each sample def's
    FASTQ folder (absolute path, exists, accessible, non-empty), its lanes
    and its sample indices; that at least one of the transcriptome / V(D)J
    references is given and that each given one validates; the chemistry
    arguments; the open-file-handle limit (only when
    ``args.check_executables`` is set); and that ``recovered_cells`` and
    ``force_cells`` are not both supplied. Package versions are recorded
    last.

    Args:
        args: Stage arguments. Fields read here: ``sample_def``,
            ``reference_path``, ``vdj_reference_path``, ``chemistry_name``,
            ``custom_chemistry_def``, ``check_executables``,
            ``recovered_cells`` and ``force_cells``.
        outs: Stage outputs; not used by this function.
    """

    def _exit_unless_ok(result):
        # Every validator used below hands back an (ok, msg) pair.
        passed, message = result
        if not passed:
            martian.exit(message)

    machine = socket.gethostname()

    print("Checking sample info...")
    _exit_unless_ok(tk_preflight.check_gem_groups(args.sample_def))

    print("Checking FASTQ folder...")
    for sd in args.sample_def:
        fastq_dir = sd["read_path"]

        if not fastq_dir.startswith('/'):
            martian.exit(
                "Specified FASTQ folder must be an absolute path: %s" %
                fastq_dir)

        if not os.path.exists(fastq_dir):
            martian.exit(
                "On machine: %s, specified FASTQ folder does not exist: %s" %
                (machine, fastq_dir))

        if not os.access(fastq_dir, os.X_OK):
            martian.exit(
                "On machine: %s, cellranger does not have permission to open FASTQ folder: %s"
                % (machine, fastq_dir))

        if not os.listdir(fastq_dir):
            martian.exit("Specified FASTQ folder is empty: " + fastq_dir)

        # A lanes value of None skips the per-lane validation entirely.
        requested_lanes = sd["lanes"]
        if requested_lanes is not None:
            for lane in requested_lanes:
                if not is_int(lane):
                    martian.exit(
                        "Lanes must be a comma-separated list of numbers.")

        _exit_unless_ok(tk_preflight.check_sample_indices(sd))

    # At least one kind of reference has to be supplied.
    if args.reference_path is None and args.vdj_reference_path is None:
        martian.exit(
            "Must specify either reference_path or vdj_reference_path.")

    print("Checking transcriptome...")
    if args.reference_path is not None:
        _exit_unless_ok(cr_preflight.check_refdata(args.reference_path))

    if args.vdj_reference_path is not None:
        _exit_unless_ok(
            vdj_preflight.check_refdata(args.vdj_reference_path))

    print("Checking chemistry...")
    _exit_unless_ok(cr_chem.check_chemistry_defs())
    _exit_unless_ok(cr_chem.check_chemistry_arg(args.chemistry_name))

    # The custom definition only matters when the custom chemistry is chosen.
    if args.chemistry_name == cr_chem.CUSTOM_CHEMISTRY_NAME:
        _exit_unless_ok(
            cr_chem.check_chemistry_def(args.custom_chemistry_def))

    # Open file handles limit - per CELLRANGER-824, only check this on the
    # execution machine, which is identified by args.check_executables.
    if args.check_executables:
        print("Checking system environment...")
        _exit_unless_ok(tk_preflight.check_open_fh())

    print("Checking optional arguments...")
    if args.recovered_cells is not None and args.force_cells is not None:
        martian.exit(
            "Cannot specify both --force-cells and --expect-cells (or --cells) in the same run."
        )

    cr_preflight.record_package_versions()
Exemple #3
0
def main(args, outs):
    """Validate or auto-detect the chemistry and write the report/summary.

    Flow:
      1. Reject ``args.chemistry_name_spec`` if ``args.allowed_chems`` is set
         and does not contain it.
      2. If the spec is not one of ``cr_chem.AUTO_CHEMISTRY_NAMES`` (or is the
         custom chemistry name), validate it, write an empty summary JSON,
         set ``outs.chemistry_type`` to the spec, set ``outs.report`` to None
         and return.
      3. Otherwise run the preflight checks, and when the spec is ``'auto'``
         infer a chemistry per sample def; differing results are reported
         through ``martian.exit``.
      4. If the resulting name is still one of the auto names, refine it per
         FASTQ group with ``cr_chem.infer_chemistry``; all groups that have
         input must agree.
      5. Write the report text to ``outs.report`` and the metrics (plus the
         final chemistry name) as JSON to ``outs.summary``.

    Args:
        args: Stage arguments. Fields read here: ``allowed_chems``,
            ``chemistry_name_spec``, ``sample_def``, ``reference_path`` and
            ``vdj_reference_path``.
        outs: Stage outputs. ``summary`` and ``report`` are used as file
            paths; ``chemistry_type`` (and, on the explicit path, ``report``)
            are assigned.
    """
    # Check chemistry restrictions
    if args.allowed_chems is not None and \
       args.chemistry_name_spec not in args.allowed_chems:
        martian.exit(
            "The chemistry name '%s' is not allowed for this pipeline. The allowed values are: %s"
            % (args.chemistry_name_spec, ', '.join(args.allowed_chems)))

    ## If chem explicitly specified, just check it and finish
    if args.chemistry_name_spec not in cr_chem.AUTO_CHEMISTRY_NAMES or \
       args.chemistry_name_spec == cr_chem.CUSTOM_CHEMISTRY_NAME:
        ok, msg = cr_chem.check_chemistry_arg(args.chemistry_name_spec)
        if not ok:
            martian.exit(msg)

        # Write empty json
        with open(outs.summary, 'w') as f:
            json.dump({}, f)

        outs.chemistry_type = args.chemistry_name_spec
        outs.report = None
        return

    ## Run preflight checks
    try:
        run_preflight_checks(args)
    except cr_preflight.PreflightException as e:
        martian.exit(e.msg)

    ## Find the input fastqs
    # NOTE(review): this result is only consumed inside the 'auto' branch,
    # where it is recomputed per sample def. The call is kept in case
    # find_fastqs also validates its input - confirm.
    chunks = find_fastqs(args.sample_def)

    chemistry_name = args.chemistry_name_spec
    report = ''
    metrics = {}

    if args.chemistry_name_spec == 'auto':
        (txome_idx,
         vdj_idx) = prepare_transcriptome_indexes(args.reference_path,
                                                  args.vdj_reference_path)

        # Map sample_def_idx => inferred chemistry name.
        # report/metrics are overwritten on every iteration, so the values
        # written out below are those of the last sample def.
        auto_chemistries = {}
        for (idx, sd) in enumerate(args.sample_def):
            chunks = find_fastqs([sd])
            chemistry_name, report, metrics = infer_sc3p_or_sc5p(
                chunks, txome_idx, vdj_idx)
            auto_chemistries[idx] = chemistry_name

        if len(set(auto_chemistries.itervalues())) > 1:
            # Stringify before joining: str.join raises TypeError on any
            # non-string item, so a chemistry recorded as None would replace
            # this error message with a traceback. Sorting keeps the message
            # deterministic.
            c = ', '.join(
                sorted(map(str, set(auto_chemistries.itervalues()))))
            s = '\n'.join("  Sample def %d: %s" % (idx, chem)
                          for (idx,
                               chem) in sorted(auto_chemistries.iteritems()))
            martian.exit(
                "Detected conflicting chemistry types (%s).\n Please run these data separately.\n%s"
                % (c, s))

        else:
            chemistry_name = auto_chemistries[0]

    # Further refinement:
    #   - Detect the sequencing configuration for SC5P (SC5P-PE vs SC5P-R2)
    #   - Detect the sequencing configuration for SCVDJ (SCVDJ vs SCVDJ-R2)
    #
    # The chemistry/seq-config must be consistent across all sample defs
    if chemistry_name in cr_chem.AUTO_CHEMISTRY_NAMES:
        # Map (sample_def_idx, fastq_group_name) => chemistry_name
        group_chem = {}

        for sd_idx, sd in enumerate(args.sample_def):
            fq_spec = cr_fastq.FastqSpec.from_sample_def(sd)

            # Infer chemistry for each sample index/name (aka fastq group)
            for group, group_spec in fq_spec.get_group_spec_iter():
                try:
                    group_chem[(sd_idx, group)] = cr_chem.infer_chemistry(
                        chemistry_name, group_spec)

                except cr_chem.NoInputFastqsException:
                    # It's okay for a single sample index/name to be absent
                    continue

        # Every group lacked input FASTQs.
        if len(group_chem) == 0:
            martian.exit(cr_constants.NO_INPUT_FASTQS_MESSAGE)

        martian.log_info("Detected chemistries:")
        for (i, g) in group_chem.iteritems():
            martian.log_info("%s: %s" % (str(i), str(g)))

        # Check for multiple chemistry types
        if len(set(group_chem.itervalues())) > 1:
            c = ', '.join(map(str, sorted(list(set(group_chem.itervalues())))))
            s = ', '.join("Sample def %d/%s: %s" % (i, g, v)
                          for ((i, g), v) in sorted(group_chem.iteritems()))
            martian.exit(
                "Detected conflicting chemistry types (%s). Please run these data separately. %s"
                % (c, s))

        # All groups agree at this point, so any entry will do.
        chemistry_name = group_chem.values()[0]

        report += "\nThe chemistry version or sequencing configuration is likely %s" % cr_chem.get_chemistry_description_from_name(
            chemistry_name)

    outs.chemistry_type = chemistry_name

    # Write report file
    martian.log_info(report)

    with open(outs.report, 'w') as f:
        f.write(report + "\n")

    # Write summary JSON
    metrics['chemistry'] = chemistry_name
    with open(outs.summary, 'w') as f:
        json.dump(metrics, f)
Exemple #4
0
def main(args, outs):
    """Validate or auto-detect the chemistry, then write report and summary.

    Flow:
      1. Reject ``args.chemistry_name_spec`` if ``args.allowed_chems`` is set
         and does not contain it.
      2. If the spec is not one of ``cr_chem.AUTO_CHEMISTRY_NAMES`` (or is the
         custom chemistry name): validate it, run the whitelist-match check
         for every FASTQ group (skipped for the custom chemistry), write an
         empty summary JSON, set ``outs.chemistry_type`` to the spec, set
         ``outs.report`` to None and return.
      3. Otherwise run the preflight checks. When the spec is ``'auto'``,
         infer a chemistry for each sample def whose ``library_type`` is gene
         expression, VDJ or unset; a falsy result or disagreeing results are
         reported through ``martian.exit``.
      4. If the resulting name is still one of the auto names, refine it per
         FASTQ group over all sample defs with ``cr_chem.infer_chemistry``.
      5. Write the report to ``outs.report`` and the metrics (plus the final
         chemistry name) as JSON to ``outs.summary``, then check the
         read-length arguments against the chosen chemistry.

    Args:
        args: Stage arguments. Fields read here: ``allowed_chems``,
            ``chemistry_name_spec``, ``sample_def``, ``reference_path``,
            ``vdj_reference_path``, ``r1_length`` and ``r2_length``.
        outs: Stage outputs. ``summary`` and ``report`` are used as file
            paths; ``chemistry_type`` (and, on the explicit path, ``report``)
            are assigned.
    """
    requested = args.chemistry_name_spec

    # Reject chemistries that are not permitted for this pipeline.
    if args.allowed_chems is not None and requested not in args.allowed_chems:
        martian.exit(
            "The chemistry name '%s' is not allowed for this pipeline. The allowed values are: %s"
            % (requested, ', '.join(args.allowed_chems)))

    ## Explicitly specified chemistry: validate it and finish early.
    is_explicit = (requested not in cr_chem.AUTO_CHEMISTRY_NAMES
                   or requested == cr_chem.CUSTOM_CHEMISTRY_NAME)
    if is_explicit:
        ok, msg = cr_chem.check_chemistry_arg(requested)
        if not ok:
            martian.exit(msg)

        # For explicitly set (non-custom) chemistries, make sure the
        # whitelist hit rate is reasonable for every FASTQ group.
        if requested != cr_chem.CUSTOM_CHEMISTRY_NAME:
            for sample in args.sample_def:
                spec = cr_fastq.FastqSpec.from_sample_def(sample)
                for _group, group_spec in spec.get_group_spec_iter():
                    problem = cr_chem.check_whitelist_match(requested,
                                                            group_spec)
                    if problem is not None:
                        martian.exit(problem)

        # The summary is an empty JSON object on this path.
        with open(outs.summary, 'w') as summary_file:
            json.dump({}, summary_file)

        outs.chemistry_type = requested
        outs.report = None
        return

    ## Run preflight checks
    try:
        run_preflight_checks(args)
    except cr_preflight.PreflightException as err:
        martian.exit(err.msg)

    ## Find the input fastqs
    # 'count' requires library_type to be set. 'vdj' doesn't require a
    # library_type, but only supports VDJ libraries, so sample_def entries
    # without a library_type are let into the detection loop as well.
    detectable_types = (
        cr_libraries.GENE_EXPRESSION_LIBRARY_TYPE,
        cr_libraries.VDJ_LIBRARY_TYPE,
        None,
    )
    gex_or_vdj_defs = [
        sample for sample in args.sample_def
        if sample.get("library_type") in detectable_types
    ]

    chunks = find_fastqs(gex_or_vdj_defs)

    chemistry_name = requested
    report = ''
    metrics = {}

    if requested == 'auto':
        txome_idx, vdj_idx = prepare_transcriptome_indexes(
            args.reference_path, args.vdj_reference_path)

        # Map index within gex_or_vdj_defs => inferred chemistry name.
        # metrics is overwritten on every iteration, so the summary carries
        # the metrics of the last sample def examined.
        auto_chemistries = {}
        for position, sample in enumerate(gex_or_vdj_defs):
            chunks = find_fastqs([sample])

            header = "\nDetect Report -- %s (%s):\n" % (
                sample["read_path"], sample.get("library_type"))
            chemistry_name, detail, metrics = infer_sc3p_or_sc5p(
                chunks, txome_idx, vdj_idx)
            sample_report = header + detail
            report += sample_report
            auto_chemistries[position] = chemistry_name

            if not chemistry_name:
                failure = ("Unable to detect the chemistry for the following dataset. "
                           "Please validate it and/or specify the chemistry via the --chemistry argument.\n"
                           + sample_report)
                martian.exit(failure)

        distinct_auto = set(auto_chemistries.itervalues())
        if len(distinct_auto) > 1:
            names = ', '.join(map(str, distinct_auto))
            per_def = '\n'.join(
                "  Sample def %d: %s" % (position, chem)
                for (position, chem) in sorted(auto_chemistries.iteritems()))

            some_undetected = any(
                chem is None for chem in auto_chemistries.itervalues())

            if some_undetected:
                martian.exit("Detected conflicting chemistry types (%s). Please run these data separately and/or specify the chemistry via the --chemistry argument. %s" % (names, per_def))
            else:
                martian.exit("Detected conflicting chemistry types (%s). Please run these data separately. %s" % (names, per_def))

        else:
            chemistry_name = auto_chemistries[0]

    # Further refinement:
    #   - Detect the sequencing configuration for SC5P (SC5P-PE vs SC5P-R2)
    #   - Detect the sequencing configuration for SCVDJ (SCVDJ vs SCVDJ-R2)
    #
    # The chemistry/seq-config must be consistent across all sample defs
    if chemistry_name in cr_chem.AUTO_CHEMISTRY_NAMES:
        # Map (sample_def_idx, fastq_group_name) => chemistry_name
        group_chem = {}
        # Same keys, but only for groups that could not be classified.
        group_exception = {}

        for sd_idx, sample in enumerate(args.sample_def):
            spec = cr_fastq.FastqSpec.from_sample_def(sample)

            # Infer chemistry for each sample index/name (aka fastq group)
            for group, group_spec in spec.get_group_spec_iter():
                key = (sd_idx, group)
                try:
                    group_chem[key] = cr_chem.infer_chemistry(chemistry_name,
                                                              group_spec)
                except cr_chem.NoInputFastqsException:
                    # It's okay for a single sample index/name to be absent
                    pass
                except cr_chem.NoChemistryFoundException as err:
                    # It's okay for a single sample index to be unclassifiable
                    group_chem[key] = None
                    group_exception[key] = err

        if not group_chem:
            # Could not find any FASTQs
            martian.exit(cr_constants.NO_INPUT_FASTQS_MESSAGE)

        martian.log_info("Detected chemistries:")
        for key, chem in group_chem.iteritems():
            martian.log_info("%s: %s" % (str(key), str(chem)))

        found_chemistries = [
            chem for chem in group_chem.itervalues() if chem is not None
        ]

        # Check for zero chemistry types
        if not found_chemistries:
            failures = ', '.join(
                "Sample def %d/%s: %s" % (i, g, e)
                for ((i, g), e) in sorted(group_exception.iteritems()))
            martian.exit("Unable to auto-detect chemistry. %s" % failures)

        # Check for multiple chemistry types
        if len(set(found_chemistries)) > 1:
            # Built from every group entry, so an unclassified group shows up
            # here as the string "None".
            detected_chemistries = [
                str(chem) for chem in sorted(set(group_chem.itervalues()))
            ]
            names = ', '.join(detected_chemistries)
            per_group = ', '.join(
                "Sample def %d/%s: %s" % (i, g, v)
                for ((i, g), v) in sorted(group_chem.iteritems()))

            some_undetected = any(
                chem is None for chem in group_chem.itervalues())

            if set(detected_chemistries) == set(["SC5P-PE", "SC5P-R2"]):
                mix_msg = (
                    "'cellranger count' doesn't support a mixture of 5' paired end (SC5P-PE) and 5' R2 (SC5P-R2) read types. "
                    "To process this combination of data, you will need to use 5' single-end mode. Specify '--chemistry SC5P-R2' on the 'cellranger count' command line."
                )
                martian.exit(mix_msg)

            if some_undetected:
                martian.exit("Detected conflicting chemistry types (%s). Please run these data separately and/or specify the chemistry via the --chemistry argument. %s" % (names, per_group))
            else:
                martian.exit("Detected conflicting chemistry types (%s). Please run these data separately. %s" % (names, per_group))

        chemistry_name = found_chemistries[0]

        description = cr_chem.get_chemistry_description_from_name(
            chemistry_name)
        report += "\nThe chemistry version or sequencing configuration is likely %s" % description

    outs.chemistry_type = chemistry_name

    # Write report file
    martian.log_info(report)

    with open(outs.report, 'w') as report_file:
        report_file.write(report + "\n")

    # Write summary JSON
    metrics['chemistry'] = chemistry_name
    with open(outs.summary, 'w') as summary_file:
        json.dump(metrics, summary_file)

    # Check the read-length arguments to make sure they're compatible with
    # the selected chemistry.
    length_problem = cr_preflight.check_read_lengths_vs_chemistry(
        chemistry_name, args.allowed_chems, args.r1_length, args.r2_length)
    if length_problem is not None:
        martian.exit(length_problem)