def check_chemistry(name, custom_def, allowed_chems):
    """Run the chemistry validations for the chemistry called ``name``.

    The result of each ``cr_chem`` validator is handed to ``check`` (defined
    outside this block), in this order:

      1. the chemistry definitions themselves,
      2. ``name`` against ``allowed_chems``,
      3. ``custom_def`` -- only when ``name`` equals
         ``cr_chem.CUSTOM_CHEMISTRY_NAME``.
    """
    definitions_result = cr_chem.check_chemistry_defs()
    check(definitions_result)

    argument_result = cr_chem.check_chemistry_arg(name, allowed_chems)
    check(argument_result)

    # The user-supplied definition only matters for the custom chemistry.
    uses_custom_chemistry = name == cr_chem.CUSTOM_CHEMISTRY_NAME
    if uses_custom_chemistry:
        custom_result = cr_chem.check_chemistry_def(custom_def)
        check(custom_result)
def main(args, outs):
    """Preflight stage: validate the inputs and stop on the first problem.

    Checks, in order: sample info (gem groups), every FASTQ folder, lane
    lists and sample indices, the reference paths, the chemistry arguments,
    the open-file-handle limit (only when ``args.check_executables`` is set)
    and mutually exclusive optional arguments.  Any failure is reported
    through ``martian.exit``.  When everything passes, the package versions
    are recorded via ``cr_preflight.record_package_versions``.

    Args:
        args: stage arguments.  Attributes read here: ``sample_def``,
            ``reference_path``, ``vdj_reference_path``, ``chemistry_name``,
            ``custom_chemistry_def``, ``check_executables``,
            ``recovered_cells`` and ``force_cells``.
        outs: stage outputs; not used by this function.
    """
    def _exit_unless_ok(outcome):
        # The validators used below hand back an (ok, message) pair.
        passed, message = outcome
        if not passed:
            martian.exit(message)

    hostname = socket.gethostname()

    print("Checking sample info...")
    _exit_unless_ok(tk_preflight.check_gem_groups(args.sample_def))

    print("Checking FASTQ folder...")
    for entry in args.sample_def:
        fastq_dir = entry["read_path"]

        if not fastq_dir.startswith('/'):
            martian.exit(
                "Specified FASTQ folder must be an absolute path: %s" % fastq_dir)
        if not os.path.exists(fastq_dir):
            martian.exit(
                "On machine: %s, specified FASTQ folder does not exist: %s"
                % (hostname, fastq_dir))
        if not os.access(fastq_dir, os.X_OK):
            martian.exit(
                "On machine: %s, cellranger does not have permission to open FASTQ folder: %s"
                % (hostname, fastq_dir))
        if not os.listdir(fastq_dir):
            martian.exit("Specified FASTQ folder is empty: " + fastq_dir)

        # A lane list is optional; when given, every entry must be an integer.
        lanes = entry["lanes"]
        for lane in (lanes if lanes is not None else ()):
            if not is_int(lane):
                martian.exit(
                    "Lanes must be a comma-separated list of numbers.")

        _exit_unless_ok(tk_preflight.check_sample_indices(entry))

    if args.reference_path is None and args.vdj_reference_path is None:
        martian.exit(
            "Must specify either reference_path or vdj_reference_path.")

    print("Checking transcriptome...")
    if args.reference_path is not None:
        _exit_unless_ok(cr_preflight.check_refdata(args.reference_path))
    if args.vdj_reference_path is not None:
        _exit_unless_ok(vdj_preflight.check_refdata(args.vdj_reference_path))

    print("Checking chemistry...")
    _exit_unless_ok(cr_chem.check_chemistry_defs())
    _exit_unless_ok(cr_chem.check_chemistry_arg(args.chemistry_name))
    if args.chemistry_name == cr_chem.CUSTOM_CHEMISTRY_NAME:
        _exit_unless_ok(cr_chem.check_chemistry_def(args.custom_chemistry_def))

    # Open file handles limit - per CELLRANGER-824, only check this on the
    # execution machine.  args.check_executables tells us whether we are on
    # the execution machine.
    if args.check_executables:
        print("Checking system environment...")
        _exit_unless_ok(tk_preflight.check_open_fh())

    print("Checking optional arguments...")
    both_cell_counts_given = (args.recovered_cells is not None
                              and args.force_cells is not None)
    if both_cell_counts_given:
        martian.exit(
            "Cannot specify both --force-cells and --expect-cells (or --cells) in the same run."
        )

    cr_preflight.record_package_versions()
def main(args, outs):
    """Chemistry-detection stage: resolve the chemistry and write the outputs.

    An explicitly requested chemistry (one that is not in
    ``cr_chem.AUTO_CHEMISTRY_NAMES``, or the custom chemistry) is only
    validated: an empty summary JSON is written, ``outs.chemistry_type`` is
    set to the requested name, ``outs.report`` is set to None and the
    function returns.

    Otherwise the preflight checks run, ``'auto'`` is resolved per sample def
    with ``infer_sc3p_or_sc5p``, and the result is refined per FASTQ group
    with ``cr_chem.infer_chemistry``.  Disagreement between sample defs or
    between FASTQ groups is reported through ``martian.exit``.  The report
    text goes to the ``outs.report`` path and the metrics (plus the final
    chemistry) to the ``outs.summary`` path.

    Args:
        args: stage arguments.  Attributes read here: ``allowed_chems``,
            ``chemistry_name_spec``, ``sample_def``, ``reference_path`` and
            ``vdj_reference_path``.
        outs: stage outputs (``summary``, ``report``, ``chemistry_type``).
    """
    requested = args.chemistry_name_spec

    # Reject chemistries that the calling pipeline does not permit.
    permitted = args.allowed_chems
    if permitted is not None and requested not in permitted:
        martian.exit(
            "The chemistry name '%s' is not allowed for this pipeline. The allowed values are: %s"
            % (requested, ', '.join(permitted)))

    # An explicitly named chemistry is only validated; no detection is run.
    is_custom = requested == cr_chem.CUSTOM_CHEMISTRY_NAME
    if is_custom or requested not in cr_chem.AUTO_CHEMISTRY_NAMES:
        valid, problem = cr_chem.check_chemistry_arg(requested)
        if not valid:
            martian.exit(problem)

        # Nothing was detected, so the summary is an empty JSON object.
        with open(outs.summary, 'w') as summary_file:
            json.dump({}, summary_file)

        outs.chemistry_type = requested
        outs.report = None
        return

    try:
        run_preflight_checks(args)
    except cr_preflight.PreflightException as err:
        martian.exit(err.msg)

    # The result is not used here (the per-sample-def loop below looks the
    # FASTQs up again); the call is kept in case find_fastqs performs its own
    # validation -- TODO confirm.
    find_fastqs(args.sample_def)

    chemistry_name = requested
    report = ''
    metrics = {}

    if requested == 'auto':
        txome_idx, vdj_idx = prepare_transcriptome_indexes(
            args.reference_path, args.vdj_reference_path)

        # sample def position => chemistry inferred for that sample def.
        # report and metrics end up holding the values of the last sample def.
        per_sample_def = {}
        for position, sample_def in enumerate(args.sample_def):
            sample_chunks = find_fastqs([sample_def])
            chemistry_name, report, metrics = infer_sc3p_or_sc5p(
                sample_chunks, txome_idx, vdj_idx)
            per_sample_def[position] = chemistry_name

        distinct = set(per_sample_def.values())
        if len(distinct) > 1:
            names = ', '.join(distinct)
            breakdown = '\n'.join(
                " Sample def %d: %s" % (position, chem)
                for (position, chem) in sorted(per_sample_def.items()))
            martian.exit(
                "Detected conflicting chemistry types (%s).\n Please run these data separately.\n%s"
                % (names, breakdown))
        else:
            chemistry_name = per_sample_def[0]

    # Further refinement:
    # - Detect the sequencing configuration for SC5P (SC5P-PE vs SC5P-R2)
    # - Detect the sequencing configuration for SCVDJ (SCVDJ vs SCVDJ-R2)
    #
    # The chemistry/seq-config must be consistent across all sample defs
    if chemistry_name in cr_chem.AUTO_CHEMISTRY_NAMES:
        # Map (sample_def_idx, fastq_group_name) => chemistry_name
        group_chem = {}
        for sd_idx, sample_def in enumerate(args.sample_def):
            fastq_spec = cr_fastq.FastqSpec.from_sample_def(sample_def)

            # Infer chemistry for each sample index/name (aka fastq group)
            for group, group_spec in fastq_spec.get_group_spec_iter():
                try:
                    inferred = cr_chem.infer_chemistry(chemistry_name, group_spec)
                except cr_chem.NoInputFastqsException:
                    # It's okay for a single sample index/name to be absent
                    continue
                else:
                    group_chem[(sd_idx, group)] = inferred

        if not group_chem:
            martian.exit(cr_constants.NO_INPUT_FASTQS_MESSAGE)

        martian.log_info("Detected chemistries:")
        for key, chem in group_chem.items():
            martian.log_info("%s: %s" % (str(key), str(chem)))

        # Every fastq group has to agree on a single chemistry.
        distinct_groups = set(group_chem.values())
        if len(distinct_groups) > 1:
            names = ', '.join(str(chem) for chem in sorted(distinct_groups))
            breakdown = ', '.join(
                "Sample def %d/%s: %s" % (sd_idx, group, chem)
                for ((sd_idx, group), chem) in sorted(group_chem.items()))
            martian.exit(
                "Detected conflicting chemistry types (%s). Please run these data separately. %s"
                % (names, breakdown))

        chemistry_name = list(group_chem.values())[0]

    description = cr_chem.get_chemistry_description_from_name(chemistry_name)
    report += "\nThe chemistry version or sequencing configuration is likely %s" % description

    outs.chemistry_type = chemistry_name

    # Write report file
    martian.log_info(report)
    with open(outs.report, 'w') as report_file:
        report_file.write(report + "\n")

    # Write summary JSON
    metrics['chemistry'] = chemistry_name
    with open(outs.summary, 'w') as summary_file:
        json.dump(metrics, summary_file)
def main(args, outs):
    """Chemistry-detection stage: resolve the chemistry and write the outputs.

    Flow:
      1. Reject a chemistry that is not in ``args.allowed_chems`` (when set).
      2. If the requested chemistry is explicit (not in
         ``cr_chem.AUTO_CHEMISTRY_NAMES``, or the custom chemistry): validate
         it, check the whitelist match of every FASTQ group (skipped for the
         custom chemistry), write an empty summary JSON, set
         ``outs.chemistry_type`` / ``outs.report = None`` and return.
      3. Otherwise run the preflight checks.  For ``'auto'``, infer a
         chemistry for each sample def whose ``library_type`` is gene
         expression, VDJ or unset, and require all of them to agree.
      4. Refine the result per FASTQ group with ``cr_chem.infer_chemistry``;
         absent groups are skipped, unclassifiable groups are tolerated as
         long as at least one group is classified and all classified groups
         agree.
      5. Write the report and the summary JSON, then check the read-length
         arguments against the selected chemistry.

    Every failure is reported through ``martian.exit``.

    Args:
        args: stage arguments.  Attributes read here: ``allowed_chems``,
            ``chemistry_name_spec``, ``sample_def``, ``reference_path``,
            ``vdj_reference_path``, ``r1_length`` and ``r2_length``.
        outs: stage outputs.  ``summary`` and ``report`` are used as file
            paths; ``chemistry_type`` receives the selected chemistry name.
    """
    # Check chemistry restrictions
    if args.allowed_chems is not None and \
       args.chemistry_name_spec not in args.allowed_chems:
        martian.exit("The chemistry name '%s' is not allowed for this pipeline. The allowed values are: %s" % (args.chemistry_name_spec, ', '.join(args.allowed_chems)))

    ## If chem explicitly specified, just check it and finish
    if args.chemistry_name_spec not in cr_chem.AUTO_CHEMISTRY_NAMES or \
       args.chemistry_name_spec == cr_chem.CUSTOM_CHEMISTRY_NAME:
        ok, msg = cr_chem.check_chemistry_arg(args.chemistry_name_spec)
        if not ok:
            martian.exit(msg)

        # Check that there is a reasonable whitelist hit rate for explicitly set chemistries
        if args.chemistry_name_spec != cr_chem.CUSTOM_CHEMISTRY_NAME:
            for sd in args.sample_def:
                fq_spec = cr_fastq.FastqSpec.from_sample_def(sd)

                # check_whitelist_match returns None on success, otherwise
                # the message to exit with.
                for _group, group_spec in fq_spec.get_group_spec_iter():
                    res = cr_chem.check_whitelist_match(args.chemistry_name_spec, group_spec)
                    if res is not None:
                        martian.exit(res)

        # Write empty json
        with open(outs.summary, 'w') as f:
            json.dump({}, f)

        outs.chemistry_type = args.chemistry_name_spec
        outs.report = None
        return

    ## Run preflight checks
    try:
        run_preflight_checks(args)
    except cr_preflight.PreflightException as e:
        martian.exit(e.msg)

    ## Find the input fastqs
    # 'count' requires library_type to be set. 'vdj' doesn't require a library_type,
    # but only supports VDJ libraries, so let any sample_def entries that don't have
    # library_type set into the detection loop.
    detect_library_types = [cr_libraries.GENE_EXPRESSION_LIBRARY_TYPE,
                            cr_libraries.VDJ_LIBRARY_TYPE,
                            None]
    gex_or_vdj_defs = [x for x in args.sample_def
                       if x.get("library_type") in detect_library_types]
    # The result is not used here (the loop below looks the FASTQs up again
    # per sample def); the call is kept in case find_fastqs performs its own
    # validation -- TODO confirm.
    find_fastqs(gex_or_vdj_defs)

    chemistry_name = args.chemistry_name_spec
    report = ''
    metrics = {}

    if args.chemistry_name_spec == 'auto':
        (txome_idx, vdj_idx) = prepare_transcriptome_indexes(args.reference_path,
                                                             args.vdj_reference_path)

        # Position within gex_or_vdj_defs => inferred chemistry name.
        # NOTE: metrics keeps the values of the last sample def only.
        auto_chemistries = {}
        for (idx, sd) in enumerate(gex_or_vdj_defs):
            chunks = find_fastqs([sd])
            sd_report = "\nDetect Report -- %s (%s):\n" % (sd["read_path"], sd.get("library_type"))
            chemistry_name, _report, metrics = infer_sc3p_or_sc5p(chunks, txome_idx, vdj_idx)
            sd_report += _report
            report += sd_report
            auto_chemistries[idx] = chemistry_name
            if not chemistry_name:
                err_msg = ("Unable to detect the chemistry for the following dataset. "
                           "Please validate it and/or specify the chemistry via the --chemistry argument.\n" + sd_report)
                martian.exit(err_msg)

        distinct_auto = set(auto_chemistries.values())
        if not auto_chemistries:
            # No sample def was eligible for detection.  Without this guard
            # the auto_chemistries[0] lookup below fails with a bare KeyError.
            martian.exit("Unable to auto-detect chemistry: none of the sample defs has a library type "
                         "eligible for detection. Please specify the chemistry via the --chemistry argument.")
        elif len(distinct_auto) > 1:
            c = ', '.join(map(str, distinct_auto))
            s = '\n'.join(" Sample def %d: %s" % (idx, chem)
                          for (idx, chem) in sorted(auto_chemistries.items()))
            any_failed = any(chem is None for chem in auto_chemistries.values())
            if not any_failed:
                martian.exit("Detected conflicting chemistry types (%s). Please run these data separately. %s" % (c, s))
            else:
                martian.exit("Detected conflicting chemistry types (%s). Please run these data separately and/or specify the chemistry via the --chemistry argument. %s" % (c, s))
        else:
            chemistry_name = auto_chemistries[0]

    # Further refinement:
    # - Detect the sequencing configuration for SC5P (SC5P-PE vs SC5P-R2)
    # - Detect the sequencing configuration for SCVDJ (SCVDJ vs SCVDJ-R2)
    #
    # The chemistry/seq-config must be consistent across all sample defs
    if chemistry_name in cr_chem.AUTO_CHEMISTRY_NAMES:
        # Map (sample_def_idx, fastq_group_name) => chemistry_name
        group_chem = {}
        # Map (sample_def_idx, fastq_group_name) => exception, for groups
        # that could not be classified.
        group_exception = {}
        for sd_idx, sd in enumerate(args.sample_def):
            fq_spec = cr_fastq.FastqSpec.from_sample_def(sd)

            # Infer chemistry for each sample index/name (aka fastq group)
            for group, group_spec in fq_spec.get_group_spec_iter():
                try:
                    group_chem[(sd_idx, group)] = cr_chem.infer_chemistry(chemistry_name, group_spec)
                except cr_chem.NoInputFastqsException:
                    # It's okay for a single sample index/name to be absent
                    continue
                except cr_chem.NoChemistryFoundException as e:
                    # It's okay for a single sample index to be unclassifiable
                    group_chem[(sd_idx, group)] = None
                    group_exception[(sd_idx, group)] = e
                    continue

        if len(group_chem) == 0:
            # Could not find any FASTQs
            martian.exit(cr_constants.NO_INPUT_FASTQS_MESSAGE)

        martian.log_info("Detected chemistries:")
        for (key, chem) in group_chem.items():
            martian.log_info("%s: %s" % (str(key), str(chem)))

        # Chemistries of the groups that could be classified.
        found_chemistries = [chem for chem in group_chem.values() if chem is not None]

        # Check for zero chemistry types
        if len(found_chemistries) == 0:
            s = ', '.join("Sample def %d/%s: %s" % (i, g, e)
                          for ((i, g), e) in sorted(group_exception.items()))
            martian.exit("Unable to auto-detect chemistry. %s" % s)

        # Check for multiple chemistry types
        distinct_found = set(str(chem) for chem in found_chemistries)
        if len(set(found_chemistries)) > 1:
            # For display, unclassifiable groups show up as "None".  Names are
            # stringified before sorting so None never has to be ordered
            # against a string.
            detected_chemistries = sorted(set(str(chem) for chem in group_chem.values()))
            c = ', '.join(detected_chemistries)
            s = ', '.join("Sample def %d/%s: %s" % (i, g, v)
                          for ((i, g), v) in sorted(group_chem.items()))
            any_failed = any(chem is None for chem in group_chem.values())

            # Compare only the classified chemistries: an additional
            # unclassifiable group must not hide the specific 5' PE/R2 hint.
            if distinct_found == set(["SC5P-PE", "SC5P-R2"]):
                msg = "'cellranger count' doesn't support a mixture of 5' paired end (SC5P-PE) and 5' R2 (SC5P-R2) read types. "
                msg += "To process this combination of data, you will need to use 5' single-end mode. Specify '--chemistry SC5P-R2' on the 'cellranger count' command line."
                martian.exit(msg)

            if not any_failed:
                martian.exit("Detected conflicting chemistry types (%s). Please run these data separately. %s" % (c, s))
            else:
                martian.exit("Detected conflicting chemistry types (%s). Please run these data separately and/or specify the chemistry via the --chemistry argument. %s" % (c, s))

        chemistry_name = found_chemistries[0]

    report += "\nThe chemistry version or sequencing configuration is likely %s" % cr_chem.get_chemistry_description_from_name(chemistry_name)

    outs.chemistry_type = chemistry_name

    # Write report file
    martian.log_info(report)
    with open(outs.report, 'w') as f:
        f.write(report + "\n")

    # Write summary JSON
    metrics['chemistry'] = chemistry_name
    with open(outs.summary, 'w') as f:
        json.dump(metrics, f)

    # Check the read-length arguments to make sure they're compatible with the selected chemistry.
    msg = cr_preflight.check_read_lengths_vs_chemistry(chemistry_name, args.allowed_chems, args.r1_length, args.r2_length)
    if msg is not None:
        martian.exit(msg)