def __init__(self, config, record, quality_offset):
    """Sets up the reads for a single lane described by a makefile record.

    'config' must provide the attributes 'destination' and
    'allow_missing_input_files'. 'record' must contain the keys "Tags"
    (with "Target", "SM", "LB" and "PU_cur") and "Options" (with
    "ExcludeReads"), and a "Type" of either "Raw" or "Trimmed"; for
    "Trimmed" records the "Data" entry is merged into 'self.files'.
    'quality_offset' is stored unchanged on the instance.

    Raises AssertionError if the record's "Type" is neither "Raw" nor
    "Trimmed"."""
    self.quality_offset = quality_offset
    self.files = {}
    self.stats = None
    self.nodes = ()

    tags = record["Tags"]
    self.folder = os.path.join(config.destination, tags["Target"], "reads",
                               tags["SM"], tags["LB"], tags["PU_cur"])

    lane_type = record.get("Type")
    if lane_type == "Raw":
        self._init_raw_reads(config, record)
    elif lane_type == "Trimmed":
        self.files.update(record["Data"])
    else:
        # Raised explicitly instead of using 'assert False', since assert
        # statements are removed when Python is run with -O, which would
        # let records of an unknown type pass silently.
        raise AssertionError("Unexpected data type in Reads(): %s"
                             % (repr(lane_type)))

    # Check Quality Score
    # if not self._check_raw_read_quality(record):
    #     assert False, "Quality Scores do not match: %s" % (repr(record["Data"]))

    # Drop any read types that the user asked to exclude
    for name in record["Options"]["ExcludeReads"]:
        self.files.pop(name, None)

    if config.allow_missing_input_files and self.nodes:
        input_missing = missing_files(self.nodes[0].input_files)
        output_missing = missing_files(self.nodes[0].output_files)
        # The nodes are discarded when the first node's inputs are missing
        # but all of its outputs are present; presumably so that existing
        # results can be used without the original input -- TODO confirm
        if input_missing and not output_missing:
            self.nodes = ()
def __init__(self, config, record, quality_offset):
    """Sets up the reads for a single lane described by a makefile record.

    'config' must provide the attributes 'destination' and
    'allow_missing_input_files'. 'record' must contain the keys "Tags"
    (with "Target", "SM", "LB" and "PU_cur") and "Options" (with
    "ExcludeReads"), and a "Type" of either "Raw" or "Trimmed"; for
    "Trimmed" records the "Data" entry is merged into 'self.files'.
    'quality_offset' is stored unchanged on the instance.

    Raises AssertionError if the record's "Type" is neither "Raw" nor
    "Trimmed"."""
    self.quality_offset = quality_offset
    self.files = {}
    self.stats = None
    self.nodes = ()

    tags = record["Tags"]
    self.folder = os.path.join(config.destination, tags["Target"], "reads",
                               tags["SM"], tags["LB"], tags["PU_cur"])

    lane_type = record.get("Type")
    if lane_type == "Raw":
        self._init_raw_reads(config, record)
    elif lane_type == "Trimmed":
        self.files.update(record["Data"])
    else:
        # Raised explicitly instead of using 'assert False', since assert
        # statements are removed when Python is run with -O, which would
        # let records of an unknown type pass silently.
        raise AssertionError("Unexpected data type in Reads(): %s"
                             % (repr(lane_type)))

    # Check Quality Score
    # if not self._check_raw_read_quality(record):
    #     assert False, "Quality Scores do not match: %s" % (repr(record["Data"]))

    # Drop any read types that the user asked to exclude
    for name in record["Options"]["ExcludeReads"]:
        self.files.pop(name, None)

    if config.allow_missing_input_files and self.nodes:
        input_missing = missing_files(self.nodes[0].input_files)
        output_missing = missing_files(self.nodes[0].output_files)
        # The nodes are discarded when the first node's inputs are missing
        # but all of its outputs are present; presumably so that existing
        # results can be used without the original input -- TODO confirm
        if input_missing and not output_missing:
            self.nodes = ()
def _split_lanes_by_filenames(makefile):
    """Expands the filename templates of "Raw" records, and optionally
    splits such records into one record per matched file.

    For every record of type "Raw" yielded by _iterate_over_records, the
    "Data" entry is replaced by the result of paths.collect_files. If the
    option "SplitLanesByFilenames" equals True, or is a list containing the
    barcode of the record, and more than one file was collected for any key,
    then the record is removed from the makefile and replaced by deep copies
    named "<barcode>_NNN" (numbered from 001), each holding one file per key
    ("SE", or "PE_1" and "PE_2").

    Raises MakefileError if splitting was requested and missing_files
    reports missing files for any of the collected file sets."""
    # NOTE(review): Records are added to / removed from the makefile while
    # iterating; this relies on _iterate_over_records not being invalidated
    # by such changes -- confirm against its implementation.
    iterator = _iterate_over_records(makefile)
    for (target, sample, library, barcode, record) in iterator:
        if record["Type"] != "Raw":
            continue

        template = record["Data"]
        record["Data"] = files = paths.collect_files(template)

        # Compared by equality (not identity) to preserve the handling of
        # any value that compares equal to True.
        split = record["Options"]["SplitLanesByFilenames"]
        if not ((split == True) or (isinstance(split, list) and (barcode in split))):
            continue

        if any(missing_files(file_set) for file_set in files.itervalues()):
            # The template is wrapped in a tuple so that formatting cannot
            # fail should the template itself be a tuple.
            raise MakefileError("Unable to split by filename for "
                                "search-string '%s', did not find any "
                                "files; please verify that the path "
                                "is correct and update the makefile."
                                % (template,))
        elif any(len(v) > 1 for v in files.itervalues()):
            library_records = makefile["Targets"][target][sample][library]
            lane_template = library_records.pop(barcode)

            keys = ("SE",) if ("SE" in files) else ("PE_1", "PE_2")
            input_files = [files[key] for key in keys]

            # izip_longest pads the shorter list with None, in which case
            # the length comparison below fails with a TypeError.
            input_files_iter = itertools.izip_longest(*input_files)
            for (index, filenames) in enumerate(input_files_iter, start=1):
                assert len(filenames) == len(keys)
                assert len(filenames[0]) == len(filenames[-1])

                new_barcode = "%s_%03i" % (barcode, index)
                current = copy.deepcopy(lane_template)
                current["Data"] = dict((key, [filename])
                                       for (key, filename)
                                       in zip(keys, filenames))
                current["Tags"]["PU_cur"] = new_barcode

                library_records[new_barcode] = current
def _split_lanes_by_filenames(makefile):
    """Collects the files matching the templates of "Raw" records and, if
    requested through the "SplitLanesByFilenames" option, replaces a record
    matching several files with one record per file, named "<barcode>_NNN".

    Raises MakefileError if splitting was requested and missing_files
    reports missing files for any of the collected file sets."""
    for (target, sample, library, barcode, record) in _iterate_over_records(makefile):
        if record["Type"] != "Raw":
            continue

        search_string = record["Data"]
        collected = paths.collect_files(search_string)
        record["Data"] = collected

        option = record["Options"]["SplitLanesByFilenames"]
        enabled_for_barcode = isinstance(option, list) and (barcode in option)
        if not ((option == True) or enabled_for_barcode):
            continue

        if any(missing_files(file_set) for file_set in collected.itervalues()):
            raise MakefileError("Unable to split by filename for search-string '%s', did not find files" % search_string)

        if not any(len(file_set) > 1 for file_set in collected.itervalues()):
            # At most one file per key; nothing to split
            continue

        barcodes = makefile["Targets"][target][sample][library]
        original = barcodes.pop(barcode)

        if "SE" in collected:
            keys = ("SE",)
        else:
            keys = ("PE_1", "PE_2")

        per_key_files = [collected[key] for key in keys]
        for (index, filenames) in enumerate(itertools.izip_longest(*per_key_files), start=1):
            assert len(filenames) == len(keys)
            assert len(filenames[0]) == len(filenames[-1])

            new_barcode = "%s_%03i" % (barcode, index)
            current = copy.deepcopy(original)

            data = {}
            for (key, filename) in zip(keys, filenames):
                data[key] = [filename]
            current["Data"] = data
            current["Tags"]["PU_cur"] = new_barcode

            barcodes[new_barcode] = current
def is_done(self):
    """Reports whether or not this node is done: This is the case if every
    subnode is done, and if none of the output files of this node are missing
    (empty files are considered as valid). To change this behavior, override
    the 'is_done' property."""
    subnodes_done = all(subnode.is_done for subnode in self.subnodes)

    # Output files are only inspected once all subnodes are known to be done
    return subnodes_done and not fileutils.missing_files(self.output_files)
def _check_bwa_prefix(prefix):
    """Verifies that the index files found for 'prefix' match the installed
    version of BWA, raising a NodeError if they do not.

    Indexes created by BWA v0.5.x include a few files that are not created
    by v0.6.x and later; the presence or absence of these files is used to
    infer which version built the index. A mismatch between the inferred
    version and the installed version is treated as an error that must be
    resolved by the user, rather than being handled by re-indexing.

    Each prefix is only checked on the first call; nothing is checked if the
    BWA version cannot be determined, as that is reported elsewhere."""
    if prefix in _PREFIXES_CHECKED:
        return
    _PREFIXES_CHECKED.add(prefix)

    try:
        bwa_version = BWA_VERSION.version
    except versions.VersionRequirementError:
        return  # Ignored here, reported elsewhere

    # Extensions of files only produced by v0.5.x
    legacy_exts = (".rbwt", ".rpac", ".rsa")
    # Extensions of files produced by v0.5.x, v0.6.x, and v0.7.x
    shared_exts = (".amb", ".ann", ".bwt", ".pac", ".sa")

    v05x_files = set(prefix + ext for ext in legacy_exts)
    common_files = set(prefix + ext for ext in shared_exts)
    all_files = v05x_files.union(common_files)
    current_files = all_files.difference(missing_files(all_files))

    expected_version = None
    if current_files.intersection(common_files):
        has_v05x_files = bool(current_files.intersection(v05x_files))

        if bwa_version >= (0, 6, 0):
            if has_v05x_files:
                expected_version = "v0.5.x"
        elif bwa_version < (0, 6, 0):
            if not has_v05x_files:
                expected_version = "v0.6.x or later"

    if not expected_version:
        return

    message = "BWA version is v%s, but prefix appears to be created using %s!\n" \
              " Your copy of BWA may have changed, or you may be using the wrong\n" \
              " prefix. To resolve this issue, either change your prefix, re-install\n" \
              " BWA %s, or remove the prefix files at\n" \
              " $ ls %s.*"
    version_string = ".".join(map(str, bwa_version))

    raise NodeError(message % (version_string, expected_version,
                               expected_version, prefix))
def _check_bwa_prefix(prefix):
    """Verifies that the index files found for 'prefix' match the installed
    version of BWA, raising a NodeError if they do not.

    Indexes created by BWA v0.5.x include a few files that are not created
    by v0.6.x and later; the presence or absence of these files is used to
    infer which version built the index. A mismatch between the inferred
    version and the installed version is treated as an error that must be
    resolved by the user, rather than being handled by re-indexing.

    Each prefix is only checked on the first call; nothing is checked if the
    BWA version cannot be determined, as that is reported elsewhere."""
    if prefix in _PREFIXES_CHECKED:
        return
    _PREFIXES_CHECKED.add(prefix)

    try:
        bwa_version = BWA_VERSION.version
    except versions.VersionRequirementError:
        return  # Ignored here, reported elsewhere

    # Extensions of files only produced by v0.5.x
    legacy_exts = (".rbwt", ".rpac", ".rsa")
    # Extensions of files produced by v0.5.x, v0.6.x, and v0.7.x
    shared_exts = (".amb", ".ann", ".bwt", ".pac", ".sa")

    v05x_files = set(prefix + ext for ext in legacy_exts)
    common_files = set(prefix + ext for ext in shared_exts)
    all_files = v05x_files.union(common_files)
    current_files = all_files.difference(missing_files(all_files))

    expected_version = None
    if current_files.intersection(common_files):
        has_v05x_files = bool(current_files.intersection(v05x_files))

        if bwa_version >= (0, 6, 0):
            if has_v05x_files:
                expected_version = "v0.5.x"
        elif bwa_version < (0, 6, 0):
            if not has_v05x_files:
                expected_version = "v0.6.x or later"

    if not expected_version:
        return

    message = "BWA version is v%s, but prefix appears to be created using %s!\n" \
              " Your copy of BWA may have changed, or you may be using the wrong\n" \
              " prefix. To resolve this issue, either change your prefix, re-install\n" \
              " BWA %s, or remove the prefix files at\n" \
              " $ ls %s.*"
    version_string = ".".join(map(str, bwa_version))

    raise NodeError(message % (version_string, expected_version,
                               expected_version, prefix))
def _check_for_missing_files(self, filenames, description):
    """Raises a NodeError listing every file in 'filenames' that is reported
    as missing by fileutils.missing_files; 'description' is included in the
    error message to identify the kind of files checked. Does nothing if no
    files are missing."""
    absent = fileutils.missing_files(filenames)
    if not absent:
        return

    file_list = "\n\t ".join(absent)
    raise NodeError("Missing %s files for command:\n\t- Command: %s\n\t- Files: %s"
                    % (description, self, file_list))
def test_missing_files__mixed_files():
    # Given one missing and one existing file, only the former is expected
    missing_path = "tests/data/missing_file_1"
    existing_path = "tests/data/empty_file_1"

    observed = missing_files([missing_path, existing_path])
    assert_equal(observed, [missing_path])
def test_missing_files__file_doesnt_exist():
    # A missing file is expected to be returned as is
    filename = "tests/data/missing_file_1"
    assert_equal(missing_files([filename]), [filename])
def test_missing_files__file_exists():
    # An existing (empty) file is not expected to be reported as missing
    observed = missing_files(["tests/data/empty_file_1"])
    assert_equal(observed, [])