Esempio n. 1
0
    def __init__(self, config, record, quality_offset):
        """Set up the read files (and any nodes) for a single lane record.

        Arguments:
          config -- object providing 'destination' (first component of the
                    lane folder) and the 'allow_missing_input_files' flag.
          record -- dict describing the lane; the keys "Tags", "Type" and
                    "Options" are read here, as is "Data" for trimmed lanes.
          quality_offset -- stored unchanged as 'self.quality_offset'.

        Raises AssertionError if record["Type"] is neither "Raw" nor
        "Trimmed".
        """
        self.quality_offset = quality_offset
        self.files = {}
        self.stats = None
        self.nodes = ()

        tags = record["Tags"]
        self.folder = os.path.join(config.destination, tags["Target"], "reads",
                                   tags["SM"], tags["LB"], tags["PU_cur"])

        lane_type = record.get("Type")
        if lane_type == "Raw":
            self._init_raw_reads(config, record)
        elif lane_type == "Trimmed":
            # Trimmed reads are used as is
            self.files.update(record["Data"])
        else:
            # Raised explicitly instead of using 'assert False', since assert
            # statements are stripped when Python is run with -O, which would
            # let unknown lane types pass silently.
            raise AssertionError("Unexpected data type in Reads(): %s"
                                 % (repr(lane_type),))

        # NOTE: The quality score check of raw reads (previously done via
        #       self._check_raw_read_quality(record)) is currently disabled.

        # Drop read types that the user asked to exclude; unknown names are
        # silently ignored
        for name in record["Options"]["ExcludeReads"]:
            self.files.pop(name, None)

        if config.allow_missing_input_files and self.nodes:
            # Drop the nodes if the first node lacks input files but none of
            # its output files; presumably so that existing output can be
            # used without the original input -- TODO confirm.
            first_node = self.nodes[0]
            input_missing = missing_files(first_node.input_files)
            output_missing = missing_files(first_node.output_files)
            if input_missing and not output_missing:
                self.nodes = ()
Esempio n. 2
0
    def __init__(self, config, record, quality_offset):
        """Initialize the files, folder and nodes for one lane record.

        Arguments:
          config -- object from which 'destination' (first component of the
                    lane folder) and 'allow_missing_input_files' are read.
          record -- dict describing the lane; "Tags", "Type" and "Options"
                    are read here, and "Data" is read for trimmed lanes.
          quality_offset -- saved as 'self.quality_offset' without changes.

        Raises AssertionError for any record["Type"] other than "Raw" or
        "Trimmed".
        """
        self.quality_offset = quality_offset
        self.files = {}
        self.stats = None
        self.nodes = ()

        tags = record["Tags"]
        self.folder = os.path.join(config.destination, tags["Target"], "reads",
                                   tags["SM"], tags["LB"], tags["PU_cur"])

        lane_type = record.get("Type")
        if lane_type == "Raw":
            self._init_raw_reads(config, record)
        elif lane_type == "Trimmed":
            # Already trimmed files are simply recorded
            self.files.update(record["Data"])
        else:
            # An explicit raise is used rather than 'assert False', as the
            # latter is removed when running with -O, in which case an
            # unexpected lane type would go unnoticed.
            raise AssertionError("Unexpected data type in Reads(): %s"
                                 % (repr(lane_type),))

        # NOTE: Checking of quality scores for raw reads, formerly done using
        #       self._check_raw_read_quality(record), is currently disabled.

        # Remove excluded read types; names without files are ignored
        for name in record["Options"]["ExcludeReads"]:
            self.files.pop(name, None)

        if config.allow_missing_input_files and self.nodes:
            # When input files of the first node are missing, but all of its
            # output files exist, the nodes are discarded; presumably to
            # allow existing results to be re-used -- TODO confirm.
            first_node = self.nodes[0]
            input_missing = missing_files(first_node.input_files)
            output_missing = missing_files(first_node.output_files)
            if input_missing and not output_missing:
                self.nodes = ()
Esempio n. 3
0
def _split_lanes_by_filenames(makefile):
    """Expand the file templates of raw lanes and, where requested, split
    lanes matching several files into one lane per file (or pair of files).

    For every record of type "Raw", record["Data"] is replaced by the result
    of paths.collect_files() for the original template. If the option
    "SplitLanesByFilenames" is enabled for the lane (either equal to True, or
    a list containing the barcode of the lane) and any key matched more than
    one file, then the lane is removed from the makefile and replaced by lanes
    named "<barcode>_001", "<barcode>_002", etc., each of which holds a single
    file per key and has its "PU_cur" tag set to the new name.

    Raises MakefileError if splitting is enabled and missing_files() reports
    anything for any of the collected sets of files.
    """
    iterator = _iterate_over_records(makefile)
    for (target, sample, library, barcode, record) in iterator:
        if record["Type"] == "Raw":
            template = record["Data"]
            record["Data"] = files = paths.collect_files(template)
            split = record["Options"]["SplitLanesByFilenames"]

            if (split == True) or (isinstance(split, list) and (barcode in split)):
                if any(missing_files(file_set) for file_set in files.itervalues()):
                    # Each line but the last ends with a space, so that the
                    # concatenated literals form properly separated words
                    raise MakefileError("Unable to split by filename for "
                                        "search-string '%s', did not find any "
                                        "files; please verify that the path "
                                        "is correct and update the makefile."
                                        % (template,))
                elif any(len(v) > 1 for v in files.itervalues()):
                    # From here on, 'template' is the lane record itself,
                    # which is copied for each of the new lanes
                    template = makefile["Targets"][target][sample][library].pop(barcode)
                    keys = ("SE",) if ("SE" in files) else ("PE_1", "PE_2")

                    input_files = [files[key] for key in keys]
                    # NOTE: izip_longest pads the shorter list with None if
                    #       the lists differ in length, in which case the
                    #       len() call below raises a TypeError.
                    input_files_iter = itertools.izip_longest(*input_files)
                    for (index, filenames) in enumerate(input_files_iter, start=1):
                        assert len(filenames) == len(keys)
                        assert len(filenames[0]) == len(filenames[-1])
                        new_barcode = "%s_%03i" % (barcode, index)

                        current = copy.deepcopy(template)
                        current["Data"] = dict((key, [filename]) for (key, filename) in zip(keys, filenames))
                        current["Tags"]["PU_cur"] = new_barcode

                        makefile["Targets"][target][sample][library][new_barcode] = current
Esempio n. 4
0
def _split_lanes_by_filenames(makefile):
    """Collect the files of raw lanes, splitting lanes by file on request.

    The "Data" of each "Raw" record is replaced by the files collected using
    paths.collect_files(). If "SplitLanesByFilenames" is enabled for the lane
    (equal to True, or a list containing the barcode), and more than one file
    was found for any key, then the lane is replaced in the makefile by one
    lane per file (or pair of files), named "<barcode>_<index>" with a
    3-digit index starting at 1.

    Raises MakefileError if splitting is enabled, and missing_files() reports
    anything for any of the collected sets of files.
    """
    for (target, sample, library, barcode, record) in _iterate_over_records(makefile):
        if record["Type"] != "Raw":
            continue

        template = record["Data"]
        files = paths.collect_files(template)
        record["Data"] = files

        split = record["Options"]["SplitLanesByFilenames"]
        if not ((split == True) or (isinstance(split, list) and (barcode in split))):
            continue

        if any(missing_files(file_set) for file_set in files.itervalues()):
            raise MakefileError("Unable to split by filename for search-string '%s', did not find files" % template)
        elif not any(len(found) > 1 for found in files.itervalues()):
            # At most one file per key; nothing to split
            continue

        # The original lane is removed, and serves as the template for each
        # of the lanes replacing it
        lanes = makefile["Targets"][target][sample][library]
        lane_template = lanes.pop(barcode)
        keys = ("SE",) if ("SE" in files) else ("PE_1", "PE_2")

        files_by_key = [files[key] for key in keys]
        for (index, filenames) in enumerate(itertools.izip_longest(*files_by_key), start=1):
            assert len(filenames) == len(keys)
            assert len(filenames[0]) == len(filenames[-1])
            new_barcode = "%s_%03i" % (barcode, index)

            new_lane = copy.deepcopy(lane_template)
            new_lane["Data"] = dict(zip(keys, ([filename] for filename in filenames)))
            new_lane["Tags"]["PU_cur"] = new_barcode

            lanes[new_barcode] = new_lane
Esempio n. 5
0
    def is_done(self):
        """Returns true if this node can be considered to be done.

        This requires that every subnode is done, and that nothing is reported
        by fileutils.missing_files() for the output files of this node. Empty
        files are considered valid, and a node that doesn't produce any output
        files is therefore always considered done once its subnodes are. To
        change this behavior, override the 'is_done' property."""
        subnodes_done = all(node.is_done for node in self.subnodes)
        if subnodes_done and not fileutils.missing_files(self.output_files):
            return True

        return False
Esempio n. 6
0
    def is_done(self):
        """Returns true if all subnodes of this node are done, and if none of
        the output files of this node are reported as missing by
        fileutils.missing_files(); empty files are considered as valid. A node
        that doesn't produce output files is consequently always considered
        done, provided that its subnodes are. To change this behavior,
        override the 'is_done' property."""
        for node in self.subnodes:
            if not node.is_done:
                # No need to check any files if a subnode is unfinished
                return False

        return not fileutils.missing_files(self.output_files)
Esempio n. 7
0
def _check_bwa_prefix(prefix):
    """Verifies that the index files found for a prefix are consistent
    with the currently installed version of BWA.

    Indexes produced by BWA v0.5.x and by v0.6+ are incompatible, and
    differ in the files produced, with v0.5.x producing a handful of
    additional files. Relying on normal input-file dependencies would
    therefore cause prefixes to be re-indexed if BWA was changed from
    v0.6+ to v0.5.x, and cause failures during runtime if BWA was
    changed from v0.5.x to v0.6+. This check is what allows prefixes
    to be indexed automatically.

    A difference between the installed version and the version implied
    by the existing files is considered an error requiring intervention
    by the user, and results in a NodeError. Each prefix is checked at
    most once; nothing is checked if the version of BWA could not be
    determined."""
    if prefix in _PREFIXES_CHECKED:
        return
    _PREFIXES_CHECKED.add(prefix)

    try:
        bwa_version = BWA_VERSION.version
    except versions.VersionRequirementError:
        # Problems with the version of BWA are reported elsewhere
        return

    # Files only produced by v0.5.x
    v05x_files = set(prefix + ext for ext in (".rbwt", ".rpac", ".rsa"))
    # Files produced by v0.5.x, v0.6.x, and v0.7.x alike
    common_files = set(prefix + ext
                       for ext in (".amb", ".ann", ".bwt", ".pac", ".sa"))
    all_files = v05x_files | common_files
    current_files = all_files - set(missing_files(all_files))

    if not (current_files & common_files):
        # None of the common index files exist; nothing to compare with
        return

    has_v05x_files = bool(current_files & v05x_files)
    if bwa_version >= (0, 6, 0):
        expected_version = "v0.5.x" if has_v05x_files else None
    elif bwa_version < (0, 6, 0):
        expected_version = None if has_v05x_files else "v0.6.x or later"
    else:
        expected_version = None

    if expected_version is None:
        return

    version_string = ".".join(map(str, bwa_version))
    raise NodeError("BWA version is v%s, but prefix appears to be created using %s!\n"
                    "  Your copy of BWA may have changed, or you may be using the wrong\n"
                    "  prefix. To resolve this issue, either change your prefix, re-install\n"
                    "  BWA %s, or remove the prefix files at\n"
                    "    $ ls %s.*"
                    % (version_string, expected_version, expected_version, prefix))
Esempio n. 8
0
def _check_bwa_prefix(prefix):
    """Checks that the existing index files of a prefix were produced by
    a version of BWA compatible with the one currently installed.

    This is needed for auto-indexing of prefixes: Indexes produced by
    v0.5.x and by v0.6+ are incompatible, and v0.5.x furthermore writes
    a handful of files that later versions do not. With nothing but
    normal input-file dependencies, prefixes would be re-indexed when
    going from v0.6+ to v0.5.x, while going from v0.5.x to v0.6+ would
    result in failures during runtime.

    A NodeError is raised if the installed version differs from the
    version implied by the prefix files, as this requires the user to
    intervene. The check is carried out once per prefix, and is skipped
    if the version of BWA cannot be determined."""
    if prefix in _PREFIXES_CHECKED:
        return
    _PREFIXES_CHECKED.add(prefix)

    try:
        bwa_version = BWA_VERSION.version
    except versions.VersionRequirementError:
        return  # Not handled here; reported elsewhere

    def _with_extensions(extensions):
        return set((prefix + ext) for ext in extensions)

    # Files unique to v0.5.x
    v05x_files = _with_extensions((".rbwt", ".rpac", ".rsa"))
    # Files shared by v0.5.x, v0.6.x, and v0.7.x
    common_files = _with_extensions((".amb", ".ann", ".bwt", ".pac", ".sa"))
    all_files = v05x_files | common_files
    current_files = all_files - set(missing_files(all_files))

    expected_version = None
    if current_files & common_files:
        found_v05x_files = current_files & v05x_files
        if bwa_version >= (0, 6, 0):
            if found_v05x_files:
                expected_version = "v0.5.x"
        elif (bwa_version < (0, 6, 0)) and not found_v05x_files:
            expected_version = "v0.6.x or later"

    if not expected_version:
        return

    template = ("BWA version is v%s, but prefix appears to be created using %s!\n"
                "  Your copy of BWA may have changed, or you may be using the wrong\n"
                "  prefix. To resolve this issue, either change your prefix, re-install\n"
                "  BWA %s, or remove the prefix files at\n"
                "    $ ls %s.*")
    installed_version = ".".join(map(str, bwa_version))

    raise NodeError(template % (installed_version, expected_version,
                                expected_version, prefix))
Esempio n. 9
0
 def _check_for_missing_files(self, filenames, description):
     """Raises a NodeError listing the files among 'filenames' that are
     reported as missing by fileutils.missing_files(), if any. The
     'description' names the kind of files in the error message."""
     missing = fileutils.missing_files(filenames)
     if not missing:
         return

     # One filename per line, aligned with the first filename
     file_list = "\n\t         ".join(missing)
     raise NodeError("Missing %s files for command:\n\t- Command: %s\n\t- Files: %s"
                     % (description, self, file_list))
Esempio n. 10
0
def test_missing_files__mixed_files():
    """Only the missing file is expected back, when passing a list in which
    (judging by the filenames) one file is missing and one file exists."""
    expected = ["tests/data/missing_file_1"]
    observed = missing_files(["tests/data/missing_file_1",
                              "tests/data/empty_file_1"])

    assert_equal(observed, expected)
Esempio n. 11
0
def test_missing_files__file_doesnt_exist():
    """A file that doesn't exist is expected to be returned as missing."""
    filename = "tests/data/missing_file_1"

    assert_equal(missing_files([filename]), [filename])
Esempio n. 12
0
def test_missing_files__file_exists():
    """No files are expected to be returned when the only file exists."""
    observed = missing_files(["tests/data/empty_file_1"])

    assert_equal(observed, [])
Esempio n. 13
0
 def _check_for_missing_files(self, filenames, description):
     """Checks 'filenames' using fileutils.missing_files(), and raises a
     NodeError naming this command and the files in question if any were
     reported; 'description' is included in the message to describe the
     kind of files that were checked."""
     not_found = fileutils.missing_files(filenames)
     if not_found:
         template = "Missing %s files for command:\n\t- Command: %s\n\t- Files: %s"
         # Filenames following the first are placed on their own lines
         listing = "\n\t         ".join(not_found)

         raise NodeError(template % (description, self, listing))
Esempio n. 14
0
def test_missing_files__mixed_files():
    """For a list containing (going by the names) both a missing and an
    existing file, only the missing file is expected to be returned."""
    missing = "tests/data/missing_file_1"
    existing = "tests/data/empty_file_1"

    assert_equal(missing_files([missing, existing]), [missing])
Esempio n. 15
0
def test_missing_files__file_doesnt_exist():
    """The sole file, which doesn't exist, is expected to be returned."""
    expected = ["tests/data/missing_file_1"]
    observed = missing_files(["tests/data/missing_file_1"])

    assert_equal(observed, expected)
Esempio n. 16
0
def test_missing_files__file_exists():
    """An empty list is expected, when the only file given exists."""
    filenames = ["tests/data/empty_file_1"]
    expected = []

    assert_equal(missing_files(filenames), expected)