Beispiel #1
0
def read_table(table, filename):
    """Merge the per-contig counts found in `filename` into `table`.

    Every row produced by `parse_padded_table` is addressed by its
    (Name, Sample, Library, Contig) values. Rows in which any of those
    four values is "*" are skipped. For the remaining rows, a counter
    dict (a copy of READGROUP_TEMPLATE) is fetched from, or created in,
    `table`, and every template field except "Size" is incremented by the
    integer value of the matching column (0 if the column is absent).

    `table` is modified in place; nothing is returned.
    """
    with open(filename) as handle:
        for row in parse_padded_table(handle):
            path = tuple(
                row[column]
                for column in ("Name", "Sample", "Library", "Contig")
            )
            if "*" in path:
                continue

            counts = get_in(table, path)
            if counts is None:
                # First time this path is seen: start from a fresh copy of
                # the template and record the size reported by this row.
                counts = dict(READGROUP_TEMPLATE)
                counts["Size"] = int(row["Size"])
                set_in(table, path, counts)

            # All rows sharing a path are expected to report the same size.
            assert int(counts["Size"]) == int(row["Size"])
            for field in READGROUP_TEMPLATE:
                if field == "Size":
                    continue
                counts[field] += int(row.get(field, 0))
Beispiel #2
0
def read_table(table, filename):
    """Merge the per-contig counts found in `filename` into `table`.

    Every row produced by `parse_padded_table` is addressed by its
    (Name, Sample, Library, Contig) values. Rows in which any of those
    four values is "*" are skipped. For the remaining rows, a ReadGroup
    is fetched from, or created in, `table`, and every slot except "Size"
    is incremented by the integer value of the matching column (0 if the
    column is absent).

    `table` is modified in place; nothing is returned.
    """
    with open(filename) as handle:
        for row in parse_padded_table(handle):
            path = tuple(
                row[column]
                for column in ("Name", "Sample", "Library", "Contig")
            )
            if "*" in path:
                continue

            group = get_in(table, path)
            if group is None:
                # First time this path is seen: create the record and store
                # the size reported by this row.
                group = ReadGroup()
                group.Size = int(row["Size"])
                set_in(table, path, group)

            # All rows sharing a path are expected to report the same size.
            assert int(group.Size) == int(row["Size"])
            for field in ReadGroup.__slots__:
                if field == "Size":
                    continue
                # NOTE(review): relies on ReadGroup supporting item access
                # by slot name -- confirm against the class definition.
                group[field] += int(row.get(field, 0))
Beispiel #3
0
def read_table(table, filename):
    """Accumulate the counts stored in `filename` into the nested `table`.

    Rows come from `parse_padded_table`. A row is located in `table` by
    the tuple of its "Name", "Sample", "Library" and "Contig" columns;
    rows where any of these is "*" are ignored. The ReadGroup stored at
    that location (created on first use) has every slot other than "Size"
    increased by the integer value of the same-named column, with missing
    columns counted as 0.

    The function mutates `table` and returns None.
    """
    key_columns = ("Name", "Sample", "Library", "Contig")

    with open(filename) as source:
        for entry in parse_padded_table(source):
            location = tuple(entry[column] for column in key_columns)
            if "*" in location:
                continue

            readgroup = get_in(table, location)
            if readgroup is None:
                # Nothing recorded for this location yet; create the record
                # and remember the size given by the current row.
                readgroup = ReadGroup()
                readgroup.Size = int(entry["Size"])
                set_in(table, location, readgroup)

            # Every row for a given location must agree on the size.
            assert int(readgroup.Size) == int(entry["Size"])
            for slot in ReadGroup.__slots__:
                if slot == "Size":
                    continue
                # NOTE(review): relies on ReadGroup supporting item access
                # by slot name -- confirm against the class definition.
                readgroup[slot] += int(entry.get(slot, 0))
Beispiel #4
0
def _read_max_depth(filename, prefix, sample):
    """Collect the 'MaxDepth' column of a depth-histogram table per name.

    If `filename` is already present in `_DEPTHS_CACHE`, the cached value
    is returned immediately. Otherwise the table is read and the
    'MaxDepth' value of every row whose "Name" is not "*" while "Sample",
    "Library" and "Contig" are all "*" is stored under that row's name.

    NOTE(review): this definition appears to be cut off after the read
    loop; `prefix`, `sample` and `max_depth` are not used in the visible
    part and nothing is returned on a cache miss.

    Raises:
        MakefileError: if two matching rows share a name, or if reading
            the file fails with OSError / IOError.
    """
    if filename in _DEPTHS_CACHE:
        return _DEPTHS_CACHE[filename]

    # Not referenced in the visible portion of the function; kept because
    # the remainder of the definition is not shown here.
    max_depth = None
    max_depths = {}
    try:
        with open(filename) as handle:
            for row in parse_padded_table(handle):
                if (row["Name"] != "*"
                        and row["Sample"] == "*"
                        and row["Library"] == "*"
                        and row["Contig"] == "*"):

                    if row["Name"] in max_depths:
                        raise MakefileError("Depth histogram %r contains "
                                            "multiple 'MaxDepth' records for "
                                            "sample %r; please rebuild!"
                                            % (filename, row["Name"]))

                    max_depths[row["Name"]] = row["MaxDepth"]
    # 'except X as name' is accepted by Python 2.6+ and Python 3; the older
    # 'except X, name' spelling is a SyntaxError on Python 3.
    except (OSError, IOError) as error:
        raise MakefileError("Error reading depth-histogram (%s): %s"
                            % (filename, error))
Beispiel #5
0
def _read_max_depth(filename, prefix, sample):
    """Collect the 'MaxDepth' column of a depth-histogram table per name.

    If `filename` is already present in `_DEPTHS_CACHE`, the cached value
    is returned immediately. Otherwise the table is read and the
    'MaxDepth' value of every row whose "Name" is not "*" while "Sample",
    "Library" and "Contig" are all "*" is stored under that row's name.

    NOTE(review): this definition appears to be cut off after the read
    loop; `prefix`, `sample` and `max_depth` are not used in the visible
    part and nothing is returned on a cache miss.

    Raises:
        MakefileError: if two matching rows share a name, or if reading
            the file fails with OSError / IOError.
    """
    if filename in _DEPTHS_CACHE:
        return _DEPTHS_CACHE[filename]

    # Not referenced in the visible portion of the function; kept because
    # the remainder of the definition is not shown here.
    max_depth = None
    max_depths = {}
    try:
        with open(filename) as handle:
            for row in parse_padded_table(handle):
                if (row["Name"] != "*"
                        and row["Sample"] == "*"
                        and row["Library"] == "*"
                        and row["Contig"] == "*"):

                    if row["Name"] in max_depths:
                        raise MakefileError("Depth histogram %r contains "
                                            "multiple 'MaxDepth' records for "
                                            "sample %r; please rebuild!"
                                            % (filename, row["Name"]))

                    max_depths[row["Name"]] = row["MaxDepth"]
    # 'except X as name' is accepted by Python 2.6+ and Python 3; the older
    # 'except X, name' spelling is a SyntaxError on Python 3.
    except (OSError, IOError) as error:
        raise MakefileError("Error reading depth-histogram (%s): %s"
                            % (filename, error))
Beispiel #6
0
def _parse_padded_table(*args, **kwargs):
    """Call parse_padded_table with the given arguments; return a list."""
    rows = parse_padded_table(*args, **kwargs)
    return list(rows)
Beispiel #7
0
def _parse_padded_table(*args, **kwargs):
    """Forward all arguments to parse_padded_table and collect its output.

    The result of parse_padded_table is consumed completely and returned
    as a list.
    """
    parsed = parse_padded_table(*args, **kwargs)
    return list(parsed)
Beispiel #8
0
def _read_max_depth(filename, prefix, sample):
    """Return the integer 'MaxDepth' recorded for `sample` in `filename`.

    The depth-histogram table is read with `parse_padded_table`; only rows
    whose "Name" is not "*" while "Sample", "Library" and "Contig" are all
    "*" are considered, and their 'MaxDepth' value is stored per name.

    The value for `sample` is then chosen as follows:
      1. the record whose name equals `sample`, if there is one;
      2. otherwise, the single record whose name up to the first "."
         equals `sample`;
      3. otherwise, if the table holds exactly one record, that record
         (a warning is logged about the name mismatch).

    The result is stored in `_DEPTHS_CACHE` under `filename`.

    NOTE(review): the cache is keyed by `filename` only, so a later call
    with the same file but a different `sample` returns the value cached
    by the first call.

    Args:
        filename: Path of the depth-histogram table.
        prefix: Only used in the informational log message.
        sample: Name of the sample to look up.

    Returns:
        The MaxDepth value as an int.

    Raises:
        MakefileError: if the file cannot be read, if it contains several
            records with the same name, if no record can be selected for
            `sample`, or if the selected value is "NA" or not a
            non-negative integer.
    """
    if filename in _DEPTHS_CACHE:
        return _DEPTHS_CACHE[filename]

    max_depth = None
    max_depths = {}
    try:
        with open(filename) as handle:
            for row in parse_padded_table(handle):
                if (row["Name"] != "*" and row["Sample"] == "*"
                        and row["Library"] == "*" and row["Contig"] == "*"):

                    if row["Name"] in max_depths:
                        raise MakefileError("Depth histogram %r contains "
                                            "multiple 'MaxDepth' records for "
                                            "sample %r; please rebuild!" %
                                            (filename, row["Name"]))

                    max_depths[row["Name"]] = row["MaxDepth"]
    except (OSError, IOError) as error:
        raise MakefileError("Error reading depth-histogram (%s): %s" %
                            (filename, error))

    log = logging.getLogger(__name__)
    if sample in max_depths:
        max_depth = max_depths[sample]
    else:
        # Group the recorded names by the part before the first "."
        name_counts = {}
        name_mapping = {}
        for cand_sample in max_depths:
            name = cand_sample.split(".", 1)[0]
            name_mapping[name] = cand_sample
            name_counts[name] = name_counts.get(name, 0) + 1

        # The uniqueness check must use the counts; 'name_mapping' holds
        # sample names (strings) and therefore never compares equal to 1.
        if name_counts.get(sample) == 1:
            # Sample name (with some extensions) found
            # This is typical if 'paleomix depths' has been run manually.
            max_depth = max_depths[name_mapping[sample]]
        elif len(max_depths) == 1:
            # Just one sample in the depth histogram; even though it does not
            # match, we assume that this is the correct table. This is because
            # manually generating files / renaming files would otherwise cause
            # failure when using 'MaxDepth: auto'.
            ((cand_sample, max_depth), ) = max_depths.items()
            log.warning(
                "Name in depths file not as expected; found %r, not %r:",
                cand_sample,
                sample,
            )

    if max_depth is None:
        raise MakefileError(
            "MaxDepth for %r not found in depth-histogram: %r" %
            (sample, filename))
    elif max_depth == "NA":
        # The message refers to the sample, so report the sample name
        # rather than the filename.
        raise MakefileError("MaxDepth is not calculated for sample %r; "
                            "cannot determine MaxDepth values automatically." %
                            (sample, ))
    elif not max_depth.isdigit():
        raise MakefileError("MaxDepth is not a valid for sample %r in %r; "
                            "expected integer, found %r." %
                            (sample, filename, max_depth))

    max_depth = int(max_depth)

    log.info("%s.%s = %i", sample, prefix, max_depth)
    _DEPTHS_CACHE[filename] = max_depth
    return max_depth