def create_or_get_subtable(table, subtable_key, size):
    """Return the subtable stored under 'subtable_key' in 'table'.

    If no subtable exists yet, a new ReadGroup with the given Size is
    created, registered in 'table', and returned.
    """
    existing = get_in(table, subtable_key)
    if existing is not None:
        return existing

    fresh = ReadGroup()
    fresh.Size = size
    set_in(table, subtable_key, fresh)
    return fresh
def _read_raw_bam_stats(self, table):
    """Record raw (pre duplicate-filtering) hit counts in 'table'.

    For each (genome, target, sample, library) group in self._in_raw_bams,
    the total hit count is read from the associated coverage tables and
    stored under a per-genome "hits_raw(...)" key together with a
    human-readable description string.
    """
    # .items() instead of Py2-only .iteritems(); other versions of this
    # method in the file (and Python 3) already use .items().
    for ((genome, target, sample, library), filenames) in self._in_raw_bams.items():
        key = (target, sample, library)
        hits, _ = self._read_coverage_tables(key, filenames)

        value = (hits, "# Total number of hits (prior to PCR duplicate filtering)")
        set_in(table, (target, sample, library, genome, "hits_raw(%s)" % genome), value)
def create_or_get_subtable(table, subtable_key, size):
    """Fetch the subtable at 'subtable_key', lazily creating it.

    On first access a fresh copy of READGROUP_TEMPLATE is made, its "Size"
    field set to 'size', and the copy stored in 'table' before returning.
    """
    cached = get_in(table, subtable_key)
    if cached is not None:
        return cached

    fresh = dict(READGROUP_TEMPLATE)
    fresh["Size"] = size
    set_in(table, subtable_key, fresh)
    return fresh
def _read_tables(self, prefixes, genomes):
    """Build the full statistics table and add merged summary rows.

    Collects read settings plus raw/unique BAM statistics into a nested
    {target: {sample: {library: subtables}}} dict, then adds "*" rows
    merging all libraries per sample and all samples per target.
    Returns the completed table.
    """
    table = {}
    self._read_reads_settings(table)
    self._read_raw_bam_stats(table)
    self._read_lib_bam_stats(table)

    for (target, samples) in table.items():
        merged_samples = {}
        for (sample, libraries) in samples.items():
            merged_libraries = {}
            for (library, subtables) in libraries.items():
                # Fold each named subtable into both the per-sample and
                # per-target accumulators before annotating it.
                for (tblname, subtable) in subtables.items():
                    merged_libraries[tblname] = self._merge_tables(
                        (merged_libraries.get(tblname, {}), subtable)
                    )
                    merged_samples[tblname] = self._merge_tables(
                        (merged_samples.get(tblname, {}), subtable)
                    )
                libraries[library] = self._annotate_subtables(subtables, genomes)
            # "*" library row: all libraries of this sample merged.
            set_in(
                table,
                (target, sample, "*"),
                self._annotate_subtables(merged_libraries, genomes),
            )
        # "*"/"*" row: all samples of this target merged.
        set_in(
            table,
            (target, "*", "*"),
            self._annotate_subtables(merged_samples, genomes),
        )
    return table
def update_gtf_table(table, gtf, scaffolds, contig_prefix):
    """Insert one GTF feature into 'table'.

    The record is keyed by (gene type, gene id, transcript id, exon number,
    feature); scaffold-placed contigs are remapped to chromosome
    coordinates. An AssertionError is raised on duplicate keys.
    """
    # Workaround for bug in Pysam, which mis-parses individual properties
    # (e.g. exon_number) if these are not quoted. This does not apply to
    # asDict, which uses a different parsing implementation (v0.7.8).
    properties = gtf.asDict()

    gene_type = properties.get("gene_biotype")
    if gene_type is None:
        gene_type = properties.get("gene_type", "unknown_genetype")

    keys = (
        gene_type,
        properties["gene_id"],
        properties["transcript_id"],
        int(properties["exon_number"]),
        gtf.feature,
    )

    record = {
        "contig": contig_prefix + gtf.contig,
        "start": gtf.start,
        # In pysam, 'end' equals the past-the-end position
        "end": gtf.end - 1,
        "strand": gtf.strand,
        "feature": gtf.feature,
        "transcript": properties["transcript_id"],
    }

    if record["contig"] in scaffolds:
        scaffold = scaffolds[record["contig"]]
        offset = int(scaffold["chromStart"])
        record["contig"] = scaffold["chrom"]
        record["start"] += offset
        record["end"] += offset

    assert not get_in(table, keys), keys
    set_in(table, keys, record)
def update_gtf_table(table, gtf, scaffolds, contig_prefix):
    """Add a record for one GTF feature to 'table'.

    Records are keyed (gene_type, gene_id, transcript_id, exon_number,
    feature); duplicates trigger an AssertionError. Features located on
    known scaffolds are translated into chromosome coordinates.
    """
    # Workaround for bug in Pysam, which mis-parses individual properties
    # (e.g. exon_number) if these are not quoted. This does not apply to
    # asDict, which uses a different parsing implementation (v0.7.8).
    properties = gtf.asDict()
    gene_type = properties.get("gene_biotype")
    if gene_type is None:
        # Fall back to the alternative attribute name, then to a placeholder.
        gene_type = properties.get("gene_type", "unknown_genetype")
    keys = (gene_type,
            properties["gene_id"],
            properties["transcript_id"],
            int(properties["exon_number"]),
            gtf.feature)

    record = {
        "contig": contig_prefix + gtf.contig,
        "start": gtf.start,
        # In pysam, 'end' equals the past-the-end position
        "end": gtf.end - 1,
        "strand": gtf.strand,
        "feature": gtf.feature,
        "transcript": properties["transcript_id"]
    }

    if record["contig"] in scaffolds:
        # Remap scaffold coordinates onto the placed chromosome.
        # NOTE(review): assumes UCSC-style 'chrom'/'chromStart' keys — confirm.
        contig = scaffolds[record["contig"]]
        record["contig"] = contig["chrom"]
        record["start"] += int(contig["chromStart"])
        record["end"] += int(contig["chromStart"])

    assert not get_in(table, keys), keys
    set_in(table, keys, record)
def test_set_in__three_kws_in_partial_dictionary():
    """set_in must merge new keys into partially-populated nested dicts."""
    partial = {"Foo": {12: 0}}
    utils.set_in(partial, ["Foo", 13, (1, 2)], 17)
    expected = {"Foo": {12: 0, 13: {(1, 2): 17}}}
    assert_equal(partial, expected)

    partial = {"Foo": {13: {"Bar": None}}}
    utils.set_in(partial, ["Foo", 13, (1, 2)], 17)
    expected = {"Foo": {13: {(1, 2): 17, "Bar": None}}}
    assert_equal(partial, expected)
def _read_lib_bam_stats(self, table):
    """Record duplicate-filtered hit and nucleotide counts in 'table'.

    For each (genome, target, sample, library) group in self._in_lib_bams,
    reads the coverage tables and stores "hits_unique(...)" (with a
    description) and "hits_unique_nts(...)" (with None) per genome.
    """
    # .items() instead of Py2-only .iteritems(); matches the Python 3
    # variants of this method elsewhere in the file.
    for ((genome, target, sample, library), filenames) in self._in_lib_bams.items():
        key = (target, sample, library)
        hits, nts = self._read_coverage_tables(key, filenames)

        value = (hits, "# Total number of hits (excluding any PCR duplicates)")
        set_in(table, (target, sample, library, genome, "hits_unique(%s)" % genome), value)
        set_in(table, (target, sample, library, genome, "hits_unique_nts(%s)" % genome), (nts, None))
def _read_raw_bam_stats(self, table):
    """Store per-genome raw hit counts collected from the input BAM files."""
    for (group, filenames) in self._in_raw_bams.items():
        (genome, target, sample, library) = group
        hits, _ = self._read_coverage_tables((target, sample, library), filenames)

        label = "hits_raw(%s)" % genome
        set_in(table, (target, sample, library, genome, label), hits)
def add_connection(self, node_id_a, node_id_b, blength=None):
    """Register a bidirectional edge between two nodes.

    'blength' is an optional branch length; it must be non-negative, and
    a single tree may not mix branches with and without lengths.
    Raises GraphError on violation of either rule.
    """
    has_length = blength is not None
    if has_length and float(blength) < 0:
        raise GraphError("Branch-lengths must be non-negative")
    elif has_length != self.has_branch_lengths:
        # First branch decides whether this tree carries lengths at all.
        if self.has_branch_lengths is not None:
            raise GraphError(
                "Tree contains branches with and without lengths")
        self.has_branch_lengths = has_length

    # Store the edge in both directions for symmetric lookup.
    set_in(self.connections, (node_id_a, node_id_b), blength)
    set_in(self.connections, (node_id_b, node_id_a), blength)
def _read_reads_settings(self, table):
    """Collect per-barcode read statistics and merge them per library.

    Stores per-barcode stats under (target, sample, library, "reads",
    barcode), then collapses each library's "reads" subtables into one
    merged table. Returns 'table'.
    """
    # .items()/.values() instead of Py2-only .iteritems(); list() keeps the
    # Py2 behaviour of passing a concrete sequence to _merge_tables.
    for ((sample, library, barcode), (filetype, filename)) in self._in_raw_read.items():
        key = (self._target, sample, library, "reads", barcode)
        set_in(table, key, self._stat_read_settings(filetype, filename))

    for (_, samples) in table.items():
        for (sample, libraries) in samples.items():
            for (library, prefixes) in libraries.items():
                prefixes["reads"] = self._merge_tables(list(prefixes["reads"].values()))

    return table
def _collect_clade_from(self, cache, p_node, c_node):
    """Return the set of leaves reachable from 'c_node' entering via 'p_node'.

    Results are memoized in 'cache' keyed on the directed edge
    (p_node, c_node); cached values are stored as frozensets.
    """
    c_clade = get_in(cache, (p_node, c_node), set())
    if not c_clade:
        # Not cached yet (cached clades are non-empty frozensets).
        if self.is_leaf(c_node):
            c_clade.add(c_node)

        # Recurse into every neighbour except the node we arrived from.
        for n_node in self.connections[c_node]:
            if n_node != p_node:
                c_clade.update(
                    self._collect_clade_from(cache, c_node, n_node))
        # Cache an immutable copy for future lookups of this edge.
        set_in(cache, (p_node, c_node), frozenset(c_clade))
    return c_clade
def _read_raw_bam_stats(self, table):
    """Store raw (pre duplicate-filtering) hit counts per genome in 'table'.

    Each entry is a (count, description) tuple under a
    "hits_raw(<genome>)" key.
    """
    # Py2-only .iteritems() replaced by .items(), matching the Python 3
    # variants of this method in the file.
    for ((genome, target, sample, library), filenames) in self._in_raw_bams.items():
        key = (target, sample, library)
        hits, _ = self._read_coverage_tables(key, filenames)

        value = (
            hits, "# Total number of hits (prior to PCR duplicate filtering)")
        set_in(table, (target, sample, library, genome, "hits_raw(%s)" % genome), value)
def _read_reads_settings(self, table):
    """Gather per-barcode read settings and merge them per library.

    Per-barcode statistics go under (target, sample, library, "reads",
    barcode); afterwards each library's "reads" entries are merged into a
    single table. Returns 'table'.
    """
    # .items()/.values() replace the Py2-only .iteritems(); list() preserves
    # the concrete-sequence argument _merge_tables received under Python 2.
    for ((sample, library, barcode), (filetype, filename)) in self._in_raw_read.items():
        key = (self._target, sample, library, "reads", barcode)
        set_in(table, key, self._stat_read_settings(filetype, filename))

    for (_, samples) in table.items():
        for (sample, libraries) in samples.items():
            for (library, prefixes) in libraries.items():
                prefixes["reads"] = self._merge_tables(
                    list(prefixes["reads"].values()))

    return table
def _read_lib_bam_stats(self, table):
    """Store unique-hit and nucleotide counts per genome in 'table'.

    For each library BAM group, stores "hits_unique(<genome>)" with a
    description string and "hits_unique_nts(<genome>)" with None.
    """
    # Py2-only .iteritems() replaced by .items() for Python 3 compatibility.
    for ((genome, target, sample, library), filenames) in self._in_lib_bams.items():
        key = (target, sample, library)
        hits, nts = self._read_coverage_tables(key, filenames)

        value = (hits, "# Total number of hits (excluding any PCR duplicates)")
        set_in(
            table,
            (target, sample, library, genome, "hits_unique(%s)" % genome),
            value)
        set_in(table, (target, sample, library, genome, "hits_unique_nts(%s)" % genome),
               (nts, None))
def _read_lib_bam_stats(self, table):
    """Store per-genome unique-hit and nucleotide counts from library BAMs."""
    for (group, filenames) in self._in_lib_bams.items():
        (genome, target, sample, library) = group
        hits, nts = self._read_coverage_tables((target, sample, library), filenames)

        prefix = (target, sample, library, genome)
        set_in(table, prefix + ("hits_unique(%s)" % genome,), hits)
        set_in(table, prefix + ("hits_unique_nts(%s)" % genome,), nts)
def read_table(table, filename):
    """Merge one padded statistics file into the nested 'table'.

    Rows are keyed (Name, Sample, Library, Contig); rows containing "*"
    are summary rows and skipped. Counts are accumulated into a
    READGROUP_TEMPLATE-shaped dict per key; Size must agree across files.
    """
    with open(filename) as handle:
        for record in parse_padded_table(handle):
            key = (record["Name"], record["Sample"],
                   record["Library"], record["Contig"])
            if "*" in key:
                continue

            subtable = get_in(table, key)
            if subtable is None:
                subtable = dict(READGROUP_TEMPLATE)
                subtable["Size"] = int(record["Size"])
                set_in(table, key, subtable)

            assert int(subtable["Size"]) == int(record["Size"])
            # 'field' (not 'key') avoids shadowing the row key above.
            for field in READGROUP_TEMPLATE:
                if field != "Size":
                    subtable[field] += int(record.get(field, 0))
def read_table(table, filename):
    """Accumulate one padded statistics file into 'table'.

    Rows are keyed (Name, Sample, Library, Contig), with "*" summary rows
    skipped; per-key counters live in ReadGroup instances whose Size must
    be consistent across input files.
    """
    with open(filename) as handle:
        for record in parse_padded_table(handle):
            key = (record["Name"], record["Sample"],
                   record["Library"], record["Contig"])
            if "*" in key:
                continue

            subtable = get_in(table, key)
            if subtable is None:
                subtable = ReadGroup()
                subtable.Size = int(record["Size"])
                set_in(table, key, subtable)

            assert int(subtable.Size) == int(record["Size"])
            # 'field' (not 'key') avoids shadowing the row key above.
            for field in ReadGroup.__slots__:
                if field != "Size":
                    subtable[field] += int(record.get(field, 0))
def write_records(self, records):
    """Route records to PE or SE sinks, pairing mates by query name.

    Records are grouped by (qname, read number); complete 1/2 pairs go to
    the paired-end sinks, anything left over to the single-end sink.
    """
    record_cache = {}
    for record in records:
        # 0 = neither read1 nor read2 flag set.
        num = 0
        if record.is_read1:
            num = 1
        elif record.is_read2:
            num = 2

        set_in(record_cache, (record.qname, num), record)

    # .values() replaces the Py2-only .itervalues() (twice below).
    for pair in record_cache.values():
        # Only write complete pairs
        if (1 in pair) and (2 in pair):
            self._sink_pe_1.write_records([pair.pop(1)])
            self._sink_pe_2.write_records([pair.pop(2)])

        # Any orphan files are written to the SE sink
        for record in list(pair.values()):
            self._sink_se.write_records([record])
def _read_tables(self, prefixes, genomes):
    """Assemble the statistics table and append merged "*" summary rows.

    Gathers read settings plus raw/unique BAM stats into
    {target: {sample: {library: subtables}}}, then adds a per-sample
    "*" row (libraries merged) and a per-target "*"/"*" row (samples
    merged). Returns the table.
    """
    table = {}
    self._read_reads_settings(table)
    self._read_raw_bam_stats(table)
    self._read_lib_bam_stats(table)

    for (target, samples) in table.items():
        merged_samples = {}
        for (sample, libraries) in samples.items():
            merged_libraries = {}
            for (library, subtables) in libraries.items():
                # Merge each named subtable into both accumulators.
                for (tblname, subtable) in subtables.items():
                    merged_libraries[tblname] = self._merge_tables((merged_libraries.get(tblname, {}), subtable))
                    merged_samples[tblname] = self._merge_tables((merged_samples.get(tblname, {}), subtable))
                libraries[library] = self._annotate_subtables(subtables, genomes)
            # Per-sample summary across all of its libraries.
            set_in(table, (target, sample, "*"), self._annotate_subtables(merged_libraries, genomes))
        # Per-target summary across all of its samples.
        set_in(table, (target, "*", "*"), self._annotate_subtables(merged_samples, genomes))
    return table
def read_table(table, filename):
    """Read a padded statistics file and accumulate counts into 'table'.

    Each row is keyed (Name, Sample, Library, Contig); rows containing
    "*" are pre-computed summaries and are skipped. Counters accumulate
    into a ReadGroup per key; the Size column must agree across files.
    """
    with open(filename) as table_file:
        for record in parse_padded_table(table_file):
            key = (
                record["Name"],
                record["Sample"],
                record["Library"],
                record["Contig"],
            )
            if "*" in key:
                continue

            subtable = get_in(table, key)
            if subtable is None:
                subtable = ReadGroup()
                subtable.Size = int(record["Size"])
                set_in(table, key, subtable)

            # Contig sizes must be identical in every merged file.
            assert int(subtable.Size) == int(record["Size"])
            # NOTE: this loop variable shadows the row key above.
            for key in ReadGroup.__slots__:
                if key != "Size":
                    # Missing columns count as zero.
                    subtable[key] += int(record.get(key, 0))
def convert_reads(config, destination, record, sink_cache):
    """Convert one makefile record's BAM files into read sinks.

    Reads are grouped by the source lane name (PU_src tag) and routed to
    paired-end or single-end sinks, filtered by the configured minimum
    quality and length.
    """
    # Source name is used, to re-merge split lanes
    name = record.tags.get("PU_src")
    destination = os.path.join(destination, name)
    make_dirs(os.path.join(config.destination, destination))

    def _open_se_sink(reads_type):
        # Ensure a single-end sink exists in the cache; return its key.
        key = (name, reads_type)
        if not get_in(sink_cache, key):
            filename = ReadSink.get_filename(destination, reads_type.lower())
            set_in(sink_cache, key, ReadSink.open(config.destination, filename))
        return key

    # Hoisted out of the per-file loop: thresholds are loop-invariant, and
    # the original redefined this predicate for every BAM file.
    min_quality = config.min_quality
    min_length = config.min_length

    def _keep_record(rec):
        # 'rec' (not 'record') avoids shadowing the outer argument.
        return (rec.qual >= min_quality) and (len(rec.seq) >= min_length)

    # .items() replaces the Py2-only .iteritems().
    for (reads_type, bam_files) in record.bams.items():
        # Processed reads are pre-aligned BAMs which have been cleaned up
        if reads_type in ("Paired", "Processed"):
            # Record "Single" reads; these may result from orphan SE reads
            _open_se_sink("Singleton")

            key = (name, "Paired")
            if not get_in(sink_cache, key):
                set_in(sink_cache, key, PEReadSink.open(config.destination, destination))
        else:
            key = _open_se_sink(reads_type)

        sink = get_in(sink_cache, key)
        for filename in bam_files:
            print("%sProcessing file %r" % (_INDENTATION * 4, filename))
            with pysam.Samfile(filename) as handle:
                sink.write_records(rec for rec in handle if _keep_record(rec))
def calculate_totals(table):
    """Append totals rows to 'table' and return it.

    First validates that every contig has a consistent Size across all
    groups (raising BAMStatsError otherwise), then adds per-library,
    per-sample ("*") and per-target ("*"/"*") totals via
    _calculate_totals_in.
    """
    # Pass 1: collect contig lengths, checking for conflicts.
    lengths = {}
    for samples in table.values():
        for libraries in samples.values():
            for contigs in libraries.values():
                for (name, contig) in contigs.items():
                    size = lengths.get(name)
                    if (size is not None) and (size != contig.Size):
                        raise BAMStatsError(name)
                    lengths[name] = contig.Size

    # Pass 2: add summary rows. Iterating sorted snapshots means the
    # "*" rows added below do not feed back into these loops.
    for (name, samples) in sorted(table.items()):
        for (sample, libraries) in sorted(samples.items()):
            for (library, contigs) in sorted(libraries.items()):
                totals = _calculate_totals_in(contigs, lengths)
                set_in(table, (name, sample, library), totals)

            totals = _calculate_totals_in(libraries, lengths)
            set_in(table, (name, sample, "*"), totals)

        # NOTE(review): grand total is computed over the whole 'table',
        # including rows added above — confirm this is intended.
        set_in(table, (name, "*", "*"), _calculate_totals_in(table, lengths))
    return table
def calculate_totals(table):
    """Append totals rows to 'table' and return it.

    Verifies that every contig's "Size" agrees across all groups (raising
    BAMStatsError on conflict), then adds per-library, per-sample ("*")
    and per-target ("*"/"*") totals via _calculate_totals_in.
    """
    # Py2-only .itervalues()/.iteritems() replaced with .values()/.items(),
    # matching the other version of this function in the file.
    lengths = {}
    for samples in table.values():
        for libraries in samples.values():
            for contigs in libraries.values():
                for (name, contig) in contigs.items():
                    size = lengths.get(name)
                    if (size is not None) and (size != contig["Size"]):
                        raise BAMStatsError(name)
                    lengths[name] = contig["Size"]

    # Iterate sorted snapshots so the "*" rows added below do not feed
    # back into the loops.
    for (name, samples) in sorted(table.items()):
        for (sample, libraries) in sorted(samples.items()):
            for (library, contigs) in sorted(libraries.items()):
                totals = _calculate_totals_in(contigs, lengths)
                set_in(table, (name, sample, library), totals)

            totals = _calculate_totals_in(libraries, lengths)
            set_in(table, (name, sample, "*"), totals)

        set_in(table, (name, "*", "*"), _calculate_totals_in(table, lengths))
    return table
def test_set_in__fail_on_invalid_sub_dictionary_third_level():
    """set_in must raise TypeError when a third-level value is not a dict.

    Wrapped in pytest.raises for consistency with the other failure tests
    in this file; without it the expected exception would fail the test.
    """
    with pytest.raises(TypeError):
        utils.set_in({1: {2: None}}, [1, 2, 3], 17)
def test_set_in__fail_on_invalid_sub_dictionary_second_level():
    """set_in must raise TypeError when a second-level value is not a dict.

    Uses pytest.raises like the sibling failure tests; a bare call would
    let the expected exception escape and fail the test.
    """
    with pytest.raises(TypeError):
        utils.set_in({1: None}, [1, 2], 17)
def test_set_in__fail_on_invalid_sub_dictionary_first_level():
    """set_in must raise TypeError when the root container is not a dict.

    Uses pytest.raises like the sibling failure tests; a bare call would
    let the expected exception escape and fail the test.
    """
    with pytest.raises(TypeError):
        utils.set_in(None, [1], 17)
def test_set_in__fail_on_no_kws():
    """set_in must raise ValueError when given an empty key sequence.

    Uses pytest.raises like the sibling failure tests; a bare call would
    let the expected exception escape and fail the test.
    """
    with pytest.raises(ValueError):
        utils.set_in({}, [], 17)
def test_set_in__update_value_two_kw():
    """set_in overwrites an existing value reached through two keys."""
    container = {1: {2: 3}}
    utils.set_in(container, [1, 2], 365)
    expected = {1: {2: 365}}
    assert_equal(container, expected)
def _open_se_sink(reads_type):
    """Ensure a single-end ReadSink for 'reads_type' exists in the cache.

    Returns the cache key; a sink is created and registered on first use.
    (Relies on name/destination/sink_cache/config from the enclosing scope.)
    """
    key = (name, reads_type)
    if get_in(sink_cache, key):
        return key

    filename = ReadSink.get_filename(destination, reads_type.lower())
    set_in(sink_cache, key, ReadSink.open(config.destination, filename))
    return key
def test_set_in__fail_on_invalid_sub_dictionary_first_level():
    """set_in cannot descend into a non-dict root; expect TypeError."""
    with pytest.raises(TypeError):
        utils.set_in(None, [1], 17)
def test_set_in__three_kws_in_empty_dictionary():
    """Three keys create two levels of nested dictionaries from scratch."""
    result = {}
    utils.set_in(result, ["Foo", 13, (1, 2)], 17)
    expected = {"Foo": {13: {(1, 2): 17}}}
    assert_equal(result, expected)
def test_set_in__two_kws_in_empty_dictionary():
    """Two keys create one level of nesting from scratch."""
    result = {}
    utils.set_in(result, ["Foo", 13], 17)
    expected = {"Foo": {13: 17}}
    assert_equal(result, expected)
def test_set_in__fail_on_no_kws():
    """An empty key sequence is invalid; expect ValueError."""
    with pytest.raises(ValueError):
        utils.set_in({}, [], 17)
def test_set_in__iteratable_keywords():
    """Keys may be supplied as any iterable, not only a list."""
    result = {}
    keys = iter(["Foo", 13, (1, 2)])
    utils.set_in(result, keys, 17)
    expected = {"Foo": {13: {(1, 2): 17}}}
    assert_equal(result, expected)
def test_set_in__fail_on_invalid_sub_dictionary_second_level():
    """A non-dict value at the second level cannot be descended; TypeError."""
    with pytest.raises(TypeError):
        utils.set_in({1: None}, [1, 2], 17)
def test_set_in__fail_on_invalid_sub_dictionary_third_level():
    """A non-dict value at the third level cannot be descended; TypeError."""
    with pytest.raises(TypeError):
        utils.set_in({1: {2: None}}, [1, 2, 3], 17)
def test_set_in__single_kw_in_empty_dictionary():
    """A single key behaves like plain dictionary assignment."""
    result = {}
    utils.set_in(result, ["Foo"], 17)
    expected = {"Foo": 17}
    assert_equal(result, expected)
def test_set_in__update_value_one_kw():
    """An existing top-level value is replaced in place."""
    result = {1: None}
    utils.set_in(result, [1], 3.14)
    expected = {1: 3.14}
    assert_equal(result, expected)