Esempio n. 1
0
    def test_parses_a_graph(self, tmpdir):
        """Build a Mccortex graph from 'ACGTT' (k=3) and check the parsed kmers.

        The graph file written by the builder is streamed through
        ``kmer_generator_from_stream`` and every parsed kmer is compared
        field by field (kmer string, coverage, edges) with the expected
        records, in order.
        """
        # given
        kmer_size = 3
        mc_builder = (builder.Mccortex().with_dna_sequence(
            'ACGTT').with_kmer_size(kmer_size))

        expected_kmers = [
            KmerRecord('AAC', (1, ), [as_edge_set('......G.')]),
            KmerRecord('ACG', (2, ), [as_edge_set('a......T')]),
        ]

        # when
        output_graph = mc_builder.build(tmpdir)

        # Open the graph in a context manager so the handle is always closed,
        # even when an assertion fails. The generator is drained and compared
        # while the file is still open in case parsed records read lazily.
        with open(output_graph, 'rb') as graph_stream:
            kmer_generator = kmer_generator_from_stream(graph_stream)

            # then
            actual_kmers = list(kmer_generator)
            for kmer in actual_kmers:
                logger.info(kmer)
            for expected_kmer, kmer in zip(expected_kmers, actual_kmers):
                assert kmer.kmer == expected_kmer.kmer
                assert kmer.coverage == expected_kmer.coverage
                assert kmer.edges == expected_kmer.edges
            # zip() stops at the shorter sequence, so check the count as well.
            assert len(actual_kmers) == len(expected_kmers)
Esempio n. 2
0
 def test_raises_on_non_lexlo_kmer(self):
     """Expect ``AttributeError`` when overwriting ``kmer.kmer``.

     A ``Kmer`` is built from the byte string of a single-color 'AAA'
     record; assigning the reverse complement of its kmer string back to
     ``kmer.kmer`` is expected to raise.
     """
     # given
     record = KmerRecord('AAA', [1], [edge_set.empty()])
     kmer_data = KmerData(record.to_bytestring(), kmer_size=3, num_colors=1)
     kmer = Kmer.from_kmer_data(kmer_data)

     # when / then
     with pytest.raises(AttributeError):
         kmer.kmer = reverse_complement(kmer.kmer)
Esempio n. 3
0
 def with_kmer_record(self, kmer):
     """Split a multi-color kmer record across the per-graph builders.

     Each entry of ``self.n_colors_per_graph`` gives the number of
     consecutive colors owned by the graph builder at the same index.
     For every graph, a new ``KmerRecord`` is created from the matching
     slice of ``kmer.coverage`` and ``kmer.edges`` and passed to that
     graph's builder.

     Returns ``self`` so calls can be chained.
     """
     window_start = 0
     for builder_idx, color_count in enumerate(self.n_colors_per_graph):
         # Colors [window_start, window_start + color_count) belong to
         # the graph at builder_idx.
         color_window = slice(window_start, window_start + color_count)
         per_graph_record = KmerRecord(kmer.kmer,
                                       kmer.coverage[color_window],
                                       kmer.edges[color_window])
         self.graph_builders[builder_idx].with_kmer_record(per_graph_record)
         window_start += color_count
     return self
Esempio n. 4
0
    def test_gets_aaa_for_ttt_query(self, RAClass):
        """Check that querying 'AAA' or 'TTT' both return the stored 'AAA' kmer.

        The graph holds a single one-color 'AAA' record; ``RAClass`` is the
        random-access implementation under test.
        """
        # given
        expected_kmer = KmerRecord('AAA', [1], [as_edge_set('........')])

        graph_builder = builder.Graph().with_kmer_size(3).with_num_colors(1)
        graph_builder.with_kmer_record(expected_kmer)

        cg = RAClass(graph_builder.build())

        # when / then: the direct query and the 'TTT' query both resolve
        # to the stored kmer string
        for query_string in ('AAA', 'TTT'):
            found = cg.get_kmer_for_string(query_string)
            assert expected_kmer.kmer == found.kmer
Esempio n. 5
0
    def test_gets_aaa(self):
        """Check that ``values()`` yields the single 'AAA' kmer in the graph.

        A one-color graph containing only an 'AAA' record is built and
        wrapped in ``self.RAClass``. Every kmer returned by ``values()``
        must match the stored record's kmer string, coverage and edges.
        """
        # given
        graph_builder = (builder.Graph().with_kmer_size(3).with_num_colors(1))

        expected_kmer = KmerRecord('AAA', (1, ), [as_edge_set('........')])
        graph_builder.with_kmer_record(expected_kmer)

        cg = self.RAClass(graph_builder.build())

        # when
        actual_kmers = list(cg.values())

        # then
        # Exactly one record was added; without this check the loop below
        # would pass vacuously if values() yielded nothing.
        assert len(actual_kmers) == 1
        for kmer in actual_kmers:
            assert expected_kmer.kmer == kmer.kmer
            assert np.all(expected_kmer.coverage == kmer.coverage)
            assert expected_kmer.edges == kmer.edges
Esempio n. 6
0
def kmer_records(draw, kmer_size, num_colors, kmer_strings=dna_sequences):
    """Draw a random ``KmerRecord`` with ``num_colors`` colors.

    NOTE(review): the ``draw`` argument and the ``s.lists``/``s.integers``
    calls suggest this is a hypothesis composite strategy; the decorator
    is not visible here -- confirm.

    Args:
        draw: callable that draws a value from a strategy.
        kmer_size: exact length of the drawn kmer string.
        num_colors: number of coverage values and edge sets to draw.
        kmer_strings: strategy factory accepting ``min_size``/``max_size``.

    Returns:
        A ``KmerRecord`` built from the drawn kmer string, a tuple of
        per-color coverage values in ``[1, MAX_UINT]``, and a list of one
        ``EdgeSet`` per color.
    """
    # Draw order (kmer, coverage, edges) is kept as-is.
    kmer = draw(kmer_strings(min_size=kmer_size, max_size=kmer_size))

    coverage_strategy = s.lists(
        s.integers(min_value=1, max_value=MAX_UINT),
        min_size=num_colors,
        max_size=num_colors)
    coverage = tuple(draw(coverage_strategy))

    # One row of eight 0/1 flags per color.
    flag_row_strategy = s.lists(
        s.integers(min_value=0, max_value=1), min_size=8, max_size=8)
    flag_rows_strategy = s.lists(
        flag_row_strategy, min_size=num_colors, max_size=num_colors)
    flag_rows = np.array(draw(flag_rows_strategy), dtype=np.uint8)

    # Each edge set keeps the first four flags as drawn, followed by the
    # last four flags in reversed order.
    edge_sets = [
        EdgeSet(np.concatenate((row[:4], row[::-1][:4])))
        for row in flag_rows
    ]
    return KmerRecord(kmer, coverage, edge_sets)
Esempio n. 7
0
    def test_retrieves_kmer_by_random_access(self, tmpdir):
        """Look up 'AAC' by key in a Mccortex graph built from 'ACGTTT' (k=3).

        The record returned by ``RandomAccess.__getitem__`` must match the
        expected kmer string, coverage and edges.
        """
        # given
        kmer_size = 3
        output_graph = (builder.Mccortex().with_dna_sequence(
            'ACGTTT').with_kmer_size(kmer_size).build(tmpdir))

        expected = KmerRecord('AAC', (1, ), [as_edge_set('A.....G.')])

        # Open the graph in a context manager so the handle is always closed,
        # even when an assertion fails. The lookup and comparisons stay inside
        # the block in case the returned record reads lazily from the file.
        with open(output_graph, 'rb') as graph_stream:
            cg = RandomAccess(graph_stream)

            # when
            actual = cg['AAC']

            # then
            logger.info(actual)

            assert actual.kmer == expected.kmer
            assert actual.coverage == expected.coverage
            assert actual.edges == expected.edges
Esempio n. 8
0
 def with_kmer(self,
               kmer_string,
               color_coverage=1,
               edges='........',
               repeat_color_edges_n_times=None):
     """Add a kmer described by strings/ints and return the builder.

     ``kmer_string`` may be either a bare kmer, or a single-space
     separated description: the kmer, then one integer coverage per
     color, then one edge string per color. In the second form the
     number of colors is derived from the word count and overrides
     ``color_coverage`` and ``edges``.

     Args:
         kmer_string: kmer, or the space-separated description above.
         color_coverage: an int (one color) or a list of per-color ints.
         edges: an edge string (one color) or a list of edge strings.
         repeat_color_edges_n_times: when truthy, and ``edges`` is a str
             and ``color_coverage`` is an int, both are repeated this
             many times and the number of colors is set accordingly.

     Returns:
         Whatever ``self.with_kmer_record`` returns for the new record.

     Raises:
         Exception: if the kmer is greater than its reverse complement.
         AssertionError: on a malformed description (double spaces or an
             odd number of fields after the kmer), or if the kmer length
             disagrees with an already-set kmer size.
     """
     if ' ' in kmer_string:
         assert '  ' not in kmer_string
         # First word is the kmer; the rest are coverages then edges.
         kmer_string, *fields = kmer_string.split(' ')
         assert len(fields) % 2 == 0
         num_colors = len(fields) // 2
         self.with_num_colors(num_colors)
         color_coverage = [int(field) for field in fields[:num_colors]]
         edges = fields[num_colors:]

     if str(Seq(kmer_string).reverse_complement()) < kmer_string:
         raise Exception(
             "kmer_string '{}' is not lexlo.  Please fix.".format(
                 kmer_string))

     n_repeats = repeat_color_edges_n_times
     is_single_color_input = (isinstance(edges, str)
                              and isinstance(color_coverage, int))
     if n_repeats and is_single_color_input:
         self.with_num_colors(n_repeats)
         edges = [edges] * n_repeats
         color_coverage = [color_coverage] * n_repeats

     # Normalise scalar inputs to one-element lists.
     if isinstance(edges, str):
         edges = [edges]
     if isinstance(color_coverage, int):
         color_coverage = [color_coverage]

     if not self.kmer_size_is_set:
         self.with_kmer_size(len(kmer_string))
     else:
         assert self.kmer_size == len(kmer_string)

     record = KmerRecord(
         kmer_string, color_coverage,
         tuple(as_edge_set(edge_string) for edge_string in edges))
     return self.with_kmer_record(record)
Esempio n. 9
0
    def test_parses_a_graph_with_kmer_size_32(self, tmpdir):
        """Parse a Mccortex graph whose only kmer is a run of 'A's.

        The contig is exactly one kmer long, so a single record with
        coverage 1 and no edges is expected.

        NOTE(review): the test name says 32 but the kmer size used is 33 --
        confirm which one is intended.
        """
        # given
        kmer_size = 33
        contig = 'A' * kmer_size
        mc_builder = (builder.Mccortex().with_dna_sequence(
            contig).with_kmer_size(kmer_size))

        expected_kmers = [
            KmerRecord(contig, (1, ), [as_edge_set('........')]),
        ]

        # when
        output_graph = mc_builder.build(tmpdir)

        # Open the graph in a context manager so the handle is always closed,
        # even when an assertion fails. The generator is drained and compared
        # while the file is still open in case parsed records read lazily.
        with open(output_graph, 'rb') as graph_stream:
            kmer_generator = kmer_generator_from_stream(graph_stream)

            # then
            actual_kmers = list(kmer_generator)
            for kmer in actual_kmers:
                logger.info(kmer)
            # zip() stops at the shorter sequence; without this check the
            # loop below would pass vacuously if no kmers were parsed.
            assert len(actual_kmers) == len(expected_kmers)
            for expected_kmer, kmer in zip(expected_kmers, actual_kmers):
                assert kmer.kmer == expected_kmer.kmer
                assert kmer.coverage == expected_kmer.coverage
                assert kmer.edges == expected_kmer.edges