Ejemplo n.º 1
0
    def test_record_retrieval(self, data, base_kmer_size, num_colors_per_graph,
                              n_kmers):
        """Check that a built graph collection returns every record added.

        ``n_kmers`` records with pairwise distinct k-mer strings are drawn
        and registered with a ``GraphCollection`` builder. Each record is
        then looked up in the built collection by its k-mer string, and its
        k-mer string, coverage and edges must equal those of the drawn
        record.
        """
        # given
        kmer_size = 2 * base_kmer_size + 1
        num_colors = sum(num_colors_per_graph)
        collection_builder = GraphCollection(
            n_colors_per_graph=num_colors_per_graph, kmer_size=kmer_size)

        # Keyed by k-mer string: rejects duplicate draws while preserving
        # the order in which records were added.
        records_by_kmer = {}
        while len(records_by_kmer) < n_kmers:
            candidate = data.draw(kmer_records(kmer_size, num_colors))
            if candidate.kmer in records_by_kmer:
                continue  # k-mer string already used; draw again
            records_by_kmer[candidate.kmer] = candidate
            collection_builder.with_kmer_record(candidate)

        collection = collection_builder.build()

        # when
        for expected in records_by_kmer.values():
            actual = collection[expected.kmer]

            # then
            assert expected.kmer == actual.kmer
            assert expected.coverage == actual.coverage
            assert expected.edges == actual.edges
Ejemplo n.º 2
0
    def test_record_retrieval(self, data, kmer_size, num_colors, n_kmers):
        """Check that records written to a graph can be retrieved by k-mer.

        ``n_kmers`` records with pairwise distinct k-mer strings are drawn
        and added to a graph builder. The built graph is wrapped in
        ``self.RAClass`` and every record is looked up by its k-mer string;
        the k-mer string, coverage and edges of the result must match the
        drawn record.
        """
        # given
        # NOTE(review): presumably this bound keeps n_kmers below the number
        # of distinct k-mers kmer_records can produce, so the rejection loop
        # below terminates -- confirm against the kmer_records strategy.
        assume(n_kmers <= 4**(kmer_size - 1))
        graph_builder = (builder.Graph().with_kmer_size(
            kmer_size).with_num_colors(num_colors))

        expected_kmers = []
        seen_kmers = set()
        for _ in range(n_kmers):
            kmer = data.draw(kmer_records(kmer_size, num_colors))
            # Redraw until the k-mer string has not been used before.
            while kmer.kmer in seen_kmers:
                kmer = data.draw(kmer_records(kmer_size, num_colors))
            seen_kmers.add(kmer.kmer)
            graph_builder.with_kmer_record(kmer)
            expected_kmers.append(kmer)

        cg = self.RAClass(graph_builder.build())

        # when
        for expected_kmer in expected_kmers:
            kmer = cg[expected_kmer.kmer]

            # then
            assert expected_kmer.kmer == kmer.kmer
            assert np.all(expected_kmer.coverage == kmer.coverage)
            # zip() stops at the shorter sequence, so without this length
            # check a result with missing or extra edge sets would pass.
            assert len(expected_kmer.edges) == len(kmer.edges)
            for expected, actual in zip(expected_kmer.edges, kmer.edges):
                assert expected == actual
Ejemplo n.º 3
0
    def test_index(self, data, kmer_size, n_kmers):
        """Check that ``index_kmer_string`` returns each record's sorted rank.

        ``n_kmers`` single-color records with pairwise distinct k-mer strings
        are drawn for an odd ``kmer_size`` and written to a graph builder.
        A ``KmerUintSequence`` is created over the built graph stream, and
        for each record its position in the sorted record list must equal
        the index the sequence reports for its k-mer string.
        """
        # given
        assume(kmer_size % 2 == 1)
        num_colors = 1
        graph_builder = builder.Graph().with_kmer_size(kmer_size)
        graph_builder = graph_builder.with_num_colors(num_colors)

        sorted_records = []
        used_kmer_strings = set()
        while len(sorted_records) < n_kmers:
            candidate = data.draw(kmer_records(kmer_size, num_colors))
            if candidate.kmer in used_kmer_strings:
                continue  # k-mer string already used; draw again
            used_kmer_strings.add(candidate.kmer)
            graph_builder.with_kmer_record(candidate)
            sorted_records.append(candidate)
        sorted_records.sort()

        graph_stream = graph_builder.build()
        header_stream = graph_builder.header.build()
        header = Header.from_stream(header_stream)
        # The records start right after the serialized header bytes.
        body_start = len(header_stream.getvalue())

        # when
        sequence = KmerUintSequence(graph_handle=graph_stream,
                                    body_start=body_start,
                                    header=header,
                                    n_records=len(sorted_records))

        # then
        for rank, record in enumerate(sorted_records):
            assert rank == sequence.index_kmer_string(record.kmer)
Ejemplo n.º 4
0
    def test_parses_records(self, data, kmer_size, num_colors, n_kmers,
                            test_serializer):
        """Check that ``self.RAClass`` returns every record written to a graph.

        ``n_kmers`` records with pairwise distinct k-mer strings are drawn
        and written to a graph builder, and the built graph is wrapped in
        ``self.RAClass``. When ``test_serializer`` is true, each parsed k-mer
        is first dumped and compared with its raw data, then all k-mers are
        re-serialized in shuffled key order through ``kmer_serializer.Kmers``
        and the parser is rebuilt from that output. Finally every drawn
        record is looked up and compared on k-mer string, coverage and edges.
        """
        # given
        assume(n_kmers <= 4**kmer_size / 4)

        graph_builder = builder.Graph().with_kmer_size(kmer_size)
        graph_builder = graph_builder.with_num_colors(num_colors)

        drawn_records = []
        used_kmer_strings = set()
        while len(drawn_records) < n_kmers:
            candidate = data.draw(kmer_records(kmer_size, num_colors))
            if candidate.kmer in used_kmer_strings:
                continue  # k-mer string already used; draw again
            used_kmer_strings.add(candidate.kmer)
            graph_builder.with_kmer_record(candidate)
            drawn_records.append(candidate)
        ra_parser = self.RAClass(graph_builder.build())

        if test_serializer:
            # Each parsed k-mer must dump to exactly the raw bytes it holds.
            for parsed_kmer in ra_parser.values():
                dumped = io.BytesIO()
                parsed_kmer.dump(dumped)
                assert parsed_kmer._kmer_data._data == dumped.getvalue()

            sample_names = ra_parser.sample_names
            serialized = io.BytesIO()
            shuffled_keys = list(ra_parser.keys())
            random.shuffle(shuffled_keys)

            def lookup(key):
                # Reads from the parser built from the original graph; the
                # name is only rebound after the dump below has finished.
                return ra_parser[key]

            serializer = kmer_serializer.Kmers(keys=shuffled_keys,
                                               val_callable=lookup,
                                               kmer_size=kmer_size,
                                               num_colors=num_colors,
                                               sample_names=sample_names)
            serializer.dump(serialized)
            serialized.seek(0)
            ra_parser = self.RAClass(serialized)

        # when
        for expected in drawn_records:
            actual = ra_parser[expected.kmer]

            # then
            assert expected.kmer == actual.kmer
            assert np.all(expected.coverage == actual.coverage)
            assert expected.edges == actual.edges
Ejemplo n.º 5
0
    def test_converts_to_raw(self, data, kmer_size, num_colors, from_bioseq):
        """Check that ``StringKmerConverter.to_raw`` matches ``get_raw_kmer``.

        A record is drawn for an odd ``kmer_size``, turned into a ``Kmer``
        via its byte string, and its k-mer string is passed to the converter
        either directly or wrapped in ``Seq`` (when ``from_bioseq`` is true).
        The converted value must equal the k-mer's own raw representation.
        """
        # given
        assume(kmer_size % 2 == 1)
        record = data.draw(kmer_records(kmer_size, num_colors))
        kmer_data = KmerData(record.to_bytestring(),
                             kmer_size=kmer_size,
                             num_colors=num_colors)
        kmer = Kmer.from_kmer_data(kmer_data)
        converter = StringKmerConverter(kmer.kmer_size)

        # when
        converter_input = Seq(kmer.kmer) if from_bioseq else kmer.kmer
        raw_kmer = converter.to_raw(converter_input)

        # then
        assert kmer.get_raw_kmer() == raw_kmer