def test_graph_parser_streaming(benchmark, graph_size, func_type):
    """Benchmark a streaming parser function over the body of a graph file.

    The graph selected by ``graph_size`` is read fully into memory, its
    header is parsed off the front, and the remaining bytes are handed to
    the function selected by ``func_type`` through ``benchmark``.
    """
    graph = GRAPHS[graph_size]
    func = FUNCS[func_type]
    # Read the file inside a context manager so the handle is closed
    # promptly instead of being left to the garbage collector.
    with open(graph, 'rb') as graph_handle:
        buffer = io.BytesIO(graph_handle.read())
    header = Header.from_stream(buffer)
    # Re-wrap whatever is left after the header so the benchmarked function
    # starts at position 0 of a fresh buffer.
    buffer = io.BytesIO(buffer.read())
    # NOTE(review): if `benchmark` invokes `func` more than once, later
    # calls see the buffer position left by the previous call -- confirm
    # this is intended.
    benchmark(func, buffer, header)
def test_parses_a_graph_header(self, tmpdir):
    """Check the header attributes parsed from a freshly built graph file."""
    # given
    sample_name = 'sample_0'
    dna_sequence = 'ACGTT'
    kmer_size = 3

    mc_builder = (builder.Mccortex().with_dna_sequence(
        dna_sequence, name=sample_name).with_kmer_size(kmer_size))
    output_graph = mc_builder.build(tmpdir)

    expected_header_attributes = {
        'version': 6,
        'kmer_size': kmer_size,
        'record_size': 13,
        'kmer_container_size': 1,
        'num_colors': 1,
        'mean_read_lengths': (len(dna_sequence), ),
        'total_sequences': (len(dna_sequence), ),
        'sample_names': (sample_name.encode(), )
    }

    # when
    # Parse inside a context manager so the graph file handle is closed
    # rather than leaked.
    with open(output_graph, 'rb') as graph_handle:
        header = Header.from_stream(graph_handle)

    # then
    for key, value in expected_header_attributes.items():
        assert getattr(header, key) == value
def test_index(self, data, kmer_size, n_kmers):
    """Check that every k-mer string is found at its sorted position.

    Builds a single-color graph from ``n_kmers`` distinct drawn records and
    asserts that ``index_kmer_string`` returns, for each record, its index
    in the sorted list of records.
    """
    # given
    assume(kmer_size % 2 == 1)
    num_colors = 1

    graph_builder = builder.Graph()
    graph_builder = graph_builder.with_kmer_size(kmer_size)
    graph_builder = graph_builder.with_num_colors(num_colors)

    records = []
    used_kmer_strings = set()
    for _ in range(n_kmers):
        # Keep drawing until we get a k-mer string not used before.
        while True:
            record = data.draw(kmer_records(kmer_size, num_colors))
            if record.kmer not in used_kmer_strings:
                break
        used_kmer_strings.add(record.kmer)
        graph_builder.with_kmer_record(record)
        records.append(record)
    records.sort()

    graph_stream = graph_builder.build()
    header_stream = graph_builder.header.build()
    header = Header.from_stream(header_stream)

    # when
    # The body is taken to start right after the serialized header bytes.
    sequence = KmerUintSequence(
        graph_handle=graph_stream,
        body_start=len(header_stream.getvalue()),
        header=header,
        n_records=len(records),
    )

    # then
    for position, record in enumerate(records):
        assert position == sequence.index_kmer_string(record.kmer)
def header(self):
    """Build a ``Header`` from this object's k-mer and color settings.

    One default-constructed ``ColorInformationBlock`` is supplied per color.
    """
    blocks = []
    for _ in range(self.num_colors):
        blocks.append(ColorInformationBlock())
    return Header(
        kmer_size=self.kmer_size,
        kmer_container_size=self.kmer_container_size,
        num_colors=self.num_colors,
        sample_names=self.sample_names,
        color_info_blocks=blocks,
    )
def test_cython_speed():
    """Profile ``stream_kmers_and_coverage_and_edges`` on a fixture graph.

    Profile data is written to ``Profile.prof`` and the statistics, sorted
    by time, are printed. The function always ends by raising ``Exception``.
    """
    graph = 'fixtures/yeast/NC_001133.9.16kbp.ctx'
    # Read the fixture inside a context manager so the handle is closed
    # instead of being leaked.
    with open(graph, 'rb') as graph_handle:
        buffer = io.BytesIO(graph_handle.read())
    header = Header.from_stream(buffer)
    # Re-wrap the bytes remaining after the header in a fresh buffer.
    buffer = io.BytesIO(buffer.read())
    # `buffer` and `header` look unused but are resolved by name from
    # locals() when the statement string below is executed; do not rename.
    cProfile.runctx("stream_kmers_and_coverage_and_edges(buffer, header)",
                    globals(), locals(), "Profile.prof")
    s = pstats.Stats("Profile.prof")
    # NOTE(review): the unconditional raise is presumably there to force
    # the test runner to show the printed report -- confirm before changing.
    raise Exception(s.strip_dirs().sort_stats("time").print_stats())
def test_does_not_raise(self):
    """Parse the header-only many-colors fixture and check its basic fields."""
    # given
    fixture_dir = os.path.dirname(__file__)
    fixture_path = os.path.join(fixture_dir, 'many_colors_header_only.ctx')

    # when
    with open(fixture_path, 'rb') as stream:
        parsed = Header.from_stream(stream)

    # then
    expectations = (
        ('version', 6),
        ('kmer_size', 47),
        ('kmer_container_size', 2),
        ('num_colors', 25),
    )
    for attribute, expected in expectations:
        assert getattr(parsed, attribute) == expected
def kmer_generator_from_stream(stream):
    """Parse a header from ``stream`` and return the k-mer generator for it.

    The header is read first with ``Header.from_stream``; the same stream
    and the parsed header are then passed on to
    ``kmer_generator_from_stream_and_header``, whose result is returned.
    """
    parsed_header = Header.from_stream(stream)
    kmers = kmer_generator_from_stream_and_header(stream, parsed_header)
    return kmers
def load_cortex_graph(stream):
    """Build a cortex graph from ``stream``.

    The header is parsed from the stream first. A k-mer generator is then
    created from the same stream and header, and both are passed to
    ``build_cortex_graph_from_header``, whose result is returned.
    """
    parsed_header = Header.from_stream(stream)
    kmers = kmer_generator_from_stream_and_header(stream, parsed_header)
    return build_cortex_graph_from_header(parsed_header, kmer_generator=kmers)