def test_outputs_contigs_for_bubble(self, tmpdir):
    """Traversing from 'AAA' yields both input sequences as FASTA records.

    The two inputs, 'AAACCC' and 'AAAGCCC', differ by a single inserted
    'G'. Each must appear in the output, for exactly two records in total.
    """
    # given
    sequences = ['AAACCC', 'AAAGCCC']
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in sequences:
        graph_builder.with_dna_sequence(sequence)
    graph = graph_builder.build(tmpdir)

    # when
    result = runner.Cortexpy(SPAWN_PROCESS).traverse(graph=graph, contig='AAA')

    # then
    assert result.returncode == 0, result
    fasta = expectation.Fasta(result.stdout)
    for sequence in sequences:
        fasta.has_record(sequence)
    fasta.has_n_records(2)
def test_dal19_data(self, tmpdir):
    """Traverse a kmer-size-47 graph of three sequences and check the output.

    Two records are expected: the third input sequence on its own, and the
    first sequence extended by the second (from index 47 onwards) and the
    third (from index 48 onwards).
    """
    # given
    first, second, third = (
        'CCCCGAGGGAAGCTCTATGAATTCGCCAATCCCAGTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGT',
        'GTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATA',
        'TAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATAGATAACACTACCAAAGAGCAAGACTATCAG',
    )
    merged = ''.join((first, second[47:], third[48:]))
    graph_builder = builder.Mccortex().with_kmer_size(47)
    for sequence in (first, second, third):
        graph_builder.with_dna_sequence(sequence)
    graph = graph_builder.build(tmpdir)

    # when
    result = runner.Cortexpy(SPAWN_PROCESS).traverse(
        to_json=False,
        graph=graph,
        contig='CAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACAT',
    )

    # then
    assert result.returncode == 0, result
    fasta = expectation.Fasta(result.stdout)
    for expected_sequence in (merged, third):
        fasta.has_record(expected_sequence)
    fasta.has_n_records(2)
def test_outputs_multiple_combinations(self, tmpdir):
    """Traversing from 'AAA' outputs six records with ids g0_p0 .. g0_p5.

    The expected records are the three input sequences plus 'CAACT',
    'CAACA' and 'AAACC'. Every record's id must be one of the six
    'g0_p<i>' ids.
    """
    # given
    sequences = ['CAACC', 'AAACA', 'AAACT']
    path_count = 6
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in sequences:
        graph_builder.with_dna_sequence(sequence)
    graph = graph_builder.build(tmpdir)

    # when
    result = runner.Cortexpy(SPAWN_PROCESS).traverse(
        to_json=False, graph=graph, contig='AAA')

    # then
    assert result.returncode == 0, result
    fasta = expectation.Fasta(result.stdout)
    allowed_ids = list(map('g0_p{}'.format, range(path_count)))
    # Inputs first, then the additional expected sequences.
    for sequence in (*sequences, 'CAACT', 'CAACA', 'AAACC'):
        fasta.has_record(sequence).has_id_in(*allowed_ids)
    fasta.has_n_records(6)
def run(self):
    """Run the traverse driver, invoke ``traverse``, and wrap its output.

    Returns:
        ``expectation.Fasta`` of stdout when ``self.to_json`` is falsy;
        otherwise ``expectation.JsonGraph`` of the parsed stdout, or, when
        ``self.subgraphs`` is set, ``expectation.JsonGraphs`` over the
        files in the tmpdir whose names start with ``subgraphs``.

    Raises:
        AssertionError: if ``self.subgraphs`` is set without
            ``self.to_json``, or if the process exits non-zero.
    """
    self.traverse_driver.run()

    out_prefix = None
    if self.subgraphs:
        # Subgraph results are only read back on the JSON path below.
        assert self.to_json
        out_prefix = Path(str(self.tmpdir)) / 'subgraphs'

    links = self.link_builder.build(self.tmpdir, self.traverse_driver.traversal)
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    ret = cortexpy.traverse(
        self.traverse_driver.traversal,
        to_json=self.to_json,
        contig=self.seed_strings,
        graph_index=self.graph_index,
        extra_start_kmer=self.extra_start_kmer,
        links_file=links,
    )

    # Echo both streams so they show up in the test log.
    print(ret.stdout)
    print(ret.stderr, file=sys.stderr)
    assert ret.returncode == 0, ret

    if not self.to_json:
        return expectation.Fasta(ret.stdout)
    if not self.subgraphs:
        return expectation.JsonGraph(json.loads(ret.stdout))
    subgraph_files = list(out_prefix.parent.glob(out_prefix.name + '*'))
    return expectation.JsonGraphs(subgraph_files)
def run(self):
    """Write the initial sequences to FASTA, run ``assemble``, wrap the output.

    Each entry of ``self.initial_seqs`` is written to
    ``initial_seqs.fasta`` in the tmpdir, using its list index as the
    record id. The graph is built with ``self.mccortex_builder``, and
    ``assemble`` is asked to write to ``output.fa`` in the same tmpdir.

    Returns:
        ``expectation.Fasta`` built from the text of ``output.fa``.

    Raises:
        AssertionError: if the process exits with a non-zero return code.
    """
    initial_seq_fasta = self.tmpdir / 'initial_seqs.fasta'
    initial_seqs = [
        SeqRecord(Seq(rec), id=str(idx))
        for idx, rec in enumerate(self.initial_seqs)
    ]
    SeqIO.write(initial_seqs, str(initial_seq_fasta), "fasta")
    output_graph = self.mccortex_builder.build(self.tmpdir)
    output = self.tmpdir / 'output.fa'
    completed_process = runner.Cortexpy(SPAWN_PROCESS).assemble(
        graph=output_graph, initial_seqs=initial_seq_fasta, out=output)
    assert completed_process.returncode == 0, completed_process
    # Close the file deterministically instead of leaking the handle until
    # garbage collection.
    with open(output, 'r') as output_handle:
        fasta_text = output_handle.read()
    return expectation.Fasta(fasta_text)
def test_outputs_fasta_of_kmers(self, tmpdir):
    """``view_graph(kmers=True)`` on a kmer-size-3 graph of 'ATTCC' lists 3 kmers.

    The expected records 'AAT', 'GAA' and 'GGA' are the reverse
    complements of the sequence's kmers 'ATT', 'TTC' and 'TCC'.
    """
    # given
    graph = (
        builder.Mccortex()
        .with_dna_sequence('ATTCC')
        .with_kmer_size(3)
        .build(tmpdir)
    )

    # when
    result = runner.Cortexpy(SPAWN_PROCESS).view_graph(kmers=True, graph=graph)

    # then
    assert result.returncode == 0, result
    fasta = expectation.Fasta(result.stdout)
    for kmer in ('AAT', 'GAA', 'GGA'):
        fasta.has_record(kmer)
    fasta.has_n_records(3)
def test_raises_on_max_path_1_exceeded(self, tmpdir, max_paths):
    """Check ``traverse`` against a ``max_paths`` limit on a two-path graph.

    With ``max_paths == 1`` the process must exit with code 64 and report
    the exceeded limit on stderr. For any other value it must exit 0 and
    output both input sequences as the only two records.
    """
    # given
    MAX_PATH_EXIT_CODE = 64
    sequences = ['CAACC', 'CAACT']
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in sequences:
        graph_builder.with_dna_sequence(sequence)

    # when
    result = runner.Cortexpy(spawn_process=True).traverse(
        graph=graph_builder.build(tmpdir),
        max_paths=max_paths,
    )

    # then
    if max_paths != 1:
        assert 0 == result.returncode
        fasta = expectation.Fasta(result.stdout)
        fasta.has_record('CAACC')
        fasta.has_record('CAACT')
        fasta.has_n_records(2)
        return

    assert MAX_PATH_EXIT_CODE == result.returncode
    assert f'Max paths ({max_paths}) exceeded' in result.stderr