def _run(self):
    """Build all mccortex graphs and run the cortexpy ``subgraph`` command.

    The primary ``self.mccortex_builder`` is appended to
    ``self.mccortex_builders`` and each builder is given ``self.kmer_size``
    and built into its own ``mc_graph_<idx>`` directory under
    ``self.tmpdir``. The output path is stored on ``self.traversal``.

    Returns:
        Whatever ``runner.Cortexpy(...).subgraph(...)`` returns.
    """
    self.mccortex_builders.append(self.mccortex_builder)

    mccortex_graphs = []
    for position, graph_builder in enumerate(self.mccortex_builders):
        graph_builder.with_kmer_size(self.kmer_size)
        target_dir = self.tmpdir / 'mc_graph_{}'.format(position)
        target_dir.mkdir()
        # build() yields a pair here; only the first element is kept
        built_graph, _ = graph_builder.build(target_dir)
        mccortex_graphs.append(built_graph)

    contig_fasta = self._write_contig_fasta()
    self.traversal = self.tmpdir / 'traversal.ctx'

    # fall back to the module-level default when no explicit choice was made
    spawn = SPAWN_PROCESS if self.spawn_process is None else self.spawn_process
    ctp_runner = runner.Cortexpy(spawn)

    return ctp_runner.subgraph(
        graphs=mccortex_graphs,
        out=self.traversal,
        contig=contig_fasta,
        contig_fasta=True,
        verbose=self.verbose,
        silent=self.silent,
        colors=self.colors,
        logging_interval=self.logging_interval_seconds,
    )
def test_outputs_contigs_for_bubble(self, tmpdir):
    """Traversing from 'AAA' outputs both input sequences as FASTA records."""
    # given
    sequences = ['AAACCC', 'AAAGCCC']
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in sequences:
        graph_builder.with_dna_sequence(sequence)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    result = cortexpy.traverse(graph=graph_path, contig='AAA')
    fasta_text = result.stdout

    # then
    assert result.returncode == 0, result
    fasta = expectation.Fasta(fasta_text)
    for sequence in sequences:
        fasta.has_record(sequence)
    fasta.has_n_records(2)
def run(self):
    """Run the traverse driver, then ``cortexpy traverse`` on its output.

    Returns:
        ``expectation.JsonGraphs`` when both ``to_json`` and ``subgraphs``
        are set, ``expectation.JsonGraph`` when only ``to_json`` is set,
        otherwise ``expectation.Fasta`` built from the command's stdout.

    Raises:
        AssertionError: if ``subgraphs`` is set without ``to_json``, or if
            the traverse command exits with a non-zero return code.
    """
    self.traverse_driver.run()

    out_prefix = None
    if self.subgraphs:
        assert self.to_json
        out_prefix = Path(str(self.tmpdir)) / 'subgraphs'
    # NOTE(review): out_prefix is not passed to traverse() below; it is only
    # used afterwards to glob for files -- confirm this is intended.

    traversal = self.traverse_driver.traversal
    links = self.link_builder.build(self.tmpdir, traversal)
    ret = runner.Cortexpy(SPAWN_PROCESS).traverse(
        self.traverse_driver.traversal,
        to_json=self.to_json,
        contig=self.seed_strings,
        graph_index=self.graph_index,
        extra_start_kmer=self.extra_start_kmer,
        links_file=links,
    )

    # echo the command's output before asserting so it is visible on failure
    print(ret.stdout)
    print(ret.stderr, file=sys.stderr)
    assert ret.returncode == 0, ret

    if not self.to_json:
        return expectation.Fasta(ret.stdout)
    if self.subgraphs:
        matches = out_prefix.parent.glob(out_prefix.name + '*')
        return expectation.JsonGraphs(list(matches))
    return expectation.JsonGraph(json.loads(ret.stdout))
def test_dal19_data(self, tmpdir):
    """Traverse a 47-mer graph of three overlapping reads and check both contigs."""
    # given
    reads = [
        'CCCCGAGGGAAGCTCTATGAATTCGCCAATCCCAGTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGT',
        'GTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATA',
        'TAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATAGATAACACTACCAAAGAGCAAGACTATCAG',
    ]
    # first expected contig: read 0 extended by the tails of reads 1 and 2
    merged_contig = reads[0] + reads[1][47:] + reads[2][48:]
    expected_contigs = [merged_contig, reads[2]]

    graph_builder = builder.Mccortex().with_kmer_size(47)
    for read in reads:
        graph_builder.with_dna_sequence(read)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    result = cortexpy.traverse(
        to_json=False,
        graph=graph_path,
        contig='CAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACAT',
    )
    fasta_text = result.stdout

    # then
    assert result.returncode == 0, result
    fasta = expectation.Fasta(fasta_text)
    for contig in expected_contigs:
        fasta.has_record(contig)
    fasta.has_n_records(2)
def test_outputs_multiple_combinations(self, tmpdir):
    """Traversing from 'AAA' yields six records with ids g0_p0 .. g0_p5."""
    # given
    sequences = ['CAACC', 'AAACA', 'AAACT']
    path_count = 6
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in sequences:
        graph_builder.with_dna_sequence(sequence)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    result = cortexpy.traverse(to_json=False, graph=graph_path, contig='AAA')
    fasta_text = result.stdout

    # then
    assert result.returncode == 0, result
    fasta = expectation.Fasta(fasta_text)
    allowed_ids = ['g0_p{}'.format(i) for i in range(path_count)]
    # the three inputs first, then the three additional expected records
    for expected in sequences + ['CAACT', 'CAACA', 'AAACC']:
        fasta.has_record(expected).has_id_in(*allowed_ids)
    fasta.has_n_records(6)
def run(self):
    """Run the cortexpy ``assemble`` command and return its FASTA output.

    Writes ``self.initial_seqs`` to ``initial_seqs.fasta`` under
    ``self.tmpdir`` (record ids are the sequence indices), builds the
    mccortex graph in ``self.tmpdir``, and runs ``assemble`` with
    ``output.fa`` as the output path.

    Returns:
        ``expectation.Fasta`` wrapping the text read from ``output.fa``.

    Raises:
        AssertionError: if the assemble command exits with a non-zero
            return code.
    """
    initial_seq_fasta = self.tmpdir / 'initial_seqs.fasta'
    initial_seqs = [
        SeqRecord(Seq(rec), id=str(idx))
        for idx, rec in enumerate(self.initial_seqs)
    ]
    SeqIO.write(initial_seqs, str(initial_seq_fasta), "fasta")

    output_graph = self.mccortex_builder.build(self.tmpdir)
    output = self.tmpdir / 'output.fa'
    completed_process = runner.Cortexpy(SPAWN_PROCESS).assemble(
        graph=output_graph,
        initial_seqs=initial_seq_fasta,
        out=output,
    )
    assert completed_process.returncode == 0, completed_process

    # read through a context manager so the file handle is always closed
    with open(output, 'r') as fh:
        fasta_text = fh.read()
    return expectation.Fasta(fasta_text)
def test_prints_single_kmer(self, tmpdir):
    """``view_contig`` for contig 'CAA' prints exactly one k-mer line."""
    # given
    graph_builder = builder.Mccortex().with_dna_sequence('ACCAA')
    graph_builder = graph_builder.with_kmer_size(3)
    graph_path = graph_builder.build(tmpdir)
    expected_lines = ['CAA: CAA 1 1 .c...... ........']

    # when
    result = runner.Cortexpy().view_contig(graph=graph_path, contig='CAA')
    printed = result.stdout

    # then
    parser = CortexpyPrintOutputParser(printed)
    assert expected_lines == parser.get_kmer_strings()
def test_with_empty_input_outputs_empty_fasta(self, tmpdir):
    """Traversing the output of a bare ``GfaBuilder`` writes a FASTA with no records."""
    # given
    gfa_path = str(tmpdir / 'input.gfa')
    fasta_path = tmpdir / 'output.fast'
    gfa_text_source = GfaBuilder()
    with open(gfa_path, 'w') as handle:
        handle.write(gfa_text_source.build())

    # when
    cortexpy = runner.Cortexpy()
    cortexpy.traverse(gfa_path, out=fasta_path, input_gfa=True)

    # then
    fasta = Fasta(fasta_path.read())
    fasta.has_no_records()
def test_prints_one_missing_kmer(self, tmpdir):
    """``view_contig`` for 'GGG' on a graph built from 'AAAA' prints one k-mer line."""
    # given
    query = 'GGG'
    graph_builder = builder.Mccortex().with_dna_sequence('AAAA')
    graph_builder = graph_builder.with_kmer_size(3)
    graph_path = graph_builder.build(tmpdir)
    expected_lines = ['CCC: GGG 0 1 ........ ........']

    # when
    result = runner.Cortexpy().view_contig(graph=graph_path, contig=query)
    printed = result.stdout

    # then
    parser = CortexpyPrintOutputParser(printed)
    assert expected_lines == parser.get_kmer_strings()
def test_does_not_raise_without_max_nodes(self, tmpdir):
    """``subgraph`` exits with code 0 when no ``max_nodes`` limit is given."""
    # given
    query = 'CAACC'
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in [query]:
        graph_builder.with_dna_sequence(sequence)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(True)
    result = cortexpy.subgraph(
        graphs=[graph_path],
        contig=query,
        out=tmpdir / 'discarded.pickle',
    )

    # then
    assert 0 == result.returncode
def test_raises_on_max_nodes_exceeded(self, tmpdir):
    """``subgraph`` with ``max_nodes=1`` exits non-zero and reports the limit on stderr."""
    # given
    query = 'CAA'
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in ['CAACC']:
        graph_builder.with_dna_sequence(sequence)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(spawn_process=True)
    result = cortexpy.subgraph(graphs=[graph_path], contig=query, max_nodes=1)

    # then
    assert 0 != result.returncode
    assert 'Max nodes (1) exceeded: 3 nodes found' in result.stderr
def test_prints_three_kmers_including_one_revcomp(self, tmpdir):
    """``view_graph`` on a graph built from 'ACCTT' prints the three expected k-mer lines."""
    # given
    sequence = 'ACCTT'
    graph_builder = builder.Mccortex().with_dna_sequence(sequence)
    graph_builder = graph_builder.with_kmer_size(3)
    graph_path = graph_builder.build(tmpdir)
    expected_lines = [
        'AAG 1 ......G.',
        'ACC 1 .......T',
        'AGG 1 a......T',
    ]

    # when
    result = runner.Cortexpy().view_graph(graph_path)
    printed = result.stdout

    # then
    parser = CortexpyPrintOutputParser(printed)
    assert expected_lines == parser.get_kmer_strings()
def run(self):
    """Run the cortexpy ``prune`` command and return an expectation on the result.

    Builds the mccortex graph in ``self.tmpdir``, writes ``self.records``
    to ``initial_contigs.fa``, and prunes the graph into a sibling file
    with the ``.pruned.ctx`` suffix, using ``self.min_tip_length`` as the
    ``remove_tips`` argument.

    Returns:
        ``expectation.graph.KmerGraphExpectation`` for the pruned graph
        as loaded by ``load_cortex_graph``.

    Raises:
        AssertionError: if the prune command exits with a non-zero
            return code.
    """
    mccortex_graph = self.mccortex_builder.build(self.tmpdir)

    # NOTE(review): this FASTA is written but not passed to prune() below;
    # kept because the file is a visible side effect -- confirm it is needed.
    contig_fasta = self.tmpdir / 'initial_contigs.fa'
    with open(str(contig_fasta), 'w') as fh:
        SeqIO.write(self.records, fh, 'fasta')

    ctp_runner = runner.Cortexpy(SPAWN_PROCESS)
    pruned_graph = Path(str(mccortex_graph)).with_suffix('.pruned.ctx')
    completed_process = ctp_runner.prune(
        graph=mccortex_graph,
        out=pruned_graph,
        remove_tips=self.min_tip_length,
        verbose=True,
    )

    # Echo the command's output before asserting so that it is still shown
    # when the command fails.
    print(completed_process.stdout)
    print(completed_process.stderr, file=sys.stderr)
    assert completed_process.returncode == 0, completed_process

    return expectation.graph.KmerGraphExpectation(
        load_cortex_graph(pruned_graph))
def test_prints_three_kmers(self, tmpdir):
    """``view_contig`` for the full sequence 'ACCAA' prints its three k-mer lines in order."""
    # given
    sequence = 'ACCAA'
    graph_builder = builder.Mccortex().with_dna_sequence(sequence)
    graph_builder = graph_builder.with_kmer_size(3)
    graph_path = graph_builder.build(tmpdir)
    expected_lines = [
        'ACC: ACC 1 1 ....A... ....A...',
        'CCA: CCA 1 1 a...A... a...A...',
        'CAA: CAA 1 1 .c...... .c......',
    ]

    # when
    result = runner.Cortexpy().view_contig(graph=graph_path, contig=sequence)
    printed = result.stdout

    # then
    parser = CortexpyPrintOutputParser(printed)
    assert expected_lines == parser.get_kmer_strings()
def test_prints_one_missing_and_one_revcomp_kmer(self, tmpdir):
    """``view_contig`` for 'ACTT' on a graph built from 'ACCTT' prints the two expected k-mers."""
    # given
    graph_sequence = 'ACCTT'
    query = 'ACTT'
    graph_builder = builder.Mccortex().with_dna_sequence(graph_sequence)
    graph_builder = graph_builder.with_kmer_size(3)
    graph_path = graph_builder.build(tmpdir)
    first_kmer = 'ACT: ACT 0 1 ........ .......T'
    second_kmer = 'AAG: CTT 1 1 .C...... A.......'

    # when
    result = runner.Cortexpy().view_contig(graph=graph_path, contig=query)
    view = ViewExpectation(result.stdout)

    # then
    view = view.has_kmer(first_kmer)
    view = view.has_kmer(second_kmer)
    view.has_n_kmers(2)
def test_outputs_fasta_of_kmers(self, tmpdir):
    """``view_graph`` with ``kmers=True`` emits one FASTA record per expected k-mer."""
    # given
    sequence = 'ATTCC'
    graph_builder = builder.Mccortex()
    graph_builder = graph_builder.with_dna_sequence(sequence)
    graph_builder = graph_builder.with_kmer_size(3)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    result = cortexpy.view_graph(kmers=True, graph=graph_path)
    fasta_text = result.stdout

    # then
    assert result.returncode == 0, result
    fasta = expectation.Fasta(fasta_text)
    for kmer in ('AAT', 'GAA', 'GGA'):
        fasta.has_record(kmer)
    fasta.has_n_records(3)
def test_raises_on_max_path_1_exceeded(self, tmpdir, max_paths):
    """``traverse`` fails with exit code 64 when ``max_paths`` is 1, else outputs both records."""
    # given
    MAX_PATH_EXIT_CODE = 64
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in ['CAACC', 'CAACT']:
        graph_builder.with_dna_sequence(sequence)

    # when
    cortexpy = runner.Cortexpy(spawn_process=True)
    result = cortexpy.traverse(graph=graph_builder.build(tmpdir),
                               max_paths=max_paths)

    # then
    if max_paths != 1:
        assert 0 == result.returncode
        fasta = expectation.Fasta(result.stdout)
        fasta.has_record('CAACC')
        fasta.has_record('CAACT')
        fasta.has_n_records(2)
    else:
        assert MAX_PATH_EXIT_CODE == result.returncode
        assert f'Max paths ({max_paths}) exceeded' in result.stderr