def test_05_single_block_single_nucletodide(self):
    """Compare the poagraph built from the test_5 MAF file with a hand-written one.

    The expected graph has a single node shared by the paths of all four
    sequences.
    """
    maf_file = self.maf_files_dir.joinpath(
        "test_5_single_block_single_nucletodide.maf")

    # (base, aligned_to, block) per node; the list position is the node id.
    node_specs = [
        ('A', None, 0),
    ]
    nodes = [
        graph.Node(node_id=nid(idx),
                   base=graph.Base(base),
                   aligned_to=None if aligned is None else nid(aligned),
                   block_id=bid(block))
        for idx, (base, aligned, block) in enumerate(node_specs)
    ]

    # sequence name -> (list of node-id paths, group label)
    seq_specs = {
        'seq0': ([[0]], '1'),
        'seq1': ([[0]], '1'),
        'seq2': ([[0]], '2'),
        'seq3': ([[0]], '2'),
    }
    sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [graph.SeqPath([nid(i) for i in path]) for path in paths],
            graph.SequenceMetadata({'group': group}))
        for name, (paths, group) in seq_specs.items()
    }

    expected = graph.Poagraph(nodes, sequences)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    actual, _ = builder.build_from_dagmaf(maf,
                                          self.fasta_provider,
                                          self.metadatacsv)
    self.assertEqual(expected, actual)
def test_7_missing_one_reverted_sequence_middle_minus1_minus1(self):
    """Compare the poagraph built from the test_7 MAF file with a hand-written one.

    The graph is built with a ``ConstBaseProvider`` wrapping
    ``self.missing_n``, and two expected nodes carry that constant base.
    """
    maf_file = self.maf_files_dir.joinpath(
        "test_7_missing_one_reverted_sequence_middle_minus1_minus1.maf")

    missing = self.missing_n.value
    # (base, aligned_to) per node; the list position is the node id.
    node_specs = [
        # block 0
        ('A', None),
        ('C', None),
        ('T', None),
        ('A', None),
        # missing seq2
        (missing, None),
        (missing, None),
        # block 1
        ('A', 7),
        ('G', 6),
        ('C', 9),
        ('G', 8),
        ('C', 11),
        ('T', 10),
    ]
    nodes = [
        graph.Node(node_id=nid(idx),
                   base=graph.Base(base),
                   aligned_to=None if aligned is None else nid(aligned))
        for idx, (base, aligned) in enumerate(node_specs)
    ]

    # sequence name -> (list of node-id paths, group label)
    seq_specs = {
        'seq0': ([], '1'),
        'seq1': ([[0, 1, 2, 3, 7, 9, 11]], '1'),
        'seq2': ([[0, 1, 4, 5, 6, 8, 10]], '2'),
        'seq3': ([], '2'),
    }
    sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [graph.SeqPath([nid(i) for i in path]) for path in paths],
            graph.SequenceMetadata({'group': group}))
        for name, (paths, group) in seq_specs.items()
    }

    expected = graph.Poagraph(nodes, sequences)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    actual, _ = builder.build_from_dagmaf(
        maf,
        missings.ConstBaseProvider(self.missing_n),
        self.metadatacsv)
    self.assertEqual(expected, actual)
def test_04_single_block_no_nucleotides(self):
    """Compare the poagraph built from the test_4 MAF file with a hand-written one.

    The expected graph has no nodes and four sequences with no paths.
    """
    maf_file = self.maf_files_dir.joinpath(
        "test_4_single_block_no_nucleotides.maf")

    # sequence name -> group label; none of the sequences has a path.
    group_of = {'seq0': '1', 'seq1': '1', 'seq2': '2', 'seq3': '2'}
    sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [],
            graph.SequenceMetadata({'group': group}))
        for name, group in group_of.items()
    }

    expected = graph.Poagraph([], sequences)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    actual, _ = builder.build_from_dagmaf(maf,
                                          self.fasta_provider,
                                          self.metadatacsv)
    self.assertEqual(expected, actual)
def test_10_parallel_blocks_1st_and_2nd_merge_into_3rd(self):
    """Compare the poagraph built from the test_10 MAF file with a hand-written one."""
    maf_file = self.maf_files_dir.joinpath(
        "test_10_parallel_blocks_1st_and_2nd_merge_into_3rd.maf")

    # (base, aligned_to) per node; the list position is the node id.
    node_specs = [
        ('G', 1),
        ('T', 0),
        ('T', None),
        ('A', None),
        ('C', 5),
        ('G', 4),
        ('C', None),
        ('A', None),
        ('C', None),
        ('T', None),
        ('G', None),
        ('G', None),
        ('C', 13),
        ('G', 12),
        ('C', 15),
        ('G', 16),
        ('T', 14),
        ('A', 18),
        ('T', 17),
        ('A', 20),
        ('C', 19),
        ('C', 22),
        ('G', 21),
    ]
    nodes = [
        graph.Node(node_id=nid(idx),
                   base=graph.Base(base),
                   aligned_to=None if aligned is None else nid(aligned))
        for idx, (base, aligned) in enumerate(node_specs)
    ]

    # sequence name -> (list of node-id paths, group label)
    seq_specs = {
        'seq0': ([[7, 8, 9, 10, 11, 12, 15, 18, 19, 21]], '1'),
        'seq1': ([[7, 8, 9, 10, 11, 12, 15, 18, 19, 21]], '1'),
        'seq2': ([[0, 2, 3, 4, 6, 13, 16, 17, 20, 21]], '2'),
        'seq3': ([[1, 2, 3, 5, 6, 13, 14, 17, 20, 22]], '2'),
    }
    sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [graph.SeqPath([nid(i) for i in path]) for path in paths],
            graph.SequenceMetadata({'group': group}))
        for name, (paths, group) in seq_specs.items()
    }

    expected = graph.Poagraph(nodes, sequences)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    actual, _ = builder.build_from_dagmaf(maf,
                                          self.fasta_provider,
                                          self.metadatacsv)
    self.assertEqual(expected, actual)
def test_09_inactive_edges_but_all_strands_plus(self):
    """Compare the poagraph built from the test_9 MAF file with a hand-written one.

    The expected graph has twenty unaligned nodes; seq1 is split into two
    paths and seq2 runs through every node in order.
    """
    maf_file = self.maf_files_dir.joinpath(
        "test_9_inactive_edges_but_all_strands_plus.maf")

    # Node bases in id order: the five bases A, C, T, G, G repeated four times.
    nodes = [
        graph.Node(node_id=nid(idx), base=graph.Base(base), aligned_to=None)
        for idx, base in enumerate("ACTGG" * 4)
    ]

    # sequence name -> (list of node-id paths, group label)
    seq_specs = {
        'seq0': ([], '1'),
        'seq1': ([[0, 1, 2, 3, 4, 10, 11, 12, 13, 14],
                  [5, 6, 7, 8, 9, 15, 16, 17, 18, 19]], '1'),
        'seq2': ([list(range(20))], '2'),
        'seq3': ([], '2'),
    }
    sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [graph.SeqPath([nid(i) for i in path]) for path in paths],
            graph.SequenceMetadata({'group': group}))
        for name, (paths, group) in seq_specs.items()
    }

    expected = graph.Poagraph(nodes, sequences)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    actual, _ = builder.build_from_dagmaf(maf,
                                          self.fasta_provider,
                                          self.metadatacsv)
    self.assertEqual(expected, actual)
def test_6_missing_one_reverted_sequence_middle_minus1_1(self):
    """Compare the poagraph built from the test_6 MAF file with a hand-written one.

    In the expected graph seq1 is split into two paths and seq2 has one.
    """
    maf_file = self.maf_files_dir.joinpath(
        "test_6_missing_one_reverted_sequence_middle_minus1_1.maf")

    # (base, aligned_to) per node; the list position is the node id.
    node_specs = [
        # block 1 because it is first in DAG and reverted
        ('A', None),
        ('C', None),
        ('C', 3),
        ('T', 2),
        # missing seq2, on edge (-1,1)
        ('T', None),
        ('C', None),
        ('A', 7),
        ('C', 6),
        ('C', None),
        ('T', None),
        ('A', 11),
        ('C', 10),
    ]
    nodes = [
        graph.Node(node_id=nid(idx),
                   base=graph.Base(base),
                   aligned_to=None if aligned is None else nid(aligned))
        for idx, (base, aligned) in enumerate(node_specs)
    ]

    # sequence name -> (list of node-id paths, group label)
    seq_specs = {
        'seq0': ([], '1'),
        'seq1': ([[0, 1, 2], [6, 8, 9, 10]], '1'),
        'seq2': ([[0, 1, 3, 4, 5, 7, 11]], '2'),
        'seq3': ([], '2'),
    }
    sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [graph.SeqPath([nid(i) for i in path]) for path in paths],
            graph.SequenceMetadata({'group': group}))
        for name, (paths, group) in seq_specs.items()
    }

    expected = graph.Poagraph(nodes, sequences)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    actual, _ = builder.build_from_dagmaf(maf,
                                          self.fasta_provider,
                                          self.metadatacsv)
    self.assertEqual(expected, actual)
def run_pangtree(maf_path: Path, fasta_path: Path, output_dir: Path, po_output: bool) -> None:
    """Build a poagraph from a MAF file and save affinity-tree results per P value.

    The poagraph is built once via ``builder.build_from_dagmaf`` using a
    ``missings.FromFile`` provider for ``fasta_path``. Then, for every value
    in the externally defined ``p_values``, an affinity tree is built and
    saved as ``affinity_tree.newick`` together with ``pangenome.json`` (and
    ``poagraph.po`` when ``po_output`` is true).

    Args:
        maf_path: Path to the input MAF file.
        fasta_path: Path handed to ``missings.FromFile``.
        output_dir: Parent directory; results go into a child directory
            named after ``pathtools.get_current_time()``.
        po_output: Whether to also save the poagraph in PO format.
    """
    output_dir = pathtools.get_child_dir(output_dir, pathtools.get_current_time())
    banner = (f"Runing pangtree for maf: {maf_path} and fasta: {fasta_path} "
              f"Output in: {output_dir}, include po file: {po_output}.")
    print(banner)

    def save(content, directory, file_name):
        # Every artefact is written to a file directly under ``directory``.
        pathtools.save_to_file(content,
                               pathtools.get_child_path(directory, file_name))

    fasta_provider = missings.FromFile(fasta_path)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
    poagraph, dagmaf = builder.build_from_dagmaf(maf, fasta_provider)

    for p in p_values:
        # One sub-directory per P value, with "." replaced by "_" in the name.
        current_output_dir = pathtools.get_child_dir(
            output_dir, str(p).replace(".", "_"))
        stop = at_params.Stop(0.99)
        at = at_builders.build_affinity_tree(poagraph,
                                             None,
                                             current_output_dir,
                                             stop,
                                             at_params.P(p),
                                             True)

        save(at.as_newick(None, separate_leaves=True),
             current_output_dir,
             "affinity_tree.newick")

        if po_output:
            save(po.poagraph_to_PangenomePO(poagraph),
                 current_output_dir,
                 "poagraph.po")

        task_params = json.TaskParameters(
            multialignment_file_path=str(maf_path),
            multialignment_format="maf",
            datatype="nucleotides",
            blosum_file_path="",
            output_path=current_output_dir,
            fasta_provider=fasta_provider,
            fasta_source_file=fasta_path,
            consensus_type="tree",
            stop=str(stop),
            p=str(p),
            output_with_nodes=False)
        pangenomejson = json.to_PangenomeJSON(task_parameters=task_params,
                                              poagraph=poagraph,
                                              dagmaf=dagmaf,
                                              affinity_tree=at)
        save(json.to_json(pangenomejson),
             current_output_dir,
             "pangenome.json")
def test_2_missing_sequence_end(self):
    """Compare the poagraph built from the test_2 missing-sequence-end MAF file
    with a hand-written one."""
    maf_file = self.maf_files_dir.joinpath(
        "test_2_missing_sequence_end.maf")

    # (base, aligned_to) per node; the list position is the node id.
    node_specs = [
        ('A', 1),
        ('G', 0),
        ('C', 3),
        ('G', 2),
        ('T', None),
        ('A', 6),
        ('C', 5),
        ('A', None),
        ('G', None),
        ('G', None),
        ('T', None),
        ('G', None),
        ('T', None),
    ]
    nodes = [
        graph.Node(node_id=nid(idx),
                   base=graph.Base(base),
                   aligned_to=None if aligned is None else nid(aligned))
        for idx, (base, aligned) in enumerate(node_specs)
    ]

    # sequence name -> (list of node-id paths, group label)
    seq_specs = {
        'seq0': ([], '1'),
        'seq1': ([[0, 2, 4, 5, 8, 9, 10]], '1'),
        'seq2': ([[1, 3, 4, 6, 7, 11, 12]], '2'),
        'seq3': ([], '2'),
    }
    sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [graph.SeqPath([nid(i) for i in path]) for path in paths],
            graph.SequenceMetadata({'group': group}))
        for name, (paths, group) in seq_specs.items()
    }

    expected = graph.Poagraph(nodes, sequences)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    actual, _ = builder.build_from_dagmaf(maf,
                                          self.fasta_provider,
                                          self.metadatacsv)
    self.assertEqual(expected, actual)
def test_10_metadata_feed_to_alignment_from_csv(self, test_name, maf_name, csv_name, po_name, expected_metadata):
    """Check that CSV metadata reaches the sequences for every poagraph builder.

    The same CSV file is fed to ``build_from_dagmaf``, ``build_from_maf`` and
    ``build_from_po``; after each build the per-sequence ``seqmetadata`` must
    equal ``expected_metadata``.

    Args:
        test_name: Not used in the body.
            NOTE(review): presumably a label supplied by a parameterizing
            decorator - confirm against the decorator on this method.
        maf_name: MAF file name, resolved against ``self.alignment_files_dir``.
        csv_name: CSV file name, resolved against ``self.csv_files_dir``.
        po_name: PO file name, resolved against ``self.alignment_files_dir``.
        expected_metadata: Expected mapping of sequence id to metadata.
    """
    maf_path = self.alignment_files_dir.joinpath(maf_name)
    csv_path = self.csv_files_dir.joinpath(csv_name)
    po_path = self.alignment_files_dir.joinpath(po_name)

    def load_maf():
        # A fresh object per builder, as each call reads the content anew.
        return msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)

    def load_csv():
        return msa.MetadataCSV(pathtools.get_file_content_stringio(csv_path),
                               csv_path)

    def metadata_of(built_poagraph):
        return {seq_id: seq.seqmetadata
                for seq_id, seq in built_poagraph.sequences.items()}

    poagraph, _ = builder.build_from_dagmaf(load_maf(),
                                            self.fasta_provider,
                                            load_csv())
    self.assertEqual(expected_metadata, metadata_of(poagraph))

    poagraph = builder.build_from_maf(load_maf(), load_csv())
    self.assertEqual(expected_metadata, metadata_of(poagraph))

    # The Po object is labelled with the PO file's own path; the original
    # passed maf_path here, which looks like a copy-paste slip.
    poagraph = builder.build_from_po(
        msa.Po(pathtools.get_file_content_stringio(po_path), po_path),
        load_csv())
    self.assertEqual(expected_metadata, metadata_of(poagraph))
def test_06_1st_block_separates_into_2_branches_which_connect_in_3rd_block(self):
    """Compare the poagraph built from the test_6 branching MAF file with a
    hand-written one."""
    maf_file = self.maf_files_dir.joinpath(
        "test_6_1st_block_separates_into_2_branches_which_connect_in_3rd_block.maf")

    # (base, aligned_to) per node; the list position is the node id.
    node_specs = [
        ('A', 1),
        ('C', 2),
        ('G', 0),
        ('C', None),
        ('A', 5),
        ('T', 4),
        ('G', None),
        ('G', None),
        ('C', 9),
        ('G', 10),
        ('T', 8),
    ]
    nodes = [
        graph.Node(node_id=nid(idx),
                   base=graph.Base(base),
                   aligned_to=None if aligned is None else nid(aligned))
        for idx, (base, aligned) in enumerate(node_specs)
    ]

    # sequence name -> (list of node-id paths, group label)
    seq_specs = {
        'seq0': ([[0, 3, 4, 8]], '1'),
        'seq1': ([[1, 3, 5, 6, 7, 9]], '1'),
        'seq2': ([[2, 3, 5, 10]], '2'),
        'seq3': ([], '2'),
    }
    sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [graph.SeqPath([nid(i) for i in path]) for path in paths],
            graph.SequenceMetadata({'group': group}))
        for name, (paths, group) in seq_specs.items()
    }

    expected = graph.Poagraph(nodes, sequences)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    actual, _ = builder.build_from_dagmaf(maf,
                                          self.fasta_provider,
                                          self.metadatacsv)
    self.assertEqual(expected, actual)
def test_02_seq_starts_in_second_block(self):
    """Compare the poagraph built from the test_2 seq-starts-in-second-block MAF
    file with a hand-written one (expected nodes carry block ids)."""
    maf_file = self.maf_files_dir.joinpath(
        "test_2_seq_starts_in_second_block.maf")

    # (base, aligned_to, block) per node; the list position is the node id.
    node_specs = [
        ('C', None, 0),
        ('T', None, 0),
        ('G', None, 0),
        ('T', None, 1),
        ('G', 5, 2),
        ('T', 4, 2),
        ('A', None, 2),
        ('A', None, 2),
        ('C', None, 2),
    ]
    nodes = [
        graph.Node(node_id=nid(idx),
                   base=graph.Base(base),
                   aligned_to=None if aligned is None else nid(aligned),
                   block_id=bid(block))
        for idx, (base, aligned, block) in enumerate(node_specs)
    ]

    # sequence name -> (list of node-id paths, group label)
    seq_specs = {
        'seq0': ([[1, 2, 3]], '1'),
        'seq1': ([[0, 5, 7]], '1'),
        'seq2': ([[3, 4, 6, 8]], '2'),
        'seq3': ([], '2'),
    }
    sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [graph.SeqPath([nid(i) for i in path]) for path in paths],
            graph.SequenceMetadata({'group': group}))
        for name, (paths, group) in seq_specs.items()
    }

    expected = graph.Poagraph(nodes, sequences)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    actual, _ = builder.build_from_dagmaf(maf,
                                          self.fasta_provider,
                                          self.metadatacsv)
    self.assertEqual(expected, actual)
def test_08_reversed_block(self):
    """Compare the poagraph built from the test_8 reversed-block MAF file with a
    hand-written one."""
    maf_file = self.maf_files_dir.joinpath("test_8_reversed_block.maf")

    # (base, aligned_to) per node; the list position is the node id.
    node_specs = [
        ('C', None),
        ('A', None),
        ('T', None),
        # next block is reversed because it was converted to dag
        ('G', None),
        ('G', None),
        ('A', 6),
        ('G', 5),
        ('A', None),
        ('G', None),
        ('T', None),
    ]
    nodes = [
        graph.Node(node_id=nid(idx),
                   base=graph.Base(base),
                   aligned_to=None if aligned is None else nid(aligned))
        for idx, (base, aligned) in enumerate(node_specs)
    ]

    # sequence name -> (list of node-id paths, group label)
    seq_specs = {
        'seq0': ([], '1'),
        'seq1': ([[0, 1, 3, 4, 5, 7, 8, 9]], '1'),
        'seq2': ([[0, 1, 2, 3, 4, 6, 7, 8, 9]], '2'),
        'seq3': ([[0, 1, 2, 3, 4, 6, 7, 9]], '2'),
    }
    sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [graph.SeqPath([nid(i) for i in path]) for path in paths],
            graph.SequenceMetadata({'group': group}))
        for name, (paths, group) in seq_specs.items()
    }

    expected = graph.Poagraph(nodes, sequences)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    actual, _ = builder.build_from_dagmaf(maf,
                                          self.fasta_provider,
                                          self.metadatacsv)
    self.assertEqual(expected, actual)
def test_1_messy_sequences(self):
    """Compare the nodes and sequences returned by ``maf2poagraph.get_poagraph``
    for the test_1 MAF file with hand-written ones."""
    maf_file = self.maf_files_dir.joinpath("test_1_messy_sequences.maf")

    # (base, aligned_to, block) per node; the list position is the node id.
    node_specs = [
        ('A', None, 0),
        ('A', 2, 0),
        ('C', 1, 0),
        ('T', None, 0),
        ('C', 5, 0),
        ('G', 4, 0),
        ('A', None, 1),
        ('C', None, 1),
        ('G', None, 1),
        ('C', 10, 2),
        ('G', 9, 2),
        ('T', None, 2),
        ('C', None, 2),
        ('C', None, 2),
        ('A', None, 2),
    ]
    wanted_nodes = [
        graph.Node(node_id=nid(idx),
                   base=graph.Base(base),
                   aligned_to=None if aligned is None else nid(aligned),
                   block_id=bid(block))
        for idx, (base, aligned, block) in enumerate(node_specs)
    ]

    # sequence name -> (list of node-id paths, group label)
    seq_specs = {
        'seq0': ([[1, 3, 4, 6, 8, 9, 11, 12]], '1'),
        'seq1': ([[2, 3, 4, 10, 11, 12, 13, 14]], '1'),
        'seq2': ([[0, 2, 5, 6, 7, 10, 11, 12, 14]], '2'),
        'seq3': ([], '2'),
    }
    wanted_sequences = {
        msa.SequenceID(name): graph.Sequence(
            msa.SequenceID(name),
            [graph.SeqPath([nid(i) for i in path]) for path in paths],
            graph.SequenceMetadata({'group': group}))
        for name, (paths, group) in seq_specs.items()
    }

    maf = msa.Maf(pathtools.get_file_content_stringio(maf_file), maf_file)
    got_nodes, got_sequences = maf2poagraph.get_poagraph(maf, self.metadatacsv)

    self.assertEqual(wanted_nodes, got_nodes)
    self.assertEqual(wanted_sequences, got_sequences)