def test_create_from_nongraphpeakcollection(self):
    """Convert linear peaks to graph peaks on a three-node linear graph.

    The conversion is run twice: once with no region (``None``) and once
    with ``LinearRegion("chr1", 3, 20)``.  The expected intervals of the
    second run are those of the first moved 3 positions to the left.
    """
    blocks = {node_id: Block(10) for node_id in (1, 2, 3)}
    edges = {1: [2], 2: [3]}
    graph = Graph(blocks, edges)
    graph.convert_to_numpy_backend()

    path_interval = Interval(0, 10, [1, 2, 3], graph)
    linear_path = path_interval.to_numpy_indexed_interval()

    first_peak = NonGraphPeak("chr1", 3, 10, 5)
    second_peak = NonGraphPeak("chr1", 13, 15, 7)
    nongraph_peaks = NonGraphPeakCollection([first_peak, second_peak])

    # Without a region.
    peaks = PeakCollection.create_from_nongraph_peak_collection(
        graph, nongraph_peaks, linear_path, None)
    self.assertEqual(peaks.intervals[0], Interval(3, 10, [1]))
    self.assertEqual(peaks.intervals[1], Interval(3, 5, [2]))

    # With a region starting at linear position 3.
    region = LinearRegion("chr1", 3, 20)
    peaks = PeakCollection.create_from_nongraph_peak_collection(
        graph, nongraph_peaks, linear_path, region)
    self.assertEqual(peaks.intervals[0], Interval(0, 7, [1]))
    self.assertEqual(peaks.intervals[1], Interval(0, 2, [2]))
def setUp(self):
    """Build the reference graphs and delete leftover output files.

    Sets ``self.correct_ob_graph`` (a four-node graph converted to the
    numpy backend) and ``self.correct_sequence_graph`` (an empty sequence
    graph for it, filled from ``tests/vg_test_graph.json``).  Afterwards
    every path in ``remove_files`` that exists as a regular file is
    removed.
    """
    self.correct_ob_graph = GraphWithReversals(
        {
            1: Block(7),
            2: Block(4),
            3: Block(7),
            4: Block(4)
        },
        {
            1: [2, 3],
            2: [4],
            3: [4]
        })
    self.correct_ob_graph.convert_to_numpy_backend()

    self.correct_sequence_graph = SequenceGraph.create_empty_from_ob_graph(
        self.correct_ob_graph)
    self.correct_sequence_graph.set_sequences_using_vg_json_graph(
        "tests/vg_test_graph.json")

    # Each entry must end with a comma: adjacent string literals without
    # one are implicitly concatenated into a single (non-existent) path,
    # which previously left the vg_alignments_*.json files untouched.
    remove_files = [
        "tests/testgraph.obg",
        "tests/test_linear_map_starts.pickle",
        "tests/test_linear_map_ends.pickle",
        "tests/test_linear_map.length",
        "tests/sample.intervalcollection",
        "tests/testintervals.intervalcollection",
        "tests/testsequences.fasta",
        "tests/node_range_test_data/vg_alignments_1.json",
        "tests/node_range_test_data/vg_alignments_2.json",
        "tests/node_range_test_data/vg_alignments_3.json",
        "tests/node_range_test_data/vg_alignments_4.json",
        "tests/node_range_test_data/vg_alignments_5.json",
    ]
    for file in remove_files:
        if os.path.isfile(file):
            os.remove(file)
def test_find_max_path_on_start_and_end_node(self):
    """Expect the max path of a peak covering nodes 2 and 4 to be 2 -> 4."""
    blocks = {node_id: Block(10) for node_id in range(1, 5)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    graph = Graph(blocks, edges)

    # The peak covers nodes 2 and 4 completely.
    peak_areas = {2: [0, 10], 4: [0, 10]}
    peak = ConnectedAreas(graph, peak_areas)
    binary_peak = BinaryContinousAreas.from_old_areas(peak)

    pileup_interval = Interval(7, 2, [1, 2, 4])
    qvalues = DensePileup.from_intervals(graph, [pileup_interval])

    scored_peak = ScoredPeak.from_peak_and_pileup(binary_peak, qvalues)
    max_path = scored_peak.get_max_path()

    expected = Interval(0, 10, [2, 4])
    self.assertEqual(max_path, expected)
def test_find_max_path_through_subgraph_multiple_paths(self):
    """Expect the max path to follow the branch covered by the pileup.

    The peak spans both branches (nodes 2 and 3) of a diamond graph; the
    pileup is built from a single interval running through node 3.
    """
    blocks = {node_id: Block(10) for node_id in range(1, 5)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    graph = Graph(blocks, edges)

    peak_areas = {
        2: [0, 10],
        3: [0, 10],
        1: [5, 10],
        4: [0, 3],
    }
    peak = ConnectedAreas(graph, peak_areas)
    binary_peak = BinaryContinousAreas.from_old_areas(peak)

    # This interval gives a higher q-value along the path through node 3.
    favoured_path = Interval(7, 2, [1, 3, 4])
    qvalues = DensePileup.from_intervals(graph, [favoured_path])
    print(qvalues)

    scored_peak = ScoredPeak.from_peak_and_pileup(binary_peak, qvalues)
    print(scored_peak)

    max_path = scored_peak.get_max_path()
    expected = Interval(5, 3, [1, 3, 4])
    self.assertEqual(max_path, expected)
def simple_test():
    """Run MinimizerFinder (k=3, w=3) on a small diamond graph.

    Asserts that the result reports minimizers for ``(2, 0)``, ``(3, 0)``
    and ``(4, 4)`` via ``has_minimizer``.
    """
    blocks = {1: Block(10), 2: Block(1), 3: Block(1), 4: Block(10)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    graph = Graph(blocks, edges)
    graph.convert_to_numpy_backend()

    sequence_graph = SequenceGraph.create_empty_from_ob_graph(graph)
    node_sequences = {
        1: "GGGTTTATAC",
        2: "A",
        3: "C",
        4: "GTACATTGTA",
    }
    for node_id, sequence in node_sequences.items():
        sequence_graph.set_sequence(node_id, sequence)

    # NOTE(review): the path [1, 2, 3] has no 2 -> 3 edge in this graph;
    # possibly [1, 2, 4] was intended -- confirm before changing.
    linear_ref = Interval(0, 10, [1, 2, 3], graph)
    linear_ref = linear_ref.to_numpy_indexed_interval()

    critical_nodes = {4}
    finder = MinimizerFinder(
        graph, sequence_graph, critical_nodes, linear_ref, k=3, w=3)
    minimizers = finder.find_minimizers()

    assert minimizers.has_minimizer(2, 0)
    assert minimizers.has_minimizer(3, 0)
    assert minimizers.has_minimizer(4, 4)
def test_finds_correct_max_path_among_many_paths(self):
    """Expect the path 1 -> 4 -> 5 to win among three parallel branches."""
    blocks = {node_id: Block(10) for node_id in range(1, 6)}
    edges = {1: [2, 3, 4], 2: [5], 4: [5], 3: [5]}
    graph = GraphWithReversals(blocks, edges)

    pileup = SparsePileup(graph)
    pileup.data = {
        1: ValuedIndexes([], [], 2, 10),
        # Node 2: higher q-value in places, but two holes with a low value.
        2: ValuedIndexes([1, 2, 7, 8], [0, 2.001, 0, 2.001], 2, 10),
        3: ValuedIndexes([], [], 1.5, 10),
        4: ValuedIndexes([], [], 2, 10),
        5: ValuedIndexes([], [], 2, 10),
    }

    expected_paths = [Interval(0, 10, [1, 4, 5])]
    self._assert_finds_max_paths(expected_paths, graph, pileup)
def set_graph(self):
    """Set ``self.graph`` to a linear graph 1 -> 2 -> 3 of length-5 blocks."""
    blocks = {node_id: Block(5) for node_id in (1, 2, 3)}
    edges = {1: [2], 2: [3]}
    self.graph = Graph(blocks, edges)
def set_graph(self):
    """Set ``self.graph`` to a diamond graph (1 -> 2|3 -> 4) of length-5 blocks."""
    blocks = {node_id: Block(5) for node_id in (1, 2, 3, 4)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    self.graph = Graph(blocks, edges)
def setUp(self):
    """Create a linear graph, a pileup over it, and a merging graph.

    Sets ``self.linear_graph`` (1 -> 2 -> 3), ``self.scores`` (a dense
    pileup built from one full-length interval per node of the linear
    graph) and ``self.graph`` (nodes 1 and 2 both leading into 3).
    """
    linear_blocks = {1: Block(5), 2: Block(5), 3: Block(5)}
    linear_edges = {1: [2], 2: [3]}
    self.linear_graph = Graph(linear_blocks, linear_edges)

    full_node_intervals = [
        Interval(0, 5, [1]),
        Interval(0, 5, [2]),
        Interval(0, 5, [3]),
    ]
    self.scores = DensePileup.from_intervals(
        self.linear_graph, full_node_intervals)

    # NOTE(review): the edge 3 -> 4 targets node 4, which has no block
    # here (blocks cover 1..3 only) -- confirm whether that is intended.
    merging_blocks = {1: Block(5), 2: Block(5), 3: Block(5)}
    merging_edges = {1: [3], 2: [3], 3: [4]}
    self.graph = Graph(merging_blocks, merging_edges)
def test_three_nodes_in(self):
    """Expect one subgraph joining three nodes that all lead into node 4.

    The pileup covers the last three positions of nodes 1, 2 and 3 and
    the first three positions of node 4.
    """
    blocks = {1: Block(5), 2: Block(5), 3: Block(5), 4: Block(5)}
    edges = {1: [4], 2: [4], 3: [4]}
    graph = Graph(blocks, edges)

    intervals = [Interval(2, 5, [node_id]) for node_id in (1, 2, 3)]
    intervals.append(Interval(0, 3, [4]))
    pileup = DensePileup.from_intervals(graph, intervals)

    subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
        graph, pileup)
    print(subgraphs)

    correct1 = BinaryContinousAreas(graph)
    for node_id in (-1, -2, -3, 4):
        correct1.add_start(node_id, 3)

    self.assertTrue(correct1 in subgraphs)
def test_simple3(self):
    """Expect a max path through 1 -> 3 and then either node 4 or node 5.

    Subgraphs are created from a pileup over nodes 1, 3, 4 and part of 5,
    scored against a pileup with one full-length interval per node, and
    either ``1 -> 3 -> 4`` or ``1 -> 3 -> 5`` is accepted as a max path.
    """
    blocks = {1: Block(5), 2: Block(5), 3: Block(5), 4: Block(5), 5: Block(5)}
    edges = {1: [3], 2: [3], 3: [4, 5]}
    graph = Graph(blocks, edges)

    score_intervals = [Interval(0, 5, [node_id]) for node_id in range(1, 6)]
    scores = DensePileup.from_intervals(graph, score_intervals)

    intervals = [Interval(0, 5, [node_id]) for node_id in (1, 3, 4)]
    intervals.append(Interval(0, 3, [5]))
    pileup = DensePileup.from_intervals(graph, intervals)

    subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
        graph, pileup)

    max_paths = [
        ScoredPeak.from_peak_and_pileup(peak, scores).get_max_path()
        for peak in subgraphs
    ]

    self.assertTrue(
        Interval(0, 5, [1, 3, 4]) in max_paths
        or Interval(0, 3, [1, 3, 5]) in max_paths)
def test_find_max_path_through_subgraph_two_node_graph(self):
    """Expect the max path of a peak across two nodes to span both nodes.

    The peak covers positions 5-10 of node 1 and 0-4 of node 2; the
    q-value pileup is created from a base value of 10.
    """
    blocks = {1: Block(10), 2: Block(10)}
    edges = {1: [2]}
    graph = Graph(blocks, edges)

    peak_areas = {2: [0, 4], 1: [5, 10]}
    peak = ConnectedAreas(graph, peak_areas)
    binary_peak = BinaryContinousAreas.from_old_areas(peak)

    qvalues = DensePileup.from_base_value(graph, 10)
    print("q values")
    print(qvalues)
    print(qvalues.data._values)

    scored_peak = ScoredPeak.from_peak_and_pileup(binary_peak, qvalues)
    print(scored_peak)

    max_path = scored_peak.get_max_path()
    expected = Interval(5, 4, [1, 2])
    self.assertEqual(max_path, expected)
def test_build_non_nested(self):
    """Build snarl graphs for the simple fixture and compare to the expected.

    The expected top-level graph holds two child snarl graphs (ids 20 and
    21) and is delimited by the nodes 9 and 10.
    """
    builder = SnarlGraphBuilder(
        self.simple_graph, self.simple_snarls, id_counter=9)
    graph = builder.build_snarl_graphs()

    first_snarl = SnarlGraph(
        {2: Block(3), 3: Block(3)},
        {1: [2, 3], 2: [4], 3: [4]},
        start_node=1,
        end_node=4)
    second_snarl = SnarlGraph(
        {6: Block(3), 7: Block(3)},
        {5: [6, 7], 6: [8], 7: [8]},
        start_node=5,
        end_node=8)

    expected_blocks = {
        1: Block(3),
        20: first_snarl,
        4: Block(3),
        5: Block(3),
        21: second_snarl,
        8: Block(3),
    }
    expected_edges = {
        1: [20],
        20: [4],
        4: [5],
        5: [21],
        21: [8],
        9: [1],   # Dummy node added by the snarl graph builder
        8: [10],  # Dummy node added by the snarl graph builder
    }
    correct_snarl_graph = SnarlGraph(
        expected_blocks, expected_edges, start_node=9, end_node=10)

    print("Final graph")
    print(graph)
    self.assertEqual(correct_snarl_graph, graph)
def test_reverse():
    """Project one alignment given on reversed nodes onto the linear path.

    The alignment runs over nodes -3 and -1; the expected projection is
    ``(5, 16, "-")``.
    """
    blocks = {1: Block(10), 2: Block(5), 3: Block(10), 4: Block(5)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    graph = Graph(blocks, edges)
    graph.convert_to_numpy_backend()

    path_interval = Interval(0, 10, [1, 2, 4], graph)
    linear_path = NumpyIndexedInterval.from_interval(path_interval)

    reverse_alignment = Interval(4, 5, [-3, -1], graph)
    projected = list(project_alignments([reverse_alignment], linear_path))

    assert projected[0] == (5, 16, "-")
def setUp(self):
    """Create a six-node linear graph and a collection of two peaks on it.

    Sets ``self.graph`` (1 -> 2 -> ... -> 6, blocks of length 3) and
    ``self.peaks`` (one peak over nodes 1-4, one over nodes 5-6).
    """
    node_ids = range(1, 7)
    blocks = {node_id: Block(3) for node_id in node_ids}
    edges = {node_id: [node_id + 1] for node_id in node_ids[:-1]}
    self.graph = Graph(blocks, edges)

    first_peak = Peak(3, 3, [1, 2, 3, 4], self.graph)
    second_peak = Peak(3, 3, [5, 6], self.graph)
    self.peaks = PeakCollection([first_peak, second_peak])
def test_many_nodes():
    """Run MinimizerFinder (k=3, w=3) on a graph with three bubbles.

    Only prints the number of minimizers found; nothing is asserted.
    """
    nodes = {node_id: Block(1) for node_id in range(2, 10)}
    nodes[1] = Block(10)
    nodes[10] = Block(10)
    edges = {
        1: [2, 3],
        2: [4],
        3: [4],
        4: [5, 6],
        5: [7],
        6: [7],
        7: [8, 9],
        8: [10],
        9: [10],
    }
    graph = Graph(nodes, edges)
    graph.convert_to_numpy_backend()

    sequence_graph = SequenceGraph.create_empty_from_ob_graph(graph)
    node_sequences = {
        1: "ACTGACTGAC",
        10: "ACTGACTGAC",
        2: "A",
        3: "C",
        4: "A",
        5: "G",
        6: "C",
        7: "T",
        8: "A",
        9: "A",
    }
    for node_id, sequence in node_sequences.items():
        sequence_graph.set_sequence(node_id, sequence)

    reference_path = Interval(0, 10, [1, 2, 4, 6, 7, 8, 10], graph)
    linear_ref = reference_path.to_numpy_indexed_interval()

    critical_nodes = {1, 4, 7, 10}
    finder = MinimizerFinder(
        graph, sequence_graph, critical_nodes, linear_ref, k=3, w=3)
    minimizers = finder.find_minimizers()
    print(len(minimizers.minimizers))
def test_simple():
    """Project two forward alignments onto the linear path 1 -> 2 -> 4.

    Both alignments pass through node 3, which is not on the linear
    path; the expected projections are ``(5, 15, "+")`` and
    ``(15, 25, "+")``.
    """
    blocks = {1: Block(10), 2: Block(5), 3: Block(10), 4: Block(5)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    graph = Graph(blocks, edges)
    graph.convert_to_numpy_backend()

    path_interval = Interval(0, 10, [1, 2, 4], graph)
    linear_path = NumpyIndexedInterval.from_interval(path_interval)

    alignments = [
        Interval(5, 5, [1, 3], graph),
        Interval(5, 5, [3, 4], graph),
    ]
    projected = list(project_alignments(alignments, linear_path))

    assert projected[0] == (5, 15, "+")
    assert projected[1] == (15, 25, "+")
def set_graph(self):
    """Set fragment/read lengths and an eleven-node graph with nested branches.

    Nodes 1-10 have length 3 and node 11 has length 1000.  A linear map
    created from the graph is written to ``test_linear_map.npz``.
    """
    self.fragment_length = 6
    self.read_length = 2

    blocks = {node_id: Block(3) for node_id in range(1, 11)}
    blocks[11] = Block(1000)
    edges = {
        1: [2, 3],
        2: [7, 8],
        3: [4, 5],
        4: [6],
        5: [6],
        6: [10],
        7: [9],
        8: [9],
        9: [10],
        10: [11],
    }
    self.graph = GraphWithReversals(blocks, edges)

    linear_map = LinearMap.from_graph(self.graph)
    linear_map.to_file("test_linear_map.npz")
def set_graph(self):
    """Set fragment/read lengths and a diamond graph of length-15 blocks.

    A linear map created from the graph is written to
    ``test_linear_map.npz``.
    """
    self.fragment_length = 5
    self.read_length = 1

    blocks = {1: Block(15), 2: Block(15), 3: Block(15), 4: Block(15)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    self.graph = GraphWithReversals(blocks, edges)

    linear_map = LinearMap.from_graph(self.graph)
    linear_map.to_file("test_linear_map.npz")
def test_find_max_path_through_subgraph_with_illegal_paths(self):
    """Expect the max path to take node 2 when 3 -> 4 is not a forward edge.

    The pileup is stacked so that nodes 3, 4 and 1 score highest, but
    the graph only has a forward edge into node 4 from node 2.
    """
    blocks = {node_id: Block(10) for node_id in range(1, 5)}
    edges = {
        1: [2, 3],
        2: [4],
        -4: [-3],  # Makes 3 => 4 a path that is not allowed
    }
    graph = Graph(blocks, edges)

    peak_areas = {
        2: [0, 10],
        3: [0, 10],
        1: [5, 10],
        4: [0, 8],
    }
    peak = ConnectedAreas(graph, peak_areas)
    binary_peak = BinaryContinousAreas.from_old_areas(peak)

    pileup_intervals = (
        # Higher value on 3 than on 2
        [Interval(0, 10, [3]) for _ in range(2)]
        # Highest value if ending on 4
        + [Interval(0, 10, [4]) for _ in range(2)]
        # Highest value if including 1
        + [Interval(0, 10, [1]) for _ in range(2)]
        + [Interval(0, 10, [1, 2, 4])]
    )
    qvalues = DensePileup.from_intervals(graph, pileup_intervals)

    scored_peak = ScoredPeak.from_peak_and_pileup(binary_peak, qvalues)
    max_path = scored_peak.get_max_path()
    print(max_path)

    expected = Interval(5, 8, [1, 2, 4])
    self.assertEqual(max_path, expected)
def _create_graph_with_linear_blocks(self):
    """Build a graph of ``self.n_paths`` parallel blocks between two dummies.

    The parallel blocks get ids starting at 100 and each has length
    ``self.n_basepairs_length``.  Every one of them is connected from
    dummy node 1 and to dummy node 2 (both of length 1).  The result is
    stored in ``self.graph``, and ``self.translation`` is set to an empty
    ``Translation`` on that graph.
    """
    first_id = 100
    path_ids = range(first_id, first_id + self.n_paths)
    blocks = {node_id: Block(self.n_basepairs_length) for node_id in path_ids}
    graph = GraphWithReversals(blocks, {})

    # Dummy blocks for the start and the end of the graph.
    start_id, end_id = 1, 2
    start = Block(1)
    end = Block(1)

    # Wire up the edges first; the dummy blocks are registered afterwards
    # so they are not visited by this loop.
    for node_id in graph.blocks:
        graph.adj_list[start_id].append(node_id)
        graph.reverse_adj_list[node_id].append(start_id)
        graph.adj_list[node_id].append(end_id)
        graph.reverse_adj_list[end_id].append(node_id)

    graph.blocks[start_id] = start
    graph.blocks[end_id] = end

    self.graph = graph
    self.translation = Translation({}, {}, graph)
def test_many_nodes():
    """Align a read with LocalGraphAligner on a graph with three bubbles.

    The alignment starts at node 1, offset 4, and is expected to go
    through nodes ``[1, 3, 4, 5, 7, 9, 10]``.
    """
    nodes = {node_id: Block(1) for node_id in range(2, 10)}
    nodes[1] = Block(10)
    nodes[10] = Block(10)
    edges = {
        1: [2, 3],
        2: [4],
        3: [4],
        4: [5, 6],
        5: [7],
        6: [7],
        7: [8, 9],
        8: [10],
        9: [10],
    }
    graph = Graph(nodes, edges)
    graph.convert_to_numpy_backend()

    sequence_graph = SequenceGraph.create_empty_from_ob_graph(graph)
    node_sequences = {
        1: "ACTGACTGAC",
        10: "ACTGACTGAC",
        2: "A",
        3: "C",
        4: "A",
        5: "G",
        6: "C",
        7: "T",
        8: "T",
        9: "A",
    }
    for node_id, sequence in node_sequences.items():
        sequence_graph.set_sequence(node_id, sequence)

    linear_ref_nodes = {1, 2, 4, 6, 7, 8, 10}
    read_sequence = "ACTGACCAGTAACTGAC"
    start_node, start_offset = 1, 4

    aligner = LocalGraphAligner(
        graph, sequence_graph, read_sequence, linear_ref_nodes,
        start_node, start_offset)
    alignment, _score = aligner.align()

    assert alignment == [1, 3, 4, 5, 7, 9, 10]
def test_special_case(self):
    """Build a subgraph collection where node 3 is a dead-end branch.

    Only prints the resulting subgraphs; nothing is asserted.
    """
    blocks = {node_id: Block(3) for node_id in range(1, 7)}
    edges = {1: [2, 3], 2: [4], 4: [5], 5: [6]}
    graph = Graph(blocks, edges)

    main_path = Interval(0, 3, [1, 2, 4, 5, 6])
    side_branch = Interval(0, 3, [3])
    pileup = SparsePileup.from_intervals(graph, [main_path, side_branch])

    collection = SubgraphCollection.from_pileup(graph, pileup)
    print(collection.subgraphs)
def test_get_p_value_track(self):
    """Check the p-value track for a one-block and a two-block graph.

    For each case a sample pileup and a control pileup are built from
    intervals and passed to ``self._test_get_p_value_track``.
    """
    # Case 1: the module-level one-block graph.
    graph = one_block_graph
    sample_intervals = [
        Interval(1, 10, [1], graph),
        Interval(5, 7, [1], graph),
    ]
    control_intervals = [Interval(0, 10, [1], graph)]

    sample = Pileup(graph)
    sample.add_intervals(sample_intervals)
    control = Pileup(graph)
    control.add_intervals(control_intervals)
    self._test_get_p_value_track(graph, sample, control)

    # Case 2: two connected blocks.
    blocks = {1: Block(10), 2: Block(10)}
    edges = {1: [2]}
    graph = offsetbasedgraph.Graph(blocks, edges)
    sample_intervals = [
        Interval(1, 10, [1], graph),
        Interval(5, 7, [1], graph),
        Interval(4, 6, [1], graph),
        Interval(0, 10, [2], graph),
        Interval(1, 3, [2], graph),
    ]
    control_intervals = [
        Interval(0, 10, [1], graph),
        Interval(0, 10, [2], graph),
        Interval(5, 5, [1, 2], graph),
    ]

    sample = Pileup(graph)
    sample.add_intervals(sample_intervals)
    control = Pileup(graph)
    control.add_intervals(control_intervals)
    self._test_get_p_value_track(graph, sample, control)
def setUp(self):
    """Create small graphs and areas on the middle node of the simple graph.

    Sets a three-node linear graph and the same graph written with
    reversed edges, two two-node graphs whose single edge joins nodes of
    opposite direction, and three ``ConnectedAreas`` on node 2 of the
    simple graph.
    """
    forward_edges = {1: [2], 2: [3]}
    self.simple_graph = GraphWithReversals(
        {node_id: Block(3) for node_id in (1, 2, 3)}, forward_edges)

    reversed_edges = {-2: [-1], -3: [-2]}
    self.reversed_simple_graph = GraphWithReversals(
        {node_id: Block(3) for node_id in (1, 2, 3)}, reversed_edges)

    self.simple_graphs = [self.simple_graph, self.reversed_simple_graph]

    self.graph2 = Graph({1: Block(3), 2: Block(3)}, {-2: [1]})
    self.graph3 = Graph({1: Block(3), 2: Block(3)}, {2: [-1]})

    # Areas on node 2: positions 0-3, 1-2 and 0-2.
    self.middle_areas = ConnectedAreas(
        self.simple_graph, {2: np.array([0, 3])})
    self.middle_closed_area = ConnectedAreas(
        self.simple_graph, {2: np.array([1, 2])})
    self.middle_left_area = ConnectedAreas(
        self.simple_graph, {2: np.array([0, 2])})
def setUp(self):
    """Create a three-node linear graph, its graph index and an extender.

    The index maps each node id (both signs) to a list of
    ``(node id, number)`` pairs.
    """
    blocks = {1: Block(10), 2: Block(10), 3: Block(10)}
    edges = {1: [2], 2: [3]}
    self.graph = Graph(blocks, edges)

    index_entries = {
        1: [(2, 10), (3, 20)],
        2: [(3, 10)],
        3: [],
        -1: [],
        -2: [(-1, 10)],
        -3: [(-2, 10), (-1, 20)],
    }
    self.index = GraphIndex(index_entries)
    self.extender = GraphExtender(self.index)
def setUp(self):
    """Create ``self.complex_graph`` (12 blocks of length 3) on the numpy backend."""
    blocks = {node_id: Block(3) for node_id in range(1, 13)}
    edges = {
        1: [2, 3],
        2: [7, 8],
        3: [4, 5],
        4: [6],
        5: [6],
        6: [10],
        7: [9],
        8: [9],
        9: [10],
        10: [12],
    }
    self.complex_graph = Graph(blocks, edges)
    self.complex_graph.convert_to_numpy_backend()
def setUp(self):
    """Create an eleven-node branching graph and write its linear map.

    Sets ``self.graph`` and ``self.linear_length`` (18); the linear map
    created from the graph is written to ``test_linear_map.npz``.
    """
    blocks = {node_id: Block(3) for node_id in range(1, 12)}
    edges = {
        1: [2, 3],
        2: [7, 8],
        3: [4, 5],
        4: [6],
        5: [6],
        6: [10],
        7: [9],
        8: [9],
        9: [10],
        10: [11],
    }
    self.graph = GraphWithReversals(blocks, edges)
    self.linear_length = 18

    linear_map = LinearMap.from_graph(self.graph)
    linear_map.to_file("test_linear_map.npz")
def create_linear_graph(self):
    """Create a linear graph with its snarl graph and linear map, and save them.

    Builds ``self.n_nodes`` blocks of length ``self.node_size`` joined in
    a chain and writes the graph to ``self.GRAPH_NAME``.  One snarl from
    node 1 to node ``self.n_nodes`` is then used to build
    ``self.snarlgraph``, from which ``self.linear_map`` is created and
    written to ``self.MAP_NAME``.
    """
    node_ids = range(1, self.n_nodes + 1)
    nodes = {node_id: Block(self.node_size) for node_id in node_ids}
    adj_list = {
        node_id: [node_id + 1] for node_id in range(1, self.n_nodes)
    }
    self.graph = GraphWithReversals(nodes, adj_list)
    self.graph.to_file(self.GRAPH_NAME)

    # Ids above the node ids: one for the snarl, the next for the counter.
    snarl_id = self.n_nodes + 2
    snarls = {snarl_id: SimpleSnarl(1, self.n_nodes, id=snarl_id)}
    snarlbuilder = SnarlGraphBuilder(
        self.graph, snarls=snarls, id_counter=self.n_nodes + 3)
    self.snarlgraph = snarlbuilder.build_snarl_graphs()

    self.linear_map = LinearSnarlMap.from_snarl_graph(
        self.snarlgraph, self.graph)
    self.linear_map.to_json_files(self.MAP_NAME)
def setUp(self):
    """Create a diamond graph, its graph index and an extender.

    The index maps each node id (both signs) to a list of
    ``(node id, number)`` pairs.
    """
    blocks = {1: Block(10), 2: Block(10), 3: Block(10), 4: Block(10)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    self.graph = Graph(blocks, edges)

    index_entries = {
        1: [(2, 10), (3, 10), (4, 20)],
        2: [(4, 10)],
        3: [(4, 10)],
        4: [],
        -1: [],
        -2: [(-1, 10)],
        -3: [(-1, 10)],
        -4: [(-2, 10), (-3, 10), (-1, 20)],
    }
    self.index = GraphIndex(index_entries)
    self.extender = GraphExtender(self.index)