def test_three_nodes_in(self):
    """Check the subgraph found when nodes 1, 2 and 3 all lead into node 4.

    The pileup is built from one interval on each of the four nodes. The
    expected area is assembled with ``add_start`` calls for -1, -2, -3
    and 4 and must be contained in the resulting subgraph collection.
    """
    blocks = {node_id: Block(5) for node_id in range(1, 5)}
    edges = {source: [4] for source in (1, 2, 3)}
    graph = Graph(blocks, edges)

    intervals = [Interval(2, 5, [source]) for source in (1, 2, 3)]
    intervals.append(Interval(0, 3, [4]))

    pileup = DensePileup.from_intervals(graph, intervals)
    subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
        graph, pileup)
    print(subgraphs)

    expected = BinaryContinousAreas(graph)
    for node_id in (-1, -2, -3, 4):
        expected.add_start(node_id, 3)

    self.assertTrue(expected in subgraphs)
def test_find_max_path_through_subgraph_multiple_paths(self):
    """Max path of a peak that covers both branches of a diamond graph.

    The graph is 1 -> {2, 3} -> 4. The q-value pileup is built from a
    single interval over nodes 1, 3 and 4, and the expected max path runs
    through those same nodes.
    """
    blocks = {node_id: Block(10) for node_id in range(1, 5)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    graph = Graph(blocks, edges)

    peak_areas = {2: [0, 10], 3: [0, 10], 1: [5, 10], 4: [0, 3]}
    binary_peak = BinaryContinousAreas.from_old_areas(
        ConnectedAreas(graph, peak_areas))

    # Giving higher qvalue through this path
    scored_path = Interval(7, 2, [1, 3, 4])
    qvalues = DensePileup.from_intervals(graph, [scored_path])
    print(qvalues)

    scored_peak = ScoredPeak.from_peak_and_pileup(binary_peak, qvalues)
    print(scored_peak)

    self.assertEqual(scored_peak.get_max_path(), Interval(5, 3, [1, 3, 4]))
def test_create_from_nongraphpeakcollection(self):
    """Convert two ``NonGraphPeak`` objects into graph intervals.

    The conversion is run twice against a three-node linear graph: first
    with ``None`` as the last argument, then with a
    ``LinearRegion("chr1", 3, 20)``. The two resulting intervals are
    checked in each case.
    """
    graph = Graph({node_id: Block(10) for node_id in (1, 2, 3)},
                  {1: [2], 2: [3]})
    graph.convert_to_numpy_backend()
    linear_path = Interval(0, 10, [1, 2, 3], graph)
    linear_path = linear_path.to_numpy_indexed_interval()

    linear_peaks = NonGraphPeakCollection([
        NonGraphPeak("chr1", 3, 10, 5),
        NonGraphPeak("chr1", 13, 15, 7),
    ])

    # First pass: no linear region supplied.
    converted = PeakCollection.create_from_nongraph_peak_collection(
        graph, linear_peaks, linear_path, None)
    self.assertEqual(converted.intervals[0], Interval(3, 10, [1]))
    self.assertEqual(converted.intervals[1], Interval(3, 5, [2]))

    # Second pass: same peaks, now with an explicit linear region.
    converted = PeakCollection.create_from_nongraph_peak_collection(
        graph, linear_peaks, linear_path, LinearRegion("chr1", 3, 20))
    self.assertEqual(converted.intervals[0], Interval(0, 7, [1]))
    self.assertEqual(converted.intervals[1], Interval(0, 2, [2]))
def test_find_max_path_on_start_and_end_node(self):
    """Max path of a peak that fully covers nodes 2 and 4 of a diamond graph.

    The q-value pileup comes from one interval over nodes 1, 2 and 4; the
    expected max path is ``Interval(0, 10, [2, 4])``.
    """
    blocks = {node_id: Block(10) for node_id in range(1, 5)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    graph = Graph(blocks, edges)

    peak_areas = {
        2: [0, 10],
        4: [0, 10],
    }
    binary_peak = BinaryContinousAreas.from_old_areas(
        ConnectedAreas(graph, peak_areas))

    scored_path = Interval(7, 2, [1, 2, 4])
    qvalues = DensePileup.from_intervals(graph, [scored_path])
    scored_peak = ScoredPeak.from_peak_and_pileup(binary_peak, qvalues)

    self.assertEqual(scored_peak.get_max_path(), Interval(0, 10, [2, 4]))
def test_single_fragment(self):
    """Run ``do_asserts`` for a single fragment over nodes 1, 2 and 3.

    The expected pileup is built from two identical intervals, while
    ``self.fragments`` holds one such interval.
    """
    self.correct_pileup = from_intervals(
        self.graph, [Interval(0, 5, [1, 2, 3]) for _ in range(2)])
    self.fragments = [Interval(0, 5, [1, 2, 3])]
    self.do_asserts()
def simple_test():
    """Find minimizers on a small diamond graph and check three of them.

    Builds the graph 1 -> {2, 3} -> 4 with sequences attached, runs
    ``MinimizerFinder`` with ``k=3, w=3`` and node 4 as the only critical
    node, then asserts that minimizers exist at (2, 0), (3, 0) and (4, 4).
    """
    blocks = {1: Block(10), 2: Block(1), 3: Block(1), 4: Block(10)}
    edges = {1: [2, 3], 2: [4], 3: [4]}
    graph = Graph(blocks, edges)
    graph.convert_to_numpy_backend()

    sequence_graph = SequenceGraph.create_empty_from_ob_graph(graph)
    node_sequences = (
        (1, "GGGTTTATAC"),
        (2, "A"),
        (3, "C"),
        (4, "GTACATTGTA"),
    )
    for node_id, bases in node_sequences:
        sequence_graph.set_sequence(node_id, bases)

    linear_ref = Interval(0, 10, [1, 2, 3], graph)
    linear_ref = linear_ref.to_numpy_indexed_interval()

    critical_nodes = {4}
    finder = MinimizerFinder(
        graph, sequence_graph, critical_nodes, linear_ref, k=3, w=3)
    minimizers = finder.find_minimizers()

    for node_id, offset in ((2, 0), (3, 0), (4, 4)):
        assert minimizers.has_minimizer(node_id, offset)
def test_find_valued_areas(self):
    """Check ``find_valued_areas(1)`` for a pileup covering nodes 1 and 3.

    Node 2 has no interval on it and is expected to map to an empty list.
    """
    covered = [Interval(2, 10, [1]), Interval(0, 10, [3])]
    pileup = DensePileup.from_intervals(graph, covered)
    valued_areas = pileup.find_valued_areas(1)

    expected_by_node = ((1, [2, 10]), (3, [0, 10]), (2, []))
    for node_id, expected in expected_by_node:
        self.assertEqual(valued_areas[node_id], expected)
def test_sample_equals_control_one_node(self):
    """Compare the p-value pileup against hand-computed ``SparseValues``.

    The sample is one interval on node 2 and the control is one interval
    over nodes 1 and 2.
    """
    sample = from_intervals(self.graph, [Interval(0, 3, [2])])
    control = from_intervals(self.graph, [Interval(0, 3, [1, 2])])

    p_values = PValuesFinder(sample, control).get_p_values_pileup()

    expected_p = -np.log10(0.26424)
    expected = SparseValues([0, 3, 6], [0, expected_p, 0])
    self.assertEqual(p_values, expected)
def test_special_case2(self):
    """Check ``get_left_side_of_holes`` for intervals on nodes 1 and 2.

    The cleaner is created with 3 as its second argument and is expected
    to report ``[(1, 3), (3, 0)]``.
    """
    covered = [Interval(0, 3, [1]), Interval(5, 10, [2])]
    pileup = DensePileup.from_intervals(graph, covered)

    left_holes = DagHoleCleaner(pileup, 3).get_left_side_of_holes()

    self.assertEqual(left_holes, [(1, 3), (3, 0)])
def _create_intervals_around_peak_position(self, node_id, offset):
    """Register one peak interval and its reads around ``offset``.

    An interval on ``node_id`` is built from ``offset - peak_size / 2`` to
    ``offset + peak_size / 2`` (both truncated with ``int``).
    ``n_reads_at_peak`` copies of it are appended to
    ``_sample_linear_reads``, ``n_sample_reads`` is incremented once per
    copy, and the interval itself is appended to ``linear_peaks``.
    """
    half_size = self.peak_size / 2
    peak = Interval(int(offset - half_size),
                    int(offset + half_size),
                    [node_id])

    for _ in range(self.n_reads_at_peak):
        self._sample_linear_reads.append(peak.copy())
        self.n_sample_reads += 1

    self.linear_peaks.append(peak)
def test_get_interval_values(self):
    """Check ``get_interval_values`` on a pileup of two overlapping intervals.

    Values are read back for one multi-node interval and one interval on
    node 1 only, and compared element-wise to the expected counts.
    """
    first = Interval(5, 5, [1, 2], graph)
    second = Interval(7, 3, [1, 2], graph)
    pileup = DensePileup.from_intervals(graph, [first, second])

    values = pileup.data.get_interval_values(Interval(5, 5, [1, 2], graph))
    expected = [1, 1, 2, 2, 2, 2, 2, 2, 1, 1]
    self.assertTrue(np.all(values == expected))

    values = pileup.data.get_interval_values(Interval(3, 6, [1], graph))
    expected = [0, 0, 1]
    self.assertTrue(np.all(values == expected))
def _read_alignments(self):
    """Populate ``self.alignments`` from ``self.alignment_file_name``.

    The loader is chosen from the file-name suffix:

    * ``.json``: ``vg_json_file_to_interval_collection(...).intervals``.
    * ``.graphnodes``: one ``Interval(0, 1, nodes)`` per line, where
      ``nodes`` are the comma-separated integers found in the second
      whitespace-separated column.
    * ``.graphalignments``: ``Interval.from_file_line`` applied to the
      second tab-separated column of each line; lines whose column is
      ``"."`` are skipped.
    * anything else: ``IntervalCollection.from_file(...).intervals``.

    For the two line-based formats ``self.alignments`` is a lazy,
    single-pass generator. The file is still opened immediately (so a
    missing file fails here, not on first iteration), but it is now closed
    as soon as the generator is exhausted or closed instead of being left
    open until garbage collection.
    """
    file_name = self.alignment_file_name

    def lines_of(handle):
        # Owns the already-open handle: the ``with`` block closes it when
        # the last line has been read or the generator is closed early.
        with handle:
            for line in handle:
                yield line

    if file_name.endswith(".json"):
        self.alignments = vg_json_file_to_interval_collection(
            file_name).intervals
    elif file_name.endswith(".graphnodes"):
        # ``open`` runs right away: the outermost iterable of a generator
        # expression is evaluated when the expression is created.
        self.alignments = (
            Interval(0, 1,
                     [int(n) for n in line.strip().split()[1].split(",")])
            for line in lines_of(open(file_name)))
    elif file_name.endswith(".graphalignments"):
        # Split each line only once, then drop the "." placeholders.
        columns = (line.strip().split("\t")[1]
                   for line in lines_of(open(file_name)))
        self.alignments = (Interval.from_file_line(column)
                           for column in columns if column != ".")
    else:
        self.alignments = IntervalCollection.from_file(file_name).intervals
def test_count_unique_reads(self):
    """Four reads in one collection are expected to count as three unique."""
    collection = IntervalCollection([
        Interval(4, 10, [1, 2, 3]),
        Interval(4, 5, [1]),
        Interval(5, 5, [1]),
        Interval(6, 2, [-3, -2, -1]),
    ])

    unique = MultipleGraphsCallpeaks.count_number_of_unique_reads(
        [collection])

    self.assertEqual(unique, 3)
def test_single_peak(self):
    """Run ``DagHoleCleaner`` (size 3) on a single thresholded peak on node 1.

    The cleaned pileup must equal one built from ``Interval(0, 6, [1])``.
    """
    thresholded = DensePileup.from_intervals(graph, [Interval(0, 3, [1])])
    thresholded.threshold(0.5)

    cleaned = DagHoleCleaner(thresholded, 3).run()

    expected = DensePileup.from_intervals(graph, [Interval(0, 6, [1])])
    self.assertEqual(cleaned, expected)
def __test_fill_small_holes_non_dag_simple(self):
    """Check ``fill_small_wholes(4)`` on three intervals over nodes 1-3.

    The name does not start with ``test``, so unittest discovery will not
    pick this method up as written.
    """
    covered = [
        Interval(1, 8, [1]),
        Interval(2, 3, [2]),
        Interval(9, 4, [2, 3]),
    ]
    pileup = DensePileup.from_intervals(graph, covered)
    pileup.fill_small_wholes(4)

    expected_cover = [Interval(1, 3, [1, 2]), Interval(9, 4, [2, 3])]
    expected = DensePileup.from_intervals(graph, expected_cover)
    self.assertEqual(pileup, expected)
def test_find_max_path_on_split_graph(self):
    """Expect two max paths on the split graph for the given pileup values."""
    pileup = SparsePileup(self.split_graph)
    node_values = {
        1: ValuedIndexes([], [], 2, 10),
        2: ValuedIndexes([], [], 3, 10),
        3: ValuedIndexes([], [], 2, 10),
        4: ValuedIndexes([1, 4], [0, 3], 2, 10),
    }
    pileup.data = node_values

    expected_paths = [Interval(0, 1, [1, 2, 4]), Interval(4, 10, [4])]
    self._assert_finds_max_paths(expected_paths, self.split_graph, pileup)
def test_filter_duplicates(self):
    """``UniqueIntervals`` should drop the repeated third interval.

    The first two intervals must come back unchanged and in order.
    """
    intervals = [
        Interval(0, 10, [1, 2, 3]),
        Interval(1, 10, [1, 2, 3]),
        Interval(0, 10, [1, 2, 3]),
    ]

    unique = list(UniqueIntervals(IntervalCollection(intervals)))

    self.assertEqual(len(unique), len(intervals) - 1)
    self.assertEqual(unique[0], intervals[0])
    self.assertEqual(unique[1], intervals[1])
def test_single_peak_split_graph(self):
    """Run ``DagHoleCleaner`` (size 5) on a peak covering node 1 of ``split_graph``.

    The cleaned pileup must equal one built from intervals reaching five
    positions into node 2 and into node 3.
    """
    thresholded = DensePileup.from_intervals(
        split_graph, [Interval(0, 10, [1])])
    thresholded.threshold(0.5)

    cleaned = DagHoleCleaner(thresholded, 5).run()

    expected_cover = [Interval(0, 5, [1, 2]), Interval(0, 5, [3])]
    expected = DensePileup.from_intervals(split_graph, expected_cover)
    self.assertEqual(cleaned, expected)
def test_simple(self):
    """Every input interval must appear among the peaks' max paths.

    Subgraphs are created from a pileup of two intervals on the linear
    graph, each one is scored against ``self.scores``, and the max paths
    are collected.
    """
    intervals = [Interval(1, 2, [1, 2]), Interval(1, 4, [3])]
    pileup = DensePileup.from_intervals(self.linear_graph, intervals)
    subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
        self.linear_graph, pileup)
    print(subgraphs)

    max_paths = [
        ScoredPeak.from_peak_and_pileup(peak, self.scores).get_max_path()
        for peak in subgraphs
    ]

    self.assertTrue(all(interval in max_paths for interval in intervals))
def test_single_hole_dual_rp(self):
    """Run ``DagHoleCleaner`` (size 5) on intervals on nodes 1 and 2.

    The cleaned pileup must equal one built from the single interval
    ``Interval(0, 2, [1, 2, 3])``.
    """
    covered = [Interval(0, 8, [1]), Interval(3, 7, [2])]
    thresholded = DensePileup.from_intervals(graph, covered)
    thresholded.threshold(0.5)

    cleaned = DagHoleCleaner(thresholded, 5).run()

    expected = DensePileup.from_intervals(
        graph, [Interval(0, 2, [1, 2, 3])])
    self.assertEqual(cleaned, expected)
def test_multiple_start_and_end_nodes(self):
    """Expect two max paths on ``multi_start_end_graph`` for the given values."""
    pileup = SparsePileup(self.multi_start_end_graph)
    node_values = {
        1: ValuedIndexes([], [], 2, 10),
        2: ValuedIndexes([], [], 2.2, 10),
        3: ValuedIndexes([1, 9], [2, 0], 0, 10),
        4: ValuedIndexes([], [], 2, 10),
        5: ValuedIndexes([3], [3], 0, 10),
    }
    pileup.data = node_values

    expected_paths = [Interval(0, 10, [2, 3, 4]), Interval(3, 10, [5])]
    self._assert_finds_max_paths(
        expected_paths, self.multi_start_end_graph, pileup)
def test_complex_graph(self):
    """Check the maximum interval ``HaploTyper`` finds on ``complex_graph``.

    Four reads are supplied; the expected result is
    ``Interval(0, 3, [1, 2, 7, 9, 10, 12])``.
    """
    reads = IntervalCollection([
        Interval(0, 3, [1, 3, 4, 6, 10]),
        Interval(1, 2, [2]),
        Interval(2, 3, [2]),
        Interval(0, 3, [7, 9]),
    ])

    haplotyper = HaploTyper(self.complex_graph, reads)
    haplotyper.build()

    self.assertEqual(
        haplotyper.get_maximum_interval_through_graph(),
        Interval(0, 3, [1, 2, 7, 9, 10, 12]))
def test_simple2(self):
    """One of two alternative paths must be among the peaks' max paths.

    Either ``Interval(1, 2, [1, 3])`` or ``Interval(1, 2, [2, 3])`` is
    accepted.
    """
    intervals = [Interval(1, 5, [1]), Interval(1, 2, [2, 3])]
    pileup = DensePileup.from_intervals(self.graph, intervals)
    subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
        self.graph, pileup)

    max_paths = [
        ScoredPeak.from_peak_and_pileup(peak, self.scores).get_max_path()
        for peak in subgraphs
    ]
    print(max_paths)

    found_either = (Interval(1, 2, [1, 3]) in max_paths
                    or Interval(1, 2, [2, 3]) in max_paths)
    self.assertTrue(found_either)
def test_single_read(self):
    """Compare the control for one read on node 2 with a hand-built pileup.

    The control uses a single extension size of 8. The expected pileup
    starts from the background value everywhere and is then overridden on
    nodes 1-5, 7, 8 and 11.
    """
    fragment_length = 3
    reads = [Interval(0, 3, [2])]
    extension_sizes = [8]
    control = create_control(
        "test_linear_map.tmp", reads, extension_sizes, fragment_length,
        ob_graph=self.graph)

    background = len(reads) * fragment_length / self.linear_length
    in_extension = 1 * fragment_length / (extension_sizes[0])

    expected = OldSparsePileup.from_base_value(self.graph, background)
    for node_id in (2, 3, 1):
        expected.data[node_id] = ValuedIndexes([], [], in_extension, 3)
    for node_id in (7, 8, 4, 5):
        expected.data[node_id] = ValuedIndexes(
            [1], [background], in_extension, 3)
    expected.data[11] = ValuedIndexes([2], [in_extension], background, 3)

    self.assertTrue(control.equals_old_sparse_pileup(expected))
def test_finds_correct_max_path_among_many_paths(self):
    """Expect the max path to run through node 4 when 1 fans out to 2, 3 and 4.

    Nodes 2, 3 and 4 all lead into node 5.
    """
    blocks = {node_id: Block(10) for node_id in range(1, 6)}
    edges = {1: [2, 3, 4], 2: [5], 4: [5], 3: [5]}
    graph = GraphWithReversals(blocks, edges)

    pileup = SparsePileup(graph)
    node_values = {
        1: ValuedIndexes([], [], 2, 10),
        # Higher qval, but two holes with low
        2: ValuedIndexes([1, 2, 7, 8], [0, 2.001, 0, 2.001], 2, 10),
        3: ValuedIndexes([], [], 1.5, 10),
        4: ValuedIndexes([], [], 2, 10),
        5: ValuedIndexes([], [], 2, 10),
    }
    pileup.data = node_values

    expected_paths = [Interval(0, 10, [1, 4, 5])]
    self._assert_finds_max_paths(expected_paths, graph, pileup)
def test_single_read_two_extensions(self):
    """Compare a two-extension control with a hand-built dense array.

    The control is created for one read on node 2 with extension sizes 2
    and 8, converted to a dense pileup of length ``3 * 11`` and compared
    with ``np.allclose``. The expected array treats each node as three
    consecutive positions, with node ``rp`` starting at ``(rp - 1) * 3``.
    """
    fragment_length = 3
    reads = [Interval(0, 3, [2])]
    extension_sizes = [2, 8]

    sparse_control = SparseControl(
        "test_linear_map.npz", self.graph, extension_sizes, fragment_length,
        set(self.graph.blocks.keys())).create(reads)

    background = len(reads) * fragment_length / self.linear_length
    in_extensions = 1 * fragment_length / (np.array(extension_sizes))

    dense_control = sparse_control.to_dense_pileup(3 * 11)
    expected = background * np.ones(3 * 11)

    def node_slice(rp):
        # Positions occupied by node ``rp`` in the flat array.
        first = (rp - 1) * 3
        return slice(first, first + 3)

    for rp in (2, 3):
        expected[node_slice(rp)] = [
            in_extensions[0], in_extensions[1], in_extensions[1]]

    expected[0:3] = [in_extensions[1], in_extensions[1], in_extensions[0]]

    for rp in (7, 8, 4, 5):
        expected[node_slice(rp)] = [in_extensions[1], background, background]

    self.assertTrue(np.allclose(dense_control, expected))
def test_simple_two_peaks(self):
    """Two separate intervals on the linear graph must yield two subgraphs.

    One expected area is built with ``add_start(-1, 3)`` and the other
    with ``add_full(3)``; both must be in the subgraph collection.
    """
    intervals = [Interval(2, 5, [1]), Interval(0, 5, [3])]
    pileup = DensePileup.from_intervals(self.linear_graph, intervals)
    subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
        self.linear_graph, pileup)
    print(subgraphs)

    first_expected = BinaryContinousAreas(self.linear_graph)
    first_expected.add_start(-1, 3)

    second_expected = BinaryContinousAreas(self.linear_graph)
    second_expected.add_full(3)

    self.assertTrue(first_expected in subgraphs)
    self.assertTrue(second_expected in subgraphs)
def fill_small_wholes(self, max_size, write_holes_to_file=None, touched_nodes=None):
    """Fill the holes reported by ``HolesCleaner`` and then sanitize.

    For every node in the cleaner's result (restricted to
    ``touched_nodes`` when that is not ``None``), each (start, end) hole
    is set to ``True`` in ``self.data`` and recorded as an ``Interval``.

    :param max_size: passed to ``HolesCleaner``; every filled hole is
        asserted to be no longer than this.
    :param write_holes_to_file: when not ``None``, the filled holes are
        written to this file as a text-format ``IntervalCollection``.
    :param touched_nodes: optional container of node ids; also forwarded
        to ``HolesCleaner``.
    """
    hole_areas = HolesCleaner(
        self, max_size, touched_nodes=touched_nodes).run()

    filled_count = 0
    filled_holes = []
    for node in hole_areas.areas:
        if touched_nodes is not None and node not in touched_nodes:
            continue

        hole_starts = hole_areas.get_starts(node)
        hole_ends = hole_areas.get_ends(node)
        for hole_start, hole_end in zip(hole_starts, hole_ends):
            self.data[node].set_interval_value(hole_start, hole_end, True)
            logging.debug("Filling hole %s, %d, %d"
                          % (node, hole_start, hole_end))
            filled_count += 1
            assert hole_end - hole_start <= max_size
            filled_holes.append(Interval(hole_start, hole_end, [node]))

    logging.info("Filled %d small holes (splitted into holes per node)"
                 % filled_count)

    if write_holes_to_file is not None:
        IntervalCollection(filled_holes).to_file(
            write_holes_to_file, text_file=True)

    self.sanitize()
def test_simple3(self):
    """One of two branch paths must be among the max paths.

    The graph has nodes 1 and 2 leading into 3, which leads into 4 and 5.
    Scores cover every node fully. Either ``Interval(0, 5, [1, 3, 4])`` or
    ``Interval(0, 3, [1, 3, 5])`` is accepted.
    """
    node_ids = range(1, 6)
    graph = Graph({node_id: Block(5) for node_id in node_ids},
                  {1: [3], 2: [3], 3: [4, 5]})

    full_cover = [Interval(0, 5, [node_id]) for node_id in node_ids]
    scores = DensePileup.from_intervals(graph, full_cover)

    intervals = [Interval(0, 5, [node_id]) for node_id in (1, 3, 4)]
    intervals.append(Interval(0, 3, [5]))
    pileup = DensePileup.from_intervals(graph, intervals)

    subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
        graph, pileup)
    max_paths = [
        ScoredPeak.from_peak_and_pileup(peak, scores).get_max_path()
        for peak in subgraphs
    ]

    found_either = (Interval(0, 5, [1, 3, 4]) in max_paths
                    or Interval(0, 3, [1, 3, 5]) in max_paths)
    self.assertTrue(found_either)
def _test_with_reversal_and_hole(self):
    """Expect a max path over reversed nodes -3 and -2 on ``graph_with_reversal``.

    The leading underscore keeps unittest discovery from running this
    method as written.
    """
    pileup = SparsePileup(self.graph_with_reversal)
    node_values = {
        1: ValuedIndexes([], [], 2, 10),
        2: ValuedIndexes([9], [0], 2, 10),
        3: ValuedIndexes([1], [3], 0, 10),
    }
    pileup.data = node_values

    expected_paths = [Interval(0, 10, [-3, -2])]
    self._assert_finds_max_paths(
        expected_paths, self.graph_with_reversal, pileup)