    def test_three_nodes_in(self):
        graph = Graph({i: Block(5)
                       for i in range(1, 5)}, {
                           1: [4],
                           2: [4],
                           3: [4]
                       })

        intervals = [
            Interval(2, 5, [1]),
            Interval(2, 5, [2]),
            Interval(2, 5, [3]),
            Interval(0, 3, [4])
        ]
        pileup = DensePileup.from_intervals(graph, intervals)

        subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
            graph, pileup)
        print(subgraphs)

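        # add_start(node, length) marks `length` base pairs from the start of
        # the given directed node; a negative id refers to the reversed node,
        # so add_start(-1, 3) covers the last three bases of node 1, matching
        # Interval(2, 5, [1]) above. (Interpretation inferred from the test
        # data, not stated explicitly in the source.)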
        correct1 = BinaryContinousAreas(graph)
        correct1.add_start(-1, 3)
        correct1.add_start(-2, 3)
        correct1.add_start(-3, 3)
        correct1.add_start(4, 3)

        self.assertTrue(correct1 in subgraphs)
    def test_find_max_path_through_subgraph_multiple_paths(self):

        graph = Graph({
            1: Block(10),
            2: Block(10),
            3: Block(10),
            4: Block(10)
        }, {
            1: [2, 3],
            2: [4],
            3: [4]
        })

        peak = ConnectedAreas(graph, {
            2: [0, 10],
            3: [0, 10],
            1: [5, 10],
            4: [0, 3]
        })

        binary_peak = BinaryContinousAreas.from_old_areas(peak)
        qvalues = DensePileup.from_intervals(
            graph,
            [
                Interval(7, 2, [1, 3, 4])  # gives a higher q-value
                # through this path
            ])

        print(qvalues)

        scored_peak = ScoredPeak.from_peak_and_pileup(binary_peak, qvalues)
        print(scored_peak)

        max_path = scored_peak.get_max_path()
        self.assertEqual(max_path, Interval(5, 3, [1, 3, 4]))
    def test_create_from_nongraphpeakcollection(self):

        graph = Graph({
            1: Block(10),
            2: Block(10),
            3: Block(10)
        }, {
            1: [2],
            2: [3]
        })
        graph.convert_to_numpy_backend()
        linear_path = Interval(0, 10, [1, 2, 3], graph)
        linear_path = linear_path.to_numpy_indexed_interval()

        nongraph_peaks = NonGraphPeakCollection([
            NonGraphPeak("chr1", 3, 10, 5),
            NonGraphPeak("chr1", 13, 15, 7),
        ])

        peaks = PeakCollection.create_from_nongraph_peak_collection(
            graph, nongraph_peaks, linear_path, None)

        self.assertEqual(peaks.intervals[0], Interval(3, 10, [1]))
        self.assertEqual(peaks.intervals[1], Interval(3, 5, [2]))

        peaks = PeakCollection.create_from_nongraph_peak_collection(
            graph, nongraph_peaks, linear_path, LinearRegion("chr1", 3, 20))
        self.assertEqual(peaks.intervals[0], Interval(0, 7, [1]))
        self.assertEqual(peaks.intervals[1], Interval(0, 2, [2]))
    def test_find_max_path_on_start_and_end_node(self):

        graph = Graph({
            1: Block(10),
            2: Block(10),
            3: Block(10),
            4: Block(10)
        }, {
            1: [2, 3],
            2: [4],
            3: [4]
        })

        peak = ConnectedAreas(graph, {
            2: [0, 10],
            4: [0, 10],
        })

        binary_peak = BinaryContinousAreas.from_old_areas(peak)
        qvalues = DensePileup.from_intervals(graph,
                                             [Interval(7, 2, [1, 2, 4])])
        scored_peak = ScoredPeak.from_peak_and_pileup(binary_peak, qvalues)

        max_path = scored_peak.get_max_path()
        self.assertEqual(max_path, Interval(0, 10, [2, 4]))
    def test_single_fragment(self):

        self.correct_pileup = from_intervals(
            self.graph, [Interval(0, 5, [1, 2, 3]),
                         Interval(0, 5, [1, 2, 3])])
        self.fragments = [Interval(0, 5, [1, 2, 3])]
        self.do_asserts()
    def simple_test():
        graph = Graph({
            1: Block(10),
            2: Block(1),
            3: Block(1),
            4: Block(10)
        }, {
            1: [2, 3],
            2: [4],
            3: [4]
        })
        graph.convert_to_numpy_backend()

        sequence_graph = SequenceGraph.create_empty_from_ob_graph(graph)
        sequence_graph.set_sequence(1, "GGGTTTATAC")
        sequence_graph.set_sequence(2, "A")
        sequence_graph.set_sequence(3, "C")
        sequence_graph.set_sequence(4, "GTACATTGTA")

        linear_ref = Interval(0, 10, [1, 2, 3], graph)
        linear_ref = linear_ref.to_numpy_indexed_interval()

        critical_nodes = set([4])

        finder = MinimizerFinder(graph,
                                 sequence_graph,
                                 critical_nodes,
                                 linear_ref,
                                 k=3,
                                 w=3)
        minimizers = finder.find_minimizers()
        assert minimizers.has_minimizer(2, 0)
        assert minimizers.has_minimizer(3, 0)
        assert minimizers.has_minimizer(4, 4)
    def test_find_valued_areas(self):
        pileup = DensePileup.from_intervals(
            graph,
            [Interval(2, 10, [1]), Interval(0, 10, [3])])
        valued_areas = pileup.find_valued_areas(1)
        self.assertEqual(valued_areas[1], [2, 10])
        self.assertEqual(valued_areas[3], [0, 10])
        self.assertEqual(valued_areas[2], [])
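    # NOTE (assumption): several of these excerpts reference a module-level
    # `graph` fixture (and other fixtures such as self.graph, split_graph,
    # self.linear_graph) that is not included here; the excerpts likely come
    # from more than one test module. Purely as an illustration, a fixture
    # consistent with the node ids and offsets used by the DensePileup tests
    # might look roughly like the sketch below; the original fixtures may
    # well differ.
    #
    #     from offsetbasedgraph import Graph, Block
    #     graph = Graph({i: Block(10) for i in range(1, 4)}, {1: [2], 2: [3]})
    #     graph.convert_to_numpy_backend()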
    def test_sample_equals_control_one_node(self):
        sample = from_intervals(self.graph, [Interval(0, 3, [2])])
        control = from_intervals(self.graph, [Interval(0, 3, [1, 2])])

        finder = PValuesFinder(sample, control)
        p_values = finder.get_p_values_pileup()
        correct = SparseValues([0, 3, 6], [0, -np.log10(0.26424), 0])
        self.assertEqual(p_values, correct)
    def test_special_case2(self):
        pileup = DensePileup.from_intervals(
            graph,
            [Interval(0, 3, [1]), Interval(5, 10, [2])])

        cleaner = DagHoleCleaner(pileup, 3)
        left_holes = cleaner.get_left_side_of_holes()
        self.assertEqual(left_holes, [(1, 3), (3, 0)])
    def _create_intervals_around_peak_position(self, node_id, offset):

        linear_interval = Interval(int(offset - self.peak_size / 2),
                                   int(offset + self.peak_size / 2), [node_id])
        for i in range(0, self.n_reads_at_peak):
            self._sample_linear_reads.append(linear_interval.copy())
            self.n_sample_reads += 1

        self.linear_peaks.append(linear_interval)
    def test_get_interval_values(self):
        pileup = DensePileup.from_intervals(
            graph,
            [Interval(5, 5, [1, 2], graph),
             Interval(7, 3, [1, 2], graph)])
        values = pileup.data.get_interval_values(Interval(5, 5, [1, 2], graph))
        self.assertTrue(np.all(values == [1, 1, 2, 2, 2, 2, 2, 2, 1, 1]))

        values = pileup.data.get_interval_values(Interval(3, 6, [1], graph))
        self.assertTrue(np.all(values == [0, 0, 1]))
    def _read_alignments(self):
        if self.alignment_file_name.endswith(".json"):
            # vg JSON alignments, converted to an interval collection
            self.alignments = vg_json_file_to_interval_collection(
                self.alignment_file_name).intervals
        elif self.alignment_file_name.endswith(".graphnodes"):
            # each line: read id, then a comma-separated list of node ids
            self.alignments = (
                Interval(0, 1,
                         [int(n) for n in line.strip().split()[1].split(",")])
                for line in open(self.alignment_file_name))
        elif self.alignment_file_name.endswith(".graphalignments"):
            # each line: read id, tab, interval line ("." means unaligned)
            self.alignments = (
                Interval.from_file_line(line.strip().split("\t")[1])
                for line in open(self.alignment_file_name)
                if line.strip().split("\t")[1] != ".")
        else:
            self.alignments = IntervalCollection.from_file(
                self.alignment_file_name).intervals
    def test_count_unique_reads(self):
        reads = [
            IntervalCollection([
                Interval(4, 10, [1, 2, 3]),
                Interval(4, 5, [1]),
                Interval(5, 5, [1]),
                # reversed read; the duplicate that brings the count to 3
                Interval(6, 2, [-3, -2, -1])
            ])
        ]
        unique = MultipleGraphsCallpeaks.count_number_of_unique_reads(reads)
        self.assertEqual(unique, 3)
    def test_single_peak(self):
        pileup = DensePileup.from_intervals(graph, [Interval(0, 3, [1])])
        pileup.threshold(0.5)

        cleaner = DagHoleCleaner(pileup, 3)
        pileup = cleaner.run()

        correct_pileup = DensePileup.from_intervals(graph,
                                                    [Interval(0, 6, [1])])

        self.assertEqual(pileup, correct_pileup)
    def __test_fill_small_holes_non_dag_simple(self):
        pileup = DensePileup.from_intervals(
            graph,
            [Interval(1, 8, [1]),
             Interval(2, 3, [2]),
             Interval(9, 4, [2, 3])])

        pileup.fill_small_wholes(4)
        correct_pileup = DensePileup.from_intervals(
            graph, [Interval(1, 3, [1, 2]),
                    Interval(9, 4, [2, 3])])
        self.assertEqual(pileup, correct_pileup)
    def test_find_max_path_on_split_graph(self):

        pileup = SparsePileup(self.split_graph)
        pileup.data = {
            1: ValuedIndexes([], [], 2, 10),
            2: ValuedIndexes([], [], 3, 10),
            3: ValuedIndexes([], [], 2, 10),
            4: ValuedIndexes([1, 4], [0, 3], 2, 10)
        }
        self._assert_finds_max_paths(
            [Interval(0, 1, [1, 2, 4]),
             Interval(4, 10, [4])], self.split_graph, pileup)
    def test_filter_duplicates(self):
        intervals = [
            Interval(0, 10, [1, 2, 3]),
            Interval(1, 10, [1, 2, 3]),
            Interval(0, 10, [1, 2, 3])
        ]

        interval_collection = IntervalCollection(intervals)
        intervals_filtered = list(UniqueIntervals(interval_collection))

        self.assertEqual(len(intervals_filtered), len(intervals) - 1)
        self.assertEqual(intervals_filtered[0], intervals[0])
        self.assertEqual(intervals_filtered[1], intervals[1])
    def test_single_peak_split_graph(self):
        pileup = DensePileup.from_intervals(split_graph,
                                            [Interval(0, 10, [1])])
        pileup.threshold(0.5)

        cleaner = DagHoleCleaner(pileup, 5)
        pileup = cleaner.run()

        correct_pileup = DensePileup.from_intervals(
            split_graph, [Interval(0, 5, [1, 2]),
                          Interval(0, 5, [3])])

        self.assertEqual(pileup, correct_pileup)
    def test_simple(self):

        intervals = [Interval(1, 2, [1, 2]), Interval(1, 4, [3])]
        pileup = DensePileup.from_intervals(self.linear_graph, intervals)

        subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
            self.linear_graph, pileup)
        print(subgraphs)

        scored_peaks = (ScoredPeak.from_peak_and_pileup(peak, self.scores)
                        for peak in subgraphs)
        max_paths = [peak.get_max_path() for peak in scored_peaks]
        self.assertTrue(all(interval in max_paths for interval in intervals))
    def test_single_hole_dual_rp(self):
        pileup = DensePileup.from_intervals(
            graph,
            [Interval(0, 8, [1]), Interval(3, 7, [2])])
        pileup.threshold(0.5)

        cleaner = DagHoleCleaner(pileup, 5)
        pileup = cleaner.run()

        correct_pileup = DensePileup.from_intervals(
            graph, [Interval(0, 2, [1, 2, 3])])

        self.assertEqual(pileup, correct_pileup)
    def test_multiple_start_and_end_nodes(self):

        pileup = SparsePileup(self.multi_start_end_graph)
        pileup.data = {
            1: ValuedIndexes([], [], 2, 10),
            2: ValuedIndexes([], [], 2.2, 10),
            3: ValuedIndexes([1, 9], [2, 0], 0, 10),
            4: ValuedIndexes([], [], 2, 10),
            5: ValuedIndexes([3], [3], 0, 10),
        }

        self._assert_finds_max_paths(
            [Interval(0, 10, [2, 3, 4]),
             Interval(3, 10, [5])], self.multi_start_end_graph, pileup)
    def test_complex_graph(self):
        intervals = IntervalCollection([
            Interval(0, 3, [1, 3, 4, 6, 10]),
            Interval(1, 2, [2]),
            Interval(2, 3, [2]),
            Interval(0, 3, [7, 9])
        ])
        haplotyper = HaploTyper(self.complex_graph, intervals)
        haplotyper.build()
        max_interval = haplotyper.get_maximum_interval_through_graph()

        self.assertEqual(
            max_interval,
            Interval(0, 3, [1, 2, 7, 9, 10, 12])
        )
    def test_simple2(self):
        intervals = [Interval(1, 5, [1]), Interval(1, 2, [2, 3])]
        pileup = DensePileup.from_intervals(self.graph, intervals)

        subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
            self.graph, pileup)
        #print(subgraphs)

        scored_peaks = (ScoredPeak.from_peak_and_pileup(peak, self.scores)
                        for peak in subgraphs)
        max_paths = [peak.get_max_path() for peak in scored_peaks]
        print(max_paths)
        self.assertTrue(
            Interval(1, 2, [1, 3]) in max_paths
            or Interval(1, 2, [2, 3]) in max_paths)
    def test_single_read(self):
        fragment_length = 3
        reads = [Interval(0, 3, [2])]
        extension_sizes = [8]
        control = create_control("test_linear_map.tmp",
                                 reads,
                                 extension_sizes,
                                 fragment_length,
                                 ob_graph=self.graph)
        # expected background = average coverage if all fragments were spread
        # uniformly over the linear genome; inside an extension the estimate
        # is fragment_length / extension_size
        expected_background = len(reads) * fragment_length / self.linear_length
        value_in_extension = 1 * fragment_length / (extension_sizes[0])

        correct_pileup = OldSparsePileup.from_base_value(
            self.graph, expected_background)
        for rp in [2, 3, 1]:
            correct_pileup.data[rp] = ValuedIndexes([], [], value_in_extension,
                                                    3)

        for rp in [7, 8, 4, 5]:
            correct_pileup.data[rp] = ValuedIndexes([1], [expected_background],
                                                    value_in_extension, 3)

        for rp in [11]:
            correct_pileup.data[rp] = ValuedIndexes([2], [value_in_extension],
                                                    expected_background, 3)

        self.assertTrue(control.equals_old_sparse_pileup(correct_pileup))
    def test_finds_correct_max_path_among_many_paths(self):
        graph = GraphWithReversals(
            {
                1: Block(10),
                2: Block(10),
                3: Block(10),
                4: Block(10),
                5: Block(10)
            }, {
                1: [2, 3, 4],
                2: [5],
                4: [5],
                3: [5]
            })

        pileup = SparsePileup(graph)
        pileup.data = {
            1: ValuedIndexes([], [], 2, 10),
            # Higher qval, but two holes with low
            2: ValuedIndexes([1, 2, 7, 8], [0, 2.001, 0, 2.001], 2, 10),
            3: ValuedIndexes([], [], 1.5, 10),
            4: ValuedIndexes([], [], 2, 10),
            5: ValuedIndexes([], [], 2, 10)
        }
        self._assert_finds_max_paths([Interval(0, 10, [1, 4, 5])], graph,
                                     pileup)
    def test_single_read_two_extensions(self):
        fragment_length = 3
        reads = [Interval(0, 3, [2])]
        extension_sizes = [2, 8]
        control = SparseControl("test_linear_map.npz", self.graph,
                                extension_sizes, fragment_length,
                                set(self.graph.blocks.keys())).create(reads)

        expected_background = len(reads) * fragment_length / self.linear_length
        value_in_extensions = 1 * fragment_length / (np.array(extension_sizes))

        control = control.to_dense_pileup(3 * 11)
        correct_pileup = expected_background * np.ones(3 * 11)
        for rp in [2, 3]:
            idx = rp - 1
            correct_pileup[idx * 3:(idx + 1) * 3] = [
                value_in_extensions[0], value_in_extensions[1],
                value_in_extensions[1]
            ]

        correct_pileup[0:3] = [
            value_in_extensions[1], value_in_extensions[1],
            value_in_extensions[0]
        ]

        for rp in [7, 8, 4, 5]:
            idx = rp - 1
            correct_pileup[idx * 3:(idx + 1) * 3] = [
                value_in_extensions[1], expected_background, expected_background
            ]
        self.assertTrue(np.allclose(control, correct_pileup))
    def test_simple_two_peaks(self):

        intervals = [Interval(2, 5, [1]), Interval(0, 5, [3])]
        pileup = DensePileup.from_intervals(self.linear_graph, intervals)

        subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
            self.linear_graph, pileup)
        print(subgraphs)

        correct1 = BinaryContinousAreas(self.linear_graph)
        correct1.add_start(-1, 3)
        correct2 = BinaryContinousAreas(self.linear_graph)
        correct2.add_full(3)

        self.assertTrue(correct1 in subgraphs)
        self.assertTrue(correct2 in subgraphs)
    def fill_small_wholes(self,
                          max_size,
                          write_holes_to_file=None,
                          touched_nodes=None):
        cleaner = HolesCleaner(self, max_size, touched_nodes=touched_nodes)
        areas = cleaner.run()
        n_filled = 0

        hole_intervals = []

        for node_id in areas.areas:
            if touched_nodes is not None:
                if node_id not in touched_nodes:
                    continue

            starts = areas.get_starts(node_id)
            ends = areas.get_ends(node_id)
            for start, end in zip(starts, ends):
                self.data[node_id].set_interval_value(start, end, True)
                logging.debug("Filling hole %s, %d, %d" %
                              (node_id, start, end))
                n_filled += 1
                assert end - start <= max_size
                hole_intervals.append(Interval(start, end, [node_id]))

        logging.info("Filled %d small holes (split into holes per node)" %
                     n_filled)

        if write_holes_to_file is not None:
            intervals = IntervalCollection(hole_intervals)
            intervals.to_file(write_holes_to_file, text_file=True)

        self.sanitize()
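    # A minimal usage sketch for fill_small_wholes above (hypothetical file
    # name and node set, not taken from the original tests):
    #
    #     pileup.fill_small_wholes(
    #         max_size=10,
    #         write_holes_to_file="filled_holes.intervals",  # optional text output
    #         touched_nodes={1, 2, 3})                       # restrict to these nodes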
    def test_simple3(self):
        graph = Graph({i: Block(5)
                       for i in range(1, 6)}, {
                           1: [3],
                           2: [3],
                           3: [4, 5]
                       })
        scores = DensePileup.from_intervals(
            graph, [Interval(0, 5, [i]) for i in range(1, 6)])
        intervals = [
            Interval(0, 5, [1]),
            Interval(0, 5, [3]),
            Interval(0, 5, [4]),
            Interval(0, 3, [5])
        ]
        pileup = DensePileup.from_intervals(graph, intervals)
        subgraphs = SubgraphCollectionPartiallyOrderedGraph.create_from_pileup(
            graph, pileup)
        scored_peaks = (ScoredPeak.from_peak_and_pileup(peak, scores)
                        for peak in subgraphs)
        max_paths = [peak.get_max_path() for peak in scored_peaks]

        self.assertTrue(
            Interval(0, 5, [1, 3, 4]) in max_paths
            or Interval(0, 3, [1, 3, 5]) in max_paths)
    def _test_with_reversal_and_hole(self):
        pileup = SparsePileup(self.graph_with_reversal)
        pileup.data = {
            1: ValuedIndexes([], [], 2, 10),
            2: ValuedIndexes([9], [0], 2, 10),
            3: ValuedIndexes([1], [3], 0, 10),
        }
        self._assert_finds_max_paths([Interval(0, 10, [-3, -2])],
                                     self.graph_with_reversal, pileup)