def test_get_overlapping_intervals(self):
    """Check the number of peaks in ``self.peaks`` overlapping a query peak.

    The query on nodes [1, 2] is expected to overlap exactly one stored
    peak; the query on nodes [1, 6] is expected to overlap exactly two.
    """
    overlapping = self.peaks.get_overlapping_intervals(
        Peak(3, 3, [1, 2], self.graph))
    # assertEqual, not assertTrue: assertTrue(x, 1) uses 1 as the failure
    # message and therefore passes for any non-empty result.
    self.assertEqual(len(overlapping), 1)

    overlapping = self.peaks.get_overlapping_intervals(
        Peak(3, 3, [1, 6], self.graph))
    self.assertEqual(len(overlapping), 2)
def test_get_identical_intervals(self):
    """Check the number of identical intervals found for two query peaks.

    Expects zero matches for Peak(2, 3, [1, 2, 3, 4]) and one match for
    Peak(3, 3, [1, 2, 3, 4]) against ``self.peaks``.
    """
    different_start = PeakCollection(
        [Peak(2, 3, [1, 2, 3, 4], self.graph)])
    matches = self.peaks.get_identical_intervals(different_start)
    self.assertEqual(len(matches), 0)

    same_peak = PeakCollection(
        [Peak(3, 3, [1, 2, 3, 4], self.graph)])
    matches = self.peaks.get_identical_intervals(same_peak)
    self.assertEqual(len(matches), 1)
def setUp(self):
    """Create the shared fixtures ``self.graph`` and ``self.peaks``.

    The graph has nodes 1..6, each a ``Block(3)``, with an edge from every
    node i to i + 1. The collection holds two peaks on that graph.
    """
    node_ids = range(1, 7)
    blocks = {node: Block(3) for node in node_ids}
    # The last node has no outgoing edge, hence the [:-1].
    edges = {node: [node + 1] for node in node_ids[:-1]}
    self.graph = Graph(blocks, edges)

    first_peak = Peak(3, 3, [1, 2, 3, 4], self.graph)
    second_peak = Peak(3, 3, [5, 6], self.graph)
    self.peaks = PeakCollection([first_peak, second_peak])
def test_multiple_peak():
    """Run SparseMaxPaths on ``complicated_offset()`` and expect two peaks."""
    graph = complicated_offset()
    track_size = 20

    pileup = SparseValues([0, 1, 7, 8, 10, 12, 14], [0, 1, 0, 1, 0, 1, 0])
    score_pileup = SparseValues([0, 4], [5, 6])
    score_pileup.track_size = track_size
    pileup.track_size = track_size

    found = SparseMaxPaths(pileup, graph, score_pileup).run()
    # Sort on the first entry of region_paths before comparing with the
    # expected list, which is written in that order.
    found.sort(key=lambda peak: peak.region_paths[0])
    print(found)

    expected = [
        Peak(1, 1, [101, 103, 104], graph=graph),
        Peak(0, 2, [105, 107], graph=graph),
    ]
    assert found == expected
def test_concatenate_fasta_files(self):
    """Check the ``concatenate_sequence_files`` command on two fasta files.

    Writes ``1_sequences.fasta`` (peak1, score 3) and ``2_sequences.fasta``
    (peak2, score 7) to the working directory, runs the command for
    chromosomes "1,2", and expects ``out.fasta`` to hold peak2's header on
    line 0 and peak1's header on line 2.
    """
    # Remove any output left over from an earlier run.
    if os.path.isfile("out.fasta"):
        os.remove("out.fasta")

    peak1 = Peak(3, 5, [1], score=3)
    peak1_header = ">peak1 " + peak1.to_file_line() + "\n"
    # Context managers close the files even if a later step raises.
    with open("1_sequences.fasta", "w") as file1:
        file1.writelines([peak1_header])
        file1.writelines(["AACC\n"])

    peak2 = Peak(2, 5, [1], score=7)
    peak2_header = ">peak2 " + peak2.to_file_line() + "\n"
    with open("2_sequences.fasta", "w") as file2:
        file2.writelines([peak2_header])
        file2.writelines(["CCAA\n"])

    run_argument_parser(["concatenate_sequence_files", "1,2", "out.fasta"])

    with open("out.fasta") as outfile:
        lines = outfile.readlines()

    # Each header is "<name> <peak file line>"; drop the name before parsing.
    self.assertEqual(
        Peak.from_file_line(lines[0].split(maxsplit=1)[1]), peak2)
    self.assertEqual(
        Peak.from_file_line(lines[2].split(maxsplit=1)[1]), peak1)
def test_approx_contains(self):
    """Check ``approx_contains_part_of_interval`` for three query peaks.

    Against peaks on nodes [1, 2, 3, 4] and [-10, 11], queries on node 1
    and node 10 are expected to be truthy and a query on node 100 falsy.
    """
    stored = [Peak(3, 3, [1, 2, 3, 4]), Peak(3, 3, [-10, 11])]
    peaks = PeakCollection(stored)
    peaks.create_node_index()

    hit_on_node_1 = peaks.approx_contains_part_of_interval(Peak(1, 2, [1]))
    self.assertTrue(hit_on_node_1)

    hit_on_node_10 = peaks.approx_contains_part_of_interval(Peak(1, 2, [10]))
    self.assertTrue(hit_on_node_10)

    hit_on_node_100 = peaks.approx_contains_part_of_interval(
        Peak(1, 2, [100]))
    self.assertFalse(hit_on_node_100)
def test_simple_peak():
    """Run SparseMaxPaths on a single-peak pileup and expect one peak."""
    # NOTE(review): `graph` is not bound inside this function; it is looked
    # up in an outer scope that is not visible here - confirm it is the
    # intended graph object.
    track_size = 100

    pileup = SparseValues([0, 5, 35], [False, True, False])
    score_pileup = SparseValues(
        [0, 5, 10, 20, 30, 35], [0, 1, 2, 3, 4, 0])
    score_pileup.track_size = track_size
    pileup.track_size = track_size

    found = SparseMaxPaths(pileup, graph, score_pileup).run()
    expected = [Peak(5, 5, [1, 3, 4], graph=graph)]
    assert found == expected
def test_trailing_zeros():
    """Run SparseMaxPaths on ``offsetgraph()`` with scores ending in zero.

    The score pileup takes the values 0, 10, 0 from indices 0, 15, 30; a
    single peak over nodes [101, 103, 104] is expected.
    """
    graph = offsetgraph()
    track_size = 100

    pileup = SparseValues([0, 5, 35], [False, True, False])
    score_pileup = SparseValues([0, 15, 30], [0, 10, 0])
    score_pileup.track_size = track_size
    pileup.track_size = track_size

    found = SparseMaxPaths(pileup, graph, score_pileup).run()
    print(found)

    expected = [Peak(5, 5, [101, 103, 104], graph=graph)]
    assert found == expected
def test_offset_end_peak():
    """Run SparseMaxPaths on ``offsetgraph()`` with a pileup set from 85.

    Both pileups change value at index 85 with track_size 100; a single
    peak over nodes [109, 110] is expected.
    """
    graph = offsetgraph()
    track_size = 100

    pileup = SparseValues([0, 85], [False, True])
    score_pileup = SparseValues([0, 85], [0, 10])
    score_pileup.track_size = track_size
    pileup.track_size = track_size

    found = SparseMaxPaths(pileup, graph, score_pileup).run()
    print(found)

    expected = [Peak(5, 10, [109, 110], graph=graph)]
    assert found == expected
def test_convert_to_approx_linear_peaks(self):
    """Check ``to_approx_linear_peaks`` along a path through nodes 2, 4, 8, 9.

    Two graph peaks are converted with chromosome name "chr4"; the results
    are expected to equal NonGraphPeak("chr4", 2, 5) and
    NonGraphPeak("chr4", 3, 3).
    """
    blocks = {node: Block(3) for node in range(1, 10)}
    edges = {
        1: [2],
        2: [3],
        3: [4],
        4: [5],
        5: [6],
        6: [7, 8],
        7: [9],
        9: [9],
    }
    graph = Graph(blocks, edges)
    graph.convert_to_numpy_backend()

    linear_path = Interval(
        0, 3, [2, 4, 8, 9], graph).to_numpy_indexed_interval()

    graph_peaks = PeakCollection(
        [Peak(2, 2, [2, 3, 4]), Peak(1, 1, [3, 4, 5])])
    linear_peaks = graph_peaks.to_approx_linear_peaks(
        linear_path, "chr4").peaks
    print(linear_peaks)

    self.assertEqual(linear_peaks[0], NonGraphPeak("chr4", 2, 5))
    self.assertEqual(linear_peaks[1], NonGraphPeak("chr4", 3, 3))
def test_get_summits(self):
    """Check the ``get_summits`` command end to end via files under tests/.

    Writes q-values and a max-path fasta file, builds the graph with
    ``create_ob_graph``, runs ``get_summits``, and expects the first
    resulting interval to equal Peak(2, 6, [1]) with sequence "tccc".
    """
    qvalues = SparseValues(np.array([0]), np.array([3]))
    qvalues.track_size = 22
    qvalues.to_sparse_files("tests/test_qvalues")

    create_graph_command = [
        "create_ob_graph", "-o", "tests/testgraph.obg",
        "tests/vg_test_graph.json",
    ]
    run_argument_parser(create_graph_command)

    max_paths = PeakCollection([Peak(0, 2, [1, 2], score=3)])
    fasta_writer = PeakFasta(self.correct_sequence_graph)
    fasta_writer.write_max_path_sequences(
        "tests/test_max_paths.fasta", max_paths)

    summits_command = [
        "get_summits", "-g", "tests/testgraph.obg",
        "tests/test_max_paths.fasta", "tests/test_qvalues", "2",
    ]
    run_argument_parser(summits_command)

    result = PeakCollection.from_fasta_file(
        "tests/test_max_paths_summits.fasta")
    summit = result.intervals[0]
    self.assertEqual(summit, Peak(2, 6, [1]))
    self.assertEqual(summit.sequence.lower(), "tccc")
def test_intervals_to_fasta_from_fasta(self):
    """Round-trip one peak through ``peaks_to_fasta`` and read it back.

    Expects the fasta file to hold a single interval whose lower-cased
    sequence is "tttcccctt".
    """
    graph_path = "tests/testgraph.obg"
    intervals_path = "tests/testintervals.intervalcollection"
    fasta_path = "tests/testsequences.fasta"

    run_argument_parser(
        ["create_ob_graph", "-o", graph_path, "tests/vg_test_graph.json"])

    peaks = PeakCollection([Peak(0, 2, [1, 2], score=3)])
    peaks.to_file(intervals_path, text_file=True)

    run_argument_parser([
        "peaks_to_fasta",
        "tests/testgraph.obg.sequences",
        intervals_path,
        fasta_path,
    ])

    collection = PeakCollection.from_fasta_file(fasta_path)
    self.assertEqual(len(collection.intervals), 1)
    self.assertEqual(
        collection.intervals[0].sequence.lower(), "tttcccctt")
def motif_location():
    """Return ``MotifLocation(peak, 2, 11)`` for a peak with id "peak1".

    The peak is Peak(1, 4, [1, 3, 4]) on the graph returned by ``graph()``.
    """
    peak_graph = graph()
    peak = Peak(1, 4, [1, 3, 4], peak_graph, unique_id="peak1")
    location = MotifLocation(peak, 2, 11)
    return location
def test_contains_interval(self):
    """Check ``contains_interval`` for a present and an absent peak.

    Peak(3, 3, [1, 2, 3, 4]) is expected to be contained in ``self.peaks``;
    Peak(2, 3, [1, 2, 3, 4]) is not.
    """
    present = Peak(3, 3, [1, 2, 3, 4])
    self.assertTrue(self.peaks.contains_interval(present))

    absent = Peak(2, 3, [1, 2, 3, 4])
    self.assertFalse(self.peaks.contains_interval(absent))
def test_get_similar_intervals(self):
    """Check ``get_similar_intervals`` for a peak differing in start by one.

    Querying with Peak(2, 3, [1, 2, 3, 4]) and a second argument of 1 is
    expected to return exactly one interval, equal to the first interval of
    ``self.peaks``.
    """
    similar = self.peaks.get_similar_intervals(
        Peak(2, 3, [1, 2, 3, 4], self.graph), 1)
    # assertEqual reports the actual length on failure, unlike
    # assertTrue(len(similar) == 1), which only reports "False is not true".
    self.assertEqual(len(similar), 1)
    self.assertEqual(similar[0], self.peaks.intervals[0])
def test_old_maxpath(sparse_max_paths):
    """Run ``sparse_max_paths`` with ``_variant_maps`` cleared.

    The first element of the ``run()`` result, sorted on the first entry
    of region_paths, is expected to equal two fixed peaks.
    """
    sparse_max_paths._variant_maps = None
    found = sparse_max_paths.run()[0]

    expected = [
        Peak(5, 2, [11, 13, 14]),
        Peak(8, 2, [14, 15, 16, 17]),
    ]
    found.sort(key=lambda p: p.region_paths[0])
    assert found == expected
def test_maxpath_novar(sparse_max_paths_nonvar):
    """Run ``sparse_max_paths_nonvar`` and compare with two fixed peaks.

    The first element of the ``run()`` result is sorted on the first entry
    of region_paths before the comparison.
    """
    found = sparse_max_paths_nonvar.run()[0]

    expected = [
        Peak(5, 2, [11, 12, 14]),
        Peak(8, 2, [14, 16, 17]),
    ]
    found.sort(key=lambda p: p.region_paths[0])
    assert found == expected
def test_maxpath(sparse_max_paths):
    """Run ``sparse_max_paths`` and compare with two fixed peaks.

    The first element of the ``run()`` result is printed, then sorted on
    the first entry of region_paths before the comparison.
    """
    found = sparse_max_paths.run()[0]
    print(found)

    expected = [
        Peak(5, 2, [11, 13, 14]),
        Peak(8, 2, [14, 17]),
    ]
    found.sort(key=lambda p: p.region_paths[0])
    assert found == expected