def test_offset(complicated_offset):
    """Check HolesCleaner output on the ``complicated_offset`` graph.

    An alternating True/False pileup is cleaned with 3 as the third
    HolesCleaner argument and compared against a hand-written result.
    """
    #                 1   1   2--3   4   5---6   7   8
    positions = np.array([0, 1, 2, 4, 6, 8, 10, 12, 14], dtype="int")
    flags = np.array([1, 0, 1, 0, 1, 0, 1, 0, 1], dtype="bool")
    pileup = SparseValues(positions, flags)

    cleaner = HolesCleaner(complicated_offset, pileup, 3)
    cleaned = cleaner.run()

    expected = SparseValues([0, 8, 10], [1, 0, 1])
    assert cleaned == expected
def test_internals():
    """Check HolesCleaner on a graph made of one 100-long block.

    The pileup alternates 1/0 at positions 0, 10, 19, 30, 41, 50 and is
    cleaned with 10 as the third HolesCleaner argument.
    """
    single_node_graph = obg.GraphWithReversals({101: obg.Block(100)}, {101: []})
    pileup = SparseValues([0, 10, 19, 30, 41, 50], [1, 0, 1, 0, 1, 0])

    result = HolesCleaner(single_node_graph, pileup, 10).run()

    expected = SparseValues([0, 30, 41, 50], [1, 0, 1, 0])
    assert result == expected
def test_single_peak():
    """Run SparseMaxPaths on one True region and print the result.

    Uses (and mutates ``node_indexes`` of) the ``graph`` object found in the
    enclosing scope; nothing is asserted, the output is only printed.
    """
    peak_positions = np.array([0, 3, 32], dtype="int")
    peak_flags = np.array([False, True, False], dtype="bool")
    pileup = SparseValues(peak_positions, peak_flags)

    score_positions = np.arange(0, 40, 2)
    score_values = (score_positions + 1) % 5
    score_pileup = SparseValues(score_positions, score_values)
    score_pileup.track_size = 100

    graph.node_indexes = np.arange(0, 110, 10)
    finder = SparseMaxPaths(pileup, graph, score_pileup)
    print(finder.run())
def convert_old_sparse(old_sparse):
    """Build a ``SparseValues`` from an old-style per-node sparse pileup.

    For every key of ``old_sparse.data`` (visited in sorted order) the node's
    offset is looked up in ``graph.node_indexes``; the node's ``start_value``
    is recorded at that offset, followed by the node's own ``indexes`` shifted
    by the offset together with its ``values``.  The lists start with a
    leading ``(0, 0)`` entry and are passed to ``SparseValues`` with
    ``sanitize=True``.  ``track_size`` is set to the last entry of
    ``graph.node_indexes``.
    """
    graph = old_sparse.graph
    positions, heights = [0], [0]

    for node_id in sorted(old_sparse.data.keys()):
        node_pileup = old_sparse.data[node_id]
        node_offset = graph.node_indexes[node_id - graph.min_node]

        # Value at the very start of the node.
        positions.append(node_offset)
        heights.append(node_pileup.start_value)

        # Changes inside the node, moved from node-local to global positions
        # (presumably ``indexes`` is a numpy array, so ``+`` is elementwise).
        positions.extend(node_pileup.indexes + node_offset)
        heights.extend(node_pileup.values)

    converted = SparseValues(positions, heights, sanitize=True)
    converted.track_size = graph.node_indexes[-1]
    return converted
def test_long_hole():
    """A hole from 5 to 95 must be left as is when cleaning with 56.

    The graph is a chain of 100 blocks of length 1 (nodes 1..100).
    """
    pileup = SparseValues([0, 5, 95], [True, False, True])

    blocks = {node: obg.Block(1) for node in range(1, 101)}
    edges = {node: [node + 1] for node in range(1, 100)}
    chain_graph = obg.GraphWithReversals(blocks, edges)

    cleaned = HolesCleaner(chain_graph, pileup, 56).run()
    assert cleaned == pileup
def test_sample_equals_control_one_node(self):
    """Compare PValuesFinder output with a hand-computed p-value pileup.

    The sample pileup is built from one interval on node 2, the control
    from one interval over nodes 1 and 2, both on ``self.graph``.
    """
    sample_pileup = from_intervals(self.graph, [Interval(0, 3, [2])])
    control_pileup = from_intervals(self.graph, [Interval(0, 3, [1, 2])])

    computed = PValuesFinder(sample_pileup, control_pileup).get_p_values_pileup()

    expected = SparseValues([0, 3, 6], [0, -np.log10(0.26424), 0])
    self.assertEqual(computed, expected)
def test_holes_cleaner():
    """Run HolesCleaner on a ten-node chain and print the cleaned pileup.

    Pileup values alternate 0, 1, 0, 1, ... over the listed positions.
    Nothing is asserted; the result is only printed.
    """
    positions = np.array(
        [80, 100, 180, 220, 240, 250, 300, 400, 500, 520, 610, 810])
    # Parity of each position's rank in the array: 0 for even, 1 for odd.
    parities = np.array([rank % 2 for rank in range(len(positions))])
    pileup = SparseValues(positions, parities)

    blocks = {node + 1: obg.Block(100) for node in range(10)}
    edges = {node: [node + 1] for node in range(1, 10)}
    graph = obg.GraphWithReversals(blocks, edges)
    # graph.node_indexes = np.arange(0, 1001, 100)

    cleaned = HolesCleaner(graph, pileup, 10).run()
    print(cleaned)
def test_simple_peak():
    """SparseMaxPaths must return one peak for one True region (5..35).

    Relies on the ``graph`` object available in the enclosing scope.
    """
    track_size = 100

    pileup = SparseValues([0, 5, 35], [False, True, False])
    score_pileup = SparseValues([0, 5, 10, 20, 30, 35], [0, 1, 2, 3, 4, 0])
    score_pileup.track_size = track_size
    pileup.track_size = track_size

    found = SparseMaxPaths(pileup, graph, score_pileup).run()

    expected = [Peak(5, 5, [1, 3, 4], graph=graph)]
    assert found == expected
def test_get_summits(self):
    """Run the ``get_summits`` command end to end and check the summit.

    Writes q-values, an ob graph and a max-path fasta file under ``tests/``,
    invokes the command through ``run_argument_parser`` and reads back the
    resulting summits fasta file.
    """
    # Constant q-value of 3 over a track of size 22.
    qvalues = SparseValues(np.array([0]), np.array([3]))
    qvalues.track_size = 22
    qvalues.to_sparse_files("tests/test_qvalues")

    create_graph_args = [
        "create_ob_graph",
        "-o", "tests/testgraph.obg",
        "tests/vg_test_graph.json",
    ]
    run_argument_parser(create_graph_args)

    max_paths = PeakCollection([Peak(0, 2, [1, 2], score=3)])
    fasta_writer = PeakFasta(self.correct_sequence_graph)
    fasta_writer.write_max_path_sequences(
        "tests/test_max_paths.fasta", max_paths)

    get_summits_args = [
        "get_summits",
        "-g", "tests/testgraph.obg",
        "tests/test_max_paths.fasta",
        "tests/test_qvalues",
        "2",
    ]
    run_argument_parser(get_summits_args)

    result = PeakCollection.from_fasta_file(
        "tests/test_max_paths_summits.fasta")
    self.assertEqual(result.intervals[0], Peak(2, 6, [1]))
    self.assertEqual(result.intervals[0].sequence.lower(), "tccc")
def test_offset_end_peak():
    """A region that is True from 85 onwards must yield one peak.

    Uses the graph returned by ``offsetgraph()`` and a track size of 100.
    """
    graph = offsetgraph()
    track_size = 100

    pileup = SparseValues([0, 85], [False, True])
    score_pileup = SparseValues([0, 85], [0, 10])
    score_pileup.track_size = track_size
    pileup.track_size = track_size

    found = SparseMaxPaths(pileup, graph, score_pileup).run()
    print(found)

    expected = [Peak(5, 10, [109, 110], graph=graph)]
    assert found == expected
def test_trailing_zeros():
    """Peak region 5..35 whose scores are zero before 15 and from 30 on.

    Uses the graph returned by ``offsetgraph()`` and a track size of 100.
    """
    graph = offsetgraph()
    track_size = 100

    pileup = SparseValues([0, 5, 35], [False, True, False])
    score_pileup = SparseValues([0, 15, 30], [0, 10, 0])
    score_pileup.track_size = track_size
    pileup.track_size = track_size

    found = SparseMaxPaths(pileup, graph, score_pileup).run()
    print(found)

    expected = [Peak(5, 5, [101, 103, 104], graph=graph)]
    assert found == expected
def test_to_from_file(self):
    """Write a SparseValues to sparse files and read it back unchanged."""
    file_base = "test_sparsevalues.tmp"

    # Indexes and values are both 1..8, held in two separate arrays.
    indexes = np.arange(1, 9)
    values = np.arange(1, 9)
    original = SparseValues(indexes, values)
    original.track_size = 10

    original.to_sparse_files(file_base)
    restored = original.from_sparse_files(file_base)

    self.assertEqual(original, restored)
def test_multiple_peak():
    """Two of the three True regions must come back as peaks.

    Uses the graph returned by ``complicated_offset()``; the result list is
    sorted by each peak's first region path before comparison.
    """
    graph = complicated_offset()
    track_size = 20

    pileup = SparseValues([0, 1, 7, 8, 10, 12, 14], [0, 1, 0, 1, 0, 1, 0])
    score_pileup = SparseValues([0, 4], [5, 6])
    score_pileup.track_size = track_size
    pileup.track_size = track_size

    found = SparseMaxPaths(pileup, graph, score_pileup).run()
    found.sort(key=lambda peak: peak.region_paths[0])
    print(found)

    expected = [
        Peak(1, 1, [101, 103, 104], graph=graph),
        Peak(0, 2, [105, 107], graph=graph),
    ]
    assert found == expected
# Script: compute max paths for one chromosome and write the long ones to
# "<chrom>_max_paths.intervalcollection".
#
# Usage: <script> CHROM FRAGMENT_LENGTH
#
# Reads the linear path, graph and variant maps from /data/bioinf/tair2/ and
# the "<chrom>_direct_pileup" / "<chrom>_hole_cleaned" sparse files from the
# current directory.
import logging
import sys

# Configured before the project imports, as in the original script.
logging.basicConfig(level=logging.DEBUG)

import numpy as np
from offsetbasedgraph import Graph, NumpyIndexedInterval
from offsetbasedgraph.vcfmap import load_variant_maps
from graph_peak_caller.postprocess.maxpaths import SparseMaxPaths
from graph_peak_caller.sparsediffs import SparseValues
from graph_peak_caller.peakcollection import PeakCollection

chrom = sys.argv[1]
fragment_length = int(sys.argv[2])

data_dir = "/data/bioinf/tair2/"
ref = NumpyIndexedInterval.from_file(
    data_dir + chrom + "_linear_pathv2.interval")
graph = Graph.from_file(data_dir + chrom + ".nobg")
direct = SparseValues.from_sparse_files(chrom + "_direct_pileup")
filtered_peaks = SparseValues.from_sparse_files(chrom + "_hole_cleaned")
variant_map = load_variant_maps(chrom, data_dir)

max_paths, sub_graphs = SparseMaxPaths(
    filtered_peaks, graph, direct, ref, variant_map).run()

# Keep only paths at least as long as the fragment length.
long_maxpaths = [path for path in max_paths
                 if path.length() >= fragment_length]

# Fixed: the loop previously iterated over the undefined name
# `long_max_paths`, which raised NameError.
for max_path in long_maxpaths:
    assert max_path.length() > 0, "Max path %s has negative length" % max_path
    # NOTE(review): `self` does not exist at module level, so this line still
    # raises NameError as written. It looks copied from a method; the q-value
    # object to score against must be loaded/supplied here — confirm which
    # data source is intended before running.
    score = np.max(self.q_values.get_interval_values(max_path))
    max_path.set_score(score)
    assert not np.isnan(score), "Score %s is nan" % score

PeakCollection(long_maxpaths).to_file(
    chrom + "_max_paths.intervalcollection", text_file=True)
def nonvar_pileup():
    """Return a fixed SparseValues pileup with ``track_size`` 80."""
    positions = [0, 15, 45, 50, 60, 70]
    heights = [0, 1, 4, 1, 3, 5]
    sparse = SparseValues(positions, heights)
    sparse.track_size = 80
    return sparse
def pileup():
    """Return a fixed SparseValues pileup with ``track_size`` 80."""
    positions = [0, 15, 30, 45, 50, 60, 70]
    heights = [0, 1, 2, 6, 2, 3, 6]
    sparse = SparseValues(positions, heights)
    sparse.track_size = 80
    return sparse
def test_end_hole():
    """A hole starting at 10 on a single 12-long block must be kept.

    HolesCleaner is run with 4 as its third argument and must return a
    pileup equal to the input.
    """
    pileup = SparseValues([0, 5, 10], [False, True, False])
    single_node_graph = obg.GraphWithReversals({1: obg.Block(12)}, {1: []})

    cleaned = HolesCleaner(single_node_graph, pileup, 4).run()
    assert cleaned == pileup
def areas():
    """Return a fixed alternating 0/1 SparseValues with ``track_size`` 80."""
    positions = [0, 15, 42, 48, 72]
    flags = [0, 1, 0, 1, 0]
    sparse = SparseValues(positions, flags)
    sparse.track_size = 80
    return sparse
def test_non_touched_mid(small_graph):
    """Check HolesCleaner on ``small_graph`` when given a touched-node set.

    Nodes 101..106 are passed as the fourth HolesCleaner argument and 20 as
    the third; the result is compared with a hand-written pileup.
    """
    touched_nodes = {101, 102, 103, 104, 105, 106}
    pileup = SparseValues([0, 4, 22, 28, 56], [1, 0, 1, 0, 1])

    result = HolesCleaner(small_graph, pileup, 20, touched_nodes).run()

    expected = SparseValues([0, 30, 40], [1, 0, 1])
    assert result == expected
def test_touched_edge(small_graph):
    """With touched nodes 101, 103, 106 and 4 as third argument, nothing changes.

    HolesCleaner must return a pileup equal to the input.
    """
    touched_nodes = {101, 103, 106}
    pileup = SparseValues([0, 4, 22, 28, 56], [1, 0, 1, 0, 1])

    result = HolesCleaner(small_graph, pileup, 4, touched_nodes).run()
    assert result == pileup
def offset_areas():
    """Return a fixed alternating 1/0 SparseValues with ``track_size`` 80."""
    positions = [0, 10, 11, 20, 29, 31]
    flags = [1, 0, 1, 0, 1, 0]
    sparse = SparseValues(positions, flags)
    sparse.track_size = 80
    return sparse