def background_chr1():
    """Return the coverage of two chr1 intervals: 1-4 on "+" and 2-5 on "-".

    The intervals are parsed from an inline whitespace-delimited table,
    wrapped in a ``PyRanges`` and passed to ``coverage``.
    """
    table = (
        "Chromosome Start End Strand\n"
        "chr1 1 4 +\n"
        "chr1 2 5 -"
    )
    # Raw string: "\s" is not a valid escape sequence in a plain literal.
    frame = pd.read_table(StringIO(table), sep=r"\s+")
    return coverage(PyRanges(frame))
def test_simple_bed_with_scores(simple_bed, expected_result_simple_bed_values):
    """Coverage weighted by the "Value" column matches the expected Rle.

    Runs are compared exactly; values are compared with ``np.allclose``.
    """
    expected = expected_result_simple_bed_values
    observed = coverage(simple_bed, value_col="Value")

    # Printed so a failing run shows both sides in pytest's captured output.
    print(observed.runs, expected.runs)
    print(observed.values, expected.values)

    assert list(observed.runs) == list(expected.runs)
    assert np.allclose(observed.values, expected.values)
def chip_chr1():
    """Return the coverage of two chr1 intervals: 5-7 on "+" and 3-10 on "-".

    The intervals are parsed from an inline whitespace-delimited table,
    wrapped in a ``PyRanges`` and passed to ``coverage``.
    """
    table = (
        "Chromosome Start End Strand\n"
        "chr1 5 7 +\n"
        "chr1 3 10 -"
    )
    # Raw string: "\s" is not a valid escape sequence in a plain literal.
    frame = pd.read_table(StringIO(table), sep=r"\s+")
    return coverage(PyRanges(frame))
def test_coverage_simple(simple):
    """Coverage of ``simple`` weighted by "Score" equals Rle([1, 2], [-1, 1])."""
    observed = coverage(simple, value_col="Score")
    print(observed)

    expected = Rle([1, 2], [-1, 1])
    assert observed == expected
def test_simple_bed(simple_bed, expected_result_simple_bed):
    """Unweighted coverage of ``simple_bed`` matches the expected runs/values."""
    expected = expected_result_simple_bed
    observed = coverage(simple_bed)

    # Printed so a failing run shows both sides in pytest's captured output.
    print(observed.runs, expected.runs)
    print(observed.values, expected.values)

    assert list(observed.runs) == list(expected.runs)
    assert list(observed.values) == list(expected.values)
def test_roundtrip_to_ranges_single_rle_teensy_duplicated(
        teensy_duplicated, expected_result_teensy_duplicated):
    """Converting the coverage of ``teensy_duplicated`` back to ranges
    yields the expected start and end coordinates.

    ``expected_result_teensy_duplicated`` is not read by the body; it is kept
    so the test's signature (and the fixtures it requests) is unchanged.
    """
    cv = coverage(teensy_duplicated)
    # Only the coordinates are checked here; the run values are not asserted.
    starts, ends, _values = _to_ranges(cv)

    # In the expected data each range ends where the next one starts.
    assert list(starts) == [
        0, 42058716, 42058741, 42130511, 42130536, 42593165,
        42593190, 42635413, 42635438, 43357333, 43357358, 43854685
    ]
    assert list(ends) == [
        42058716, 42058741, 42130511, 42130536, 42593165, 42593190,
        42635413, 42635438, 43357333, 43357358, 43854685, 43854710
    ]
def test_coverage(df, expected_result_coverage):
    """Unweighted coverage of ``df`` has the expected runs and values.

    ``expected_result_coverage`` unpacks into ``(runs, values)``.
    """
    expected_runs, expected_values = expected_result_coverage

    observed = coverage(df)
    runs, values = observed.runs, observed.values

    # Debug output, visible in pytest's captured stdout on failure.
    print(runs)
    print(values)
    print(len(runs))
    print(len(values))
    print(expected_runs)
    print(expected_values)

    assert list(runs) == expected_runs
    assert list(values) == expected_values
def test_roundtrip_to_ranges_single_rle_teensy(teensy):
    """Converting the "Score"-weighted coverage of ``teensy`` back to ranges
    yields the expected start and end coordinates.
    """
    cv = coverage(teensy, value_col="Score")
    starts, ends, values = _to_ranges(cv)

    # Rebuild a frame from the ranges and construct a PyRanges from it.
    # The resulting object is not inspected; only the construction is run.
    df = pd.concat([pd.Series(a) for a in [starts, ends, values]], axis=1)
    df.columns = "Start End Score".split()
    df.insert(0, "Chromosome", "chr2")
    pr.PyRanges(df)

    assert list(starts) == [
        0, 13611, 13636, 32620, 32645, 33241, 33266, 1150665
    ]
    assert list(ends) == [
        13611, 13636, 32620, 32645, 33241, 33266, 1150665, 1150690
    ]
def test_roundtrip_to_ranges_single_rle_overlapping(overlapping_gr):
    """Converting the coverage of overlapping intervals back to ranges
    yields starts [0, 1, 4, 7] and ends [1, 4, 7, 10].
    """
    print(overlapping_gr)

    cv = coverage(overlapping_gr)
    print(cv)

    starts, ends, _values = _to_ranges(cv)

    # NOTE(review): the Score column is filled from ``cv.values`` rather than
    # the values returned by ``_to_ranges`` -- confirm this is intended.
    df = pd.concat([pd.Series(a) for a in [starts, ends, cv.values]], axis=1)
    df.columns = "Start End Score".split()
    df.insert(0, "Chromosome", "chr2")
    print(pr.PyRanges(df))

    assert list(starts) == [0, 1, 4, 7]
    assert list(ends) == [1, 4, 7, 10]
def chip():
    """Return the coverage of a single chr2 interval: 1-3 on "+".

    The interval is parsed from an inline whitespace-delimited table,
    wrapped in a ``PyRanges`` and passed to ``coverage``.
    """
    table = (
        "Chromosome Start End Strand\n"
        "chr2 1 3 +"
    )
    # Raw string: "\s" is not a valid escape sequence in a plain literal.
    frame = pd.read_table(StringIO(table), sep=r"\s+")
    return coverage(PyRanges(frame))