Exemple #1
0
def background_chr1():
    """Return the coverage of two hard-coded, stranded chr1 intervals.

    The whitespace-separated table below is parsed with pandas, wrapped in a
    ``PyRanges`` and passed to ``coverage``; that call's result is returned
    unchanged.
    """
    table = """Chromosome Start End Strand
chr1 1 4 +
chr1 2 5 -"""

    # The separator is a regex, so it must be a raw string: a bare "\s" in a
    # normal string literal is an invalid escape and warns on recent Pythons.
    frame = pd.read_table(StringIO(table), sep=r"\s+")
    return coverage(PyRanges(frame))
Exemple #2
0
def test_simple_bed_with_scores(simple_bed, expected_result_simple_bed_values):
    """Check coverage of ``simple_bed`` weighted by its "Value" column.

    Run lengths must match the expected fixture exactly; values are compared
    with ``np.allclose``.
    """
    expected = expected_result_simple_bed_values
    observed = coverage(simple_bed, value_col="Value")

    # Echo both sides so a failing run shows what was compared.
    print(observed.runs, expected.runs)
    print(observed.values, expected.values)

    assert list(observed.runs) == list(expected.runs)
    assert np.allclose(observed.values, expected.values)
Exemple #3
0
def chip_chr1():
    """Return the coverage of two hard-coded, stranded chr1 intervals.

    The whitespace-separated table below is parsed with pandas, wrapped in a
    ``PyRanges`` and passed to ``coverage``; that call's result is returned
    unchanged.
    """
    table = """Chromosome Start End Strand
chr1 5 7 +
chr1 3 10 -"""

    # The separator is a regex, so it must be a raw string: a bare "\s" in a
    # normal string literal is an invalid escape and warns on recent Pythons.
    frame = pd.read_table(StringIO(table), sep=r"\s+")
    return coverage(PyRanges(frame))
Exemple #4
0
def test_coverage_simple(simple):
    """Check that "Score"-weighted coverage of ``simple`` equals ``Rle([1, 2], [-1, 1])``."""
    observed = coverage(simple, value_col="Score")
    print(observed)

    expected = Rle([1, 2], [-1, 1])
    assert observed == expected
Exemple #5
0
def test_simple_bed(simple_bed, expected_result_simple_bed):
    """Check that unweighted coverage of ``simple_bed`` matches the expected fixture.

    Both the run lengths and the values are compared element by element.
    """
    expected = expected_result_simple_bed
    observed = coverage(simple_bed)

    # Echo both sides so a failing run shows what was compared.
    print(observed.runs, expected.runs)
    print(observed.values, expected.values)

    assert list(observed.runs) == list(expected.runs)
    assert list(observed.values) == list(expected.values)
Exemple #6
0
def test_roundtrip_to_ranges_single_rle_teensy_duplicated(
        teensy_duplicated, expected_result_teensy_duplicated):
    """Check the ranges recovered from the coverage of ``teensy_duplicated``.

    Only the start and end coordinates returned by ``_to_ranges`` are
    asserted; the returned values are ignored, and
    ``expected_result_teensy_duplicated`` is requested but not read here.
    """
    expected_starts = [
        0, 42058716, 42058741, 42130511, 42130536, 42593165, 42593190,
        42635413, 42635438, 43357333, 43357358, 43854685
    ]
    expected_ends = [
        42058716, 42058741, 42130511, 42130536, 42593165, 42593190, 42635413,
        42635438, 43357333, 43357358, 43854685, 43854710
    ]

    range_starts, range_ends, _ = _to_ranges(coverage(teensy_duplicated))

    assert list(range_starts) == expected_starts
    assert list(range_ends) == expected_ends
Exemple #7
0
def test_coverage(df, expected_result_coverage):
    """Check that unweighted coverage of ``df`` has the expected runs and values.

    ``expected_result_coverage`` unpacks into ``(runs, values)``, each compared
    against the list form of the corresponding result attribute.
    """
    want_runs, want_values = expected_result_coverage

    observed = coverage(df)

    # Diagnostic output: the result, its sizes, then the expectation.
    print(observed.runs)
    print(observed.values)

    print(len(observed.runs))
    print(len(observed.values))

    print(want_runs)
    print(want_values)

    assert list(observed.runs) == want_runs
    assert list(observed.values) == want_values
Exemple #8
0
def test_roundtrip_to_ranges_single_rle_teensy(teensy):
    """Check the ranges recovered from "Score"-weighted coverage of ``teensy``.

    The ranges from ``_to_ranges`` are assembled into a frame and passed to
    ``pr.PyRanges``; afterwards only the start and end coordinates are
    asserted.
    """
    score_coverage = coverage(teensy, value_col="Score")
    range_starts, range_ends, range_values = _to_ranges(score_coverage)

    columns = [
        pd.Series(range_starts),
        pd.Series(range_ends),
        pd.Series(range_values),
    ]
    frame = pd.concat(columns, axis=1)
    frame.columns = "Start End Score".split()
    frame.insert(0, "Chromosome", "chr2")

    # Built from the recovered ranges; the resulting object is not inspected.
    pr.PyRanges(frame)

    expected_starts = [
        0, 13611, 13636, 32620, 32645, 33241, 33266, 1150665
    ]
    expected_ends = [
        13611, 13636, 32620, 32645, 33241, 33266, 1150665, 1150690
    ]
    assert list(range_starts) == expected_starts
    assert list(range_ends) == expected_ends
Exemple #9
0
def test_roundtrip_to_ranges_single_rle_overlapping(overlapping_gr):
    """Check the ranges recovered from the coverage of ``overlapping_gr``.

    The input, its coverage, and a ``PyRanges`` rebuilt from the recovered
    ranges are printed; only the start and end coordinates are asserted.
    """
    print(overlapping_gr)
    overlap_coverage = coverage(overlapping_gr)
    print(overlap_coverage)

    range_starts, range_ends, _ = _to_ranges(overlap_coverage)

    # The score column is taken from the coverage object's own values, not
    # from the third item returned by _to_ranges.
    columns = [
        pd.Series(range_starts),
        pd.Series(range_ends),
        pd.Series(overlap_coverage.values),
    ]
    frame = pd.concat(columns, axis=1)
    frame.columns = "Start End Score".split()
    frame.insert(0, "Chromosome", "chr2")
    print(pr.PyRanges(frame))

    assert list(range_starts) == [0, 1, 4, 7]
    assert list(range_ends) == [1, 4, 7, 10]
Exemple #10
0
def chip():
    """Return the coverage of a single hard-coded, stranded chr2 interval.

    The whitespace-separated table below is parsed with pandas, wrapped in a
    ``PyRanges`` and passed to ``coverage``; that call's result is returned
    unchanged.
    """
    table = """Chromosome Start End Strand
chr2 1 3 +"""

    # The separator is a regex, so it must be a raw string: a bare "\s" in a
    # normal string literal is an invalid escape and warns on recent Pythons.
    frame = pd.read_table(StringIO(table), sep=r"\s+")
    return coverage(PyRanges(frame))