Exemple #1
0
def test_runs():
    """Check runs, runs_gte and runs_gte_uint8 against fixed inputs.

    The cases are: one interior run, empty input, two separate runs (the
    second one reaching the last index), an all-zero sequence and an
    all-one sequence. Expected intervals are (first, last) index pairs
    with the last index inclusive.
    """

    def positive(value):
        return value > 0

    # A single run of ones at indices 10..20 inside 100 values.
    first, last = 10, 20
    flags = [1 if first <= pos <= last else 0 for pos in range(100)]
    single = [(first, last)]
    assert runs(flags, positive) == single
    assert runs_gte(flags, 1) == single
    # Third argument is presumably a minimum run length (the run above is
    # only 11 values long) -- confirm against runs_gte's signature.
    assert runs_gte(flags, 1, 20) == []

    # Empty input yields no runs.
    assert runs_gte([], 1) == []
    assert runs([], lambda x: True) == []

    # Two runs, the second of which extends to the end of the sequence.
    flags = [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1]
    pair = [(4, 8), (14, 16)]
    assert runs(flags, positive) == pair
    assert runs_gte(flags, 1) == pair
    assert runs_gte_uint8(np.array(flags, dtype=np.uint8), 1) == pair

    # Nothing satisfies the predicate.
    zeros = [0] * 100
    assert runs(zeros, positive) == []
    assert runs_gte(zeros, 1) == []
    assert runs_gte_uint8(np.uint8(zeros), 1) == []

    # Everything satisfies the predicate: one run spanning the whole input.
    ones = [1] * 100
    whole = [(0, 99)]
    assert runs(ones, positive) == whole
    assert runs_gte(ones, 1) == whole
    assert runs_gte_uint8(np.array(ones, dtype=np.uint8), 1) == whole
Exemple #2
0
def test_runs():
    """Check runs, runs_gte and runs_gte_uint8 against fixed inputs.

    Expected results are lists of (first, last) index pairs, where the
    last index is inclusive.
    """

    def is_positive(v):
        return v > 0

    # 10 zeros, then ones at indices 10..20, then zeros up to index 99.
    region = (10, 20)
    values = [0] * 10 + [1] * 11 + [0] * 79
    assert runs(values, is_positive) == [region]
    assert runs_gte(values, 1) == [region]
    # Third argument is presumably a minimum run length -- confirm against
    # runs_gte's signature. The only run here is 11 values long.
    assert runs_gte(values, 1, 20) == []
    assert runs_gte([], 1) == []
    assert runs([], lambda x: True) == []

    # Two separate runs; the second one ends on the final index.
    values = [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1]
    expected = [(4, 8), (14, 16)]
    assert runs(values, is_positive) == expected
    assert runs_gte(values, 1) == expected
    assert runs_gte_uint8(np.array(values, dtype=np.uint8), 1) == expected

    # All zeros: no runs at all.
    values = [0] * 100
    assert runs(values, is_positive) == []
    assert runs_gte(values, 1) == []
    assert runs_gte_uint8(np.uint8(values), 1) == []

    # All ones: a single run covering every index.
    values = [1] * 100
    expected = [(0, 99)]
    assert runs(values, is_positive) == expected
    assert runs_gte(values, 1) == expected
    assert runs_gte_uint8(np.array(values, dtype=np.uint8), 1) == expected
Exemple #3
0
def make_intervals(ibdarray):
    """Turn a per-position IBD-state sequence into a list of tracts.

    The input is converted to a fresh uint8 array, so the caller's data is
    never modified. Tracts are whatever ``runs_gte_uint8`` reports; they
    are unpacked as ``(start, stop)`` pairs and ``stop`` is treated as an
    inclusive index.

    Args:
        ibdarray: Sequence of IBD states; it is cast to ``np.uint8``.

    Returns:
        The tracts found at threshold 1 followed by the tracts found at
        threshold 2. Each threshold-2 tract is lowered by one before the
        threshold-1 scan, so its positions still take part in that scan.
    """
    # np.array copies by default, so a further .copy() would be redundant.
    states = np.array(ibdarray, dtype=np.uint8)

    # Get the intervals that are IBD=2 and remove them from the array
    ibd2_tracts = list(runs_gte_uint8(states, 2))
    for start, stop in ibd2_tracts:
        states[start:(stop + 1)] -= 1

    # Now get the remaining IBD=1 tracts. The working array is local and
    # not used afterwards, so there is no need to subtract these from it.
    ibd1_tracts = list(runs_gte_uint8(states, 1))

    return ibd1_tracts + ibd2_tracts
Exemple #4
0
def make_intervals(ibdarray):
    """Turn a per-position IBD-state sequence into a list of tracts.

    The input is converted to a fresh uint8 array, so the caller's data is
    never modified. Tracts are whatever ``runs_gte_uint8`` reports; they
    are unpacked as ``(start, stop)`` pairs and ``stop`` is treated as an
    inclusive index.

    Args:
        ibdarray: Sequence of IBD states; it is cast to ``np.uint8``.

    Returns:
        The tracts found at threshold 1 followed by the tracts found at
        threshold 2. Each threshold-2 tract is lowered by one before the
        threshold-1 scan, so its positions still take part in that scan.
    """
    # np.array copies by default, so a further .copy() would be redundant.
    states = np.array(ibdarray, dtype=np.uint8)

    # Get the intervals that are IBD=2 and remove them from the array
    ibd2_tracts = list(runs_gte_uint8(states, 2))
    for start, stop in ibd2_tracts:
        states[start:(stop + 1)] -= 1

    # Now get the remaining IBD=1 tracts. The working array is local and
    # not used afterwards, so there is no need to subtract these from it.
    ibd1_tracts = list(runs_gte_uint8(states, 1))

    return ibd1_tracts + ibd2_tracts
Exemple #5
0
def _process_segments(identical,
                      min_seg=100,
                      min_val=1,
                      chromobj=None,
                      min_density=100,
                      size_unit='mb',
                      min_length=1,
                      maxmiss=0.25):
    """Find candidate IBD segments in ``identical`` and post-process them.

    Steps, in order:
      1. ``runs_gte_uint8(identical, min_val, minlength=min_seg)`` finds
         the initial segments; a falsy result is returned unchanged.
      2. ``join_gaps`` merges the segments with ``max_gap=2``.
      3. When ``chromobj`` is truthy, ``filter_segments`` is applied with
         ``min_length``, ``size_unit``, ``min_density`` and ``maxmiss``.

    Returns:
        The segments remaining after the steps above.
    """
    # IBD segments are long runs of identical genotypes
    segments = runs_gte_uint8(identical, min_val, minlength=min_seg)
    if not segments:
        return segments

    # Genotyping errors happen: a small gap between two IBD segments is
    # attributed to such an error and the segments are joined together.
    segments = join_gaps(segments, max_gap=2)

    # Without chromosome information there is nothing more to filter on.
    if not chromobj:
        return segments

    return filter_segments(chromobj,
                           segments,
                           identical,
                           min_length=min_length,
                           size_unit=size_unit,
                           min_density=min_density,
                           maxmiss=maxmiss)
Exemple #6
0
def _process_segments(identical, min_seg=100, min_val=1, chromobj=None,
                      min_density=100, size_unit='mb',
                      min_length=1, maxmiss=0.25):
    """Find candidate IBD segments in ``identical`` and post-process them.

    Steps, in order:
      1. ``runs_gte_uint8(identical, min_val, minlength=min_seg)`` finds
         the initial segments; a falsy result is returned unchanged.
      2. ``join_gaps`` merges the segments with ``max_gap=2``.
      3. When ``chromobj`` is truthy, ``filter_segments`` is applied with
         ``min_length``, ``size_unit``, ``min_density`` and ``maxmiss``.

    Returns:
        The segments remaining after the steps above.
    """
    # IBD segments are long runs of identical genotypes
    found = runs_gte_uint8(identical, min_val, minlength=min_seg)
    if not found:
        return found

    # Genotyping errors happen: a small gap between two IBD segments is
    # attributed to such an error and the segments are joined together.
    joined = join_gaps(found, max_gap=2)

    # Without chromosome information there is nothing more to filter on.
    if not chromobj:
        return joined

    return filter_segments(chromobj, joined, identical,
                           min_length=min_length,
                           size_unit=size_unit,
                           min_density=min_density,
                           maxmiss=maxmiss)