def test_runs():
    """Exercise ``runs``, ``runs_gte`` and ``runs_gte_uint8`` on fixed inputs.

    Every expected value is a list of ``(start, stop)`` index pairs whose
    ``stop`` is inclusive.  The cases cover a single interior run, empty
    input, two runs (one reaching the end of the input), no run at all, and
    one run spanning the whole input.
    """
    def positive(value):
        return value > 0

    # One run of ones occupying indices 10..20 inside 100 values.
    region = (10, 20)
    lo, hi = region
    flags = [1 if lo <= pos <= hi else 0 for pos in range(100)]
    assert runs(flags, positive) == [region]
    assert runs_gte(flags, 1) == [region]
    # The run holds 11 values; presumably the third argument is a minimum
    # run length, which this run does not reach.
    assert runs_gte(flags, 1, 20) == []

    # Empty input yields no runs.
    assert runs_gte([], 1) == []
    assert runs([], lambda x: True) == []

    # Two separate runs, the second one ending on the last element.
    flags = [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1]
    expected = [(4, 8), (14, 16)]
    assert runs(flags, positive) == expected
    assert runs_gte(flags, 1) == expected
    assert runs_gte_uint8(np.array(flags, dtype=np.uint8), 1) == expected

    # Nothing satisfies the condition.
    flags = [0] * 100
    assert runs(flags, positive) == []
    assert runs_gte(flags, 1) == []
    assert runs_gte_uint8(np.uint8(flags), 1) == []

    # Everything satisfies the condition: a single run over the full input.
    flags = [1] * 100
    expected = [(0, 99)]
    assert runs(flags, positive) == expected
    assert runs_gte(flags, 1) == expected
    assert runs_gte_uint8(np.array(flags, dtype=np.uint8), 1) == expected
def test_runs():
    """Exercise ``runs``, ``runs_gte`` and ``runs_gte_uint8`` on fixed inputs.

    Every expected value is a list of ``(start, stop)`` index pairs whose
    ``stop`` is inclusive.  The cases cover a single interior run, empty
    input, two runs (one reaching the end of the input), no run at all, and
    one run spanning the whole input.
    """
    def positive(value):
        return value > 0

    # One run of ones occupying indices 10..20 inside 100 values.
    region = (10, 20)
    lo, hi = region
    flags = [1 if lo <= pos <= hi else 0 for pos in range(100)]
    assert runs(flags, positive) == [region]
    assert runs_gte(flags, 1) == [region]
    # The run holds 11 values; presumably the third argument is a minimum
    # run length, which this run does not reach.
    assert runs_gte(flags, 1, 20) == []

    # Empty input yields no runs.
    assert runs_gte([], 1) == []
    assert runs([], lambda x: True) == []

    # Two separate runs, the second one ending on the last element.
    flags = [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1]
    expected = [(4, 8), (14, 16)]
    assert runs(flags, positive) == expected
    assert runs_gte(flags, 1) == expected
    assert runs_gte_uint8(np.array(flags, dtype=np.uint8), 1) == expected

    # Nothing satisfies the condition.
    flags = [0] * 100
    assert runs(flags, positive) == []
    assert runs_gte(flags, 1) == []
    assert runs_gte_uint8(np.uint8(flags), 1) == []

    # Everything satisfies the condition: a single run over the full input.
    flags = [1] * 100
    expected = [(0, 99)]
    assert runs(flags, positive) == expected
    assert runs_gte(flags, 1) == expected
    assert runs_gte_uint8(np.array(flags, dtype=np.uint8), 1) == expected
def make_intervals(ibdarray):
    """Split per-position IBD states into IBD=1 and IBD=2 tracts.

    Args:
        ibdarray: Sequence of per-position IBD states.  It is converted to a
            fresh ``uint8`` array, so the caller's data is never modified.

    Returns:
        list: The tracts found at threshold 1 followed by the tracts found
        at threshold 2, each as produced by ``runs_gte_uint8`` (used here as
        ``(start, stop)`` pairs with an inclusive ``stop``).  Because every
        IBD=2 tract is lowered by one before the second scan, its positions
        are also covered by a threshold-1 tract.
    """
    # np.array copies its input by default, so no extra .copy() is needed
    # before mutating the working array.
    states = np.array(ibdarray, dtype=np.uint8)

    # Get the intervals that are IBD=2 and remove one level from them.
    ibd2_tracts = list(runs_gte_uint8(states, 2))
    for start, stop in ibd2_tracts:
        states[start:stop + 1] -= 1

    # Whatever is still >= 1 forms the IBD=1 tracts.  The working array is
    # discarded on return, so there is nothing further to subtract.
    ibd1_tracts = list(runs_gte_uint8(states, 1))

    return ibd1_tracts + ibd2_tracts
def make_intervals(ibdarray):
    """Split per-position IBD states into IBD=1 and IBD=2 tracts.

    Args:
        ibdarray: Sequence of per-position IBD states.  It is converted to a
            fresh ``uint8`` array, so the caller's data is never modified.

    Returns:
        list: The tracts found at threshold 1 followed by the tracts found
        at threshold 2, each as produced by ``runs_gte_uint8`` (used here as
        ``(start, stop)`` pairs with an inclusive ``stop``).  Because every
        IBD=2 tract is lowered by one before the second scan, its positions
        are also covered by a threshold-1 tract.
    """
    # np.array copies its input by default, so no extra .copy() is needed
    # before mutating the working array.
    states = np.array(ibdarray, dtype=np.uint8)

    # Get the intervals that are IBD=2 and remove one level from them.
    ibd2_tracts = list(runs_gte_uint8(states, 2))
    for start, stop in ibd2_tracts:
        states[start:stop + 1] -= 1

    # Whatever is still >= 1 forms the IBD=1 tracts.  The working array is
    # discarded on return, so there is nothing further to subtract.
    ibd1_tracts = list(runs_gte_uint8(states, 1))

    return ibd1_tracts + ibd2_tracts
def _process_segments(identical, min_seg=100, min_val=1, chromobj=None,
                      min_density=100, size_unit='mb', min_length=1,
                      maxmiss=0.25):
    """Find candidate IBD segments in ``identical`` and post-process them.

    Args:
        identical: Array scanned by ``runs_gte_uint8`` for runs of values
            that are at least ``min_val``.
        min_seg: Passed to ``runs_gte_uint8`` as ``minlength``.
        min_val: Threshold passed to ``runs_gte_uint8``.
        chromobj: Optional object handed to ``filter_segments``; filtering
            is skipped when it is falsy.
        min_density: Forwarded unchanged to ``filter_segments``.
        size_unit: Forwarded unchanged to ``filter_segments``.
        min_length: Forwarded unchanged to ``filter_segments``.
        maxmiss: Forwarded unchanged to ``filter_segments``.

    Returns:
        The segments after gap joining and, when ``chromobj`` is truthy,
        filtering.  If the initial scan finds nothing, its (falsy) result is
        returned as is.
    """
    # IBD segments are long runs of identical genotypes.
    segments = runs_gte_uint8(identical, min_val, minlength=min_seg)
    if not segments:
        return segments

    # Genotyping errors happen.  If there is a small gap between two IBD
    # segments, attribute it to a genotyping error and join the segments.
    joined = join_gaps(segments, max_gap=2)
    if not chromobj:
        return joined

    return filter_segments(
        chromobj,
        joined,
        identical,
        min_length=min_length,
        size_unit=size_unit,
        min_density=min_density,
        maxmiss=maxmiss,
    )
def _process_segments(identical, min_seg=100, min_val=1, chromobj=None,
                      min_density=100, size_unit='mb', min_length=1,
                      maxmiss=0.25):
    """Find candidate IBD segments in ``identical`` and post-process them.

    Args:
        identical: Array scanned by ``runs_gte_uint8`` for runs of values
            that are at least ``min_val``.
        min_seg: Passed to ``runs_gte_uint8`` as ``minlength``.
        min_val: Threshold passed to ``runs_gte_uint8``.
        chromobj: Optional object handed to ``filter_segments``; filtering
            is skipped when it is falsy.
        min_density: Forwarded unchanged to ``filter_segments``.
        size_unit: Forwarded unchanged to ``filter_segments``.
        min_length: Forwarded unchanged to ``filter_segments``.
        maxmiss: Forwarded unchanged to ``filter_segments``.

    Returns:
        The segments after gap joining and, when ``chromobj`` is truthy,
        filtering.  If the initial scan finds nothing, its (falsy) result is
        returned as is.
    """
    # IBD segments are long runs of identical genotypes.
    segments = runs_gte_uint8(identical, min_val, minlength=min_seg)
    if not segments:
        return segments

    # Genotyping errors happen.  If there is a small gap between two IBD
    # segments, attribute it to a genotyping error and join the segments.
    joined = join_gaps(segments, max_gap=2)
    if not chromobj:
        return joined

    return filter_segments(
        chromobj,
        joined,
        identical,
        min_length=min_length,
        size_unit=size_unit,
        min_density=min_density,
        maxmiss=maxmiss,
    )