def test_change_quantification():
    """Check that the pipeline scores a change at the supplied loop positions.

    Matrices with loops (condition "A") are compared against matrices
    without loops (condition "B") at the positions listed in
    ``A_loops.bed2d``. The test passes when at least half of the returned
    diff scores are negative.
    """
    # Matrices with loops first, then the comparison matrices without loops.
    all_cools = COOLS + COOLS_COMP
    conditions = ["A"] * len(COOLS) + ["B"] * len(COOLS_COMP)
    loops_bed2d = str(DATA / 'A_loops.bed2d')

    detected = pah.change_detection_pipeline(
        all_cools,
        conditions,
        bed2d_file=loops_bed2d,
        subsample=False,
        percentile_thresh=95,
    )
    scores = detected.diff_score

    # Change must be detected in at least half the positions.
    n_negative = len(scores[scores < 0])
    assert n_negative >= len(scores) * 0.5
def test_change_quantification():
    """Check that the pipeline scores a change at the supplied loop positions.

    Matrices with loops (condition "B") are compared against matrices
    without loops (condition "S") at the positions listed in
    ``B_loops.bed2d``. The test passes when at least 30% of the returned
    diff scores are negative.
    """
    # Matrices with loops first, then the comparison matrices without loops.
    all_cools = COOLS + COOLS_COMP
    conditions = ["B"] * len(COOLS) + ["S"] * len(COOLS_COMP)
    loops_bed2d = str(DATA / "B_loops.bed2d")

    detected = pah.change_detection_pipeline(
        all_cools,
        conditions,
        bed2d_file=loops_bed2d,
        subsample=False,
    )
    scores = detected.diff_score

    # A negative score is the expected direction (disappearing loops); it
    # only has to show up at some of the positions.
    n_negative = len(scores[scores < 0])
    assert n_negative >= len(scores) * 0.3
def test_change_no_threshold(kernel):
    """Check that, without a threshold, every input position gets a score.

    ``kernel`` selects both the ``B_<kernel>.bed2d`` input file and the
    kernel passed to the pipeline. With ``pearson_thresh=None`` the number
    of non-NaN diff scores must equal the number of rows read from that
    file.
    """
    # The same file is read here for the expected count and handed to the
    # pipeline as its input positions.
    bed2d_path = str(DATA / f"B_{kernel}.bed2d")
    expected_rows = pd.read_csv(bed2d_path, sep="\t")

    # Matrices with loops first, then the comparison matrices without loops.
    all_cools = COOLS + COOLS_COMP
    conditions = ["B"] * len(COOLS) + ["S"] * len(COOLS_COMP)

    detected = pah.change_detection_pipeline(
        all_cools,
        conditions,
        bed2d_file=bed2d_path,
        subsample=False,
        pearson_thresh=None,
        kernel=kernel,
    )
    scores = detected.diff_score

    # Diff scores must be returned for all positions.
    scored = scores[~np.isnan(scores)]
    assert len(scored) == expected_rows.shape[0]
def test_change_detection():
    """Test if change detection pipeline finds some relevant positions.

    Runs the pipeline on matrices with loops (condition "A") versus
    matrices without loops (condition "B"). The test passes when at least
    half of the known loop coordinates in ``LOOPS`` lie within 3 pixels,
    on each axis, of a position reported by the pipeline.
    """
    # Run loop change detection between matrices with and without loops
    cools = COOLS + COOLS_COMP
    conds = ["A"] * len(COOLS) + ["B"] * len(COOLS_COMP)
    obs_pos = pah.change_detection_pipeline(
        cools,
        conds,
        subsample=False,
        percentile_thresh=95,
    )

    # Build a set of fuzzy (+/- 3 pixels around) positions found.
    # The full Cartesian product of offsets is required here: pairs drawn
    # with combinations() are strictly increasing, which leaves out (0, 0)
    # and every shift where the first offset is not smaller than the second,
    # so exact hits would not be counted.
    offsets = range(-3, 4)
    valid_pos = set()
    for pos in obs_pos.loc[:, ["bin1", "bin2"]].values:
        for shift in it.product(offsets, offsets):
            valid_pos.add((pos[0] + shift[0], pos[1] + shift[1]))

    # Count the number of real loop positions that were found
    found = sum(tuple(target.astype(int)) in valid_pos for target in LOOPS)
    assert found / LOOPS.shape[0] >= 0.5
def test_change_detection(kernel, coords):
    """Check that the pipeline recovers at least half of the known loops.

    ``kernel`` is forwarded to the pipeline. ``coords`` holds the expected
    loop coordinates; each row is cast to int and looked up as a tuple
    (presumably a 2-column array of bin indices, confirm against the
    fixture). A loop counts as found when it lies within 3 pixels, on each
    axis, of a reported position.
    """
    # Matrices with loops first, then the comparison matrices without loops.
    all_cools = COOLS + COOLS_COMP
    conditions = ["B"] * len(COOLS) + ["S"] * len(COOLS_COMP)
    detected = pah.change_detection_pipeline(
        all_cools,
        conditions,
        subsample=False,
        kernel=kernel,
    )

    # Expand every detected position into its 7x7 neighbourhood.
    offsets = range(-3, 4)
    valid_pos = {
        (hit[0] + dx, hit[1] + dy)
        for hit in detected.loc[:, ["bin1", "bin2"]].values
        for dx, dy in it.product(offsets, offsets)
    }

    # Tally the real loop positions that fall inside a neighbourhood.
    found = sum(1 for target in coords if tuple(target.astype(int)) in valid_pos)
    print(f"Found {found} out of {coords.shape[0]} loops.")
    assert found / coords.shape[0] >= 0.5
import cooler import matplotlib.pyplot as plt DATA = pathlib.Path("data_test") # Synthetic matrices and their known loop coordinates COOLS = [str(c) for c in DATA.glob("B_[1-6]*.cool")] LOOPS = np.loadtxt(DATA / "B_loops.txt") # Matrices with a diagonal gradient COOLS_COMP = [str(c) for c in DATA.glob("smooth_[1-6]*.cool")] # Run loop change detection between matrices with and without loops cools = COOLS + COOLS_COMP conds = ["B"] * len(COOLS) + ["S"] * len(COOLS_COMP) obs_pos = pah.change_detection_pipeline( cools, conds, min_dist=50000, subsample=False, ) # Build a set of fuzzy (+/3 pixels around) positions found fuzzy_obs = set() for pos in obs_pos.loc[:, ["bin1", "bin2"]].values: for shift in it.product(range(-3, 4), range(-3, 4)): fuzzy_obs.add((pos[0] + shift[0], pos[1] + shift[1])) # Same for targets valid_pos = set() for pos in LOOPS: for shift in it.product(range(-3, 4), range(-3, 4)): valid_pos.add((pos[0] + shift[0], pos[1] + shift[1])) # Count the number of real loop positions that were found found = 0 for target in LOOPS: