예제 #1
0
 def test_genome_wide_norm(self):
     """Check genome-wide, median-normalized Obs/Exp pairing scores.

     The pairing score is computed on the synthetic Hi-C cooler without
     passing ``regions`` and with ``norm=True``; the result is compared
     to the stored reference table.
     """
     arms = pd.DataFrame(
         {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0])
     cooler_file = cooler.Cooler(
         "testFiles/test2.mcool::/resolutions/10000")
     exp_f = HT.get_expected(cooler_file, arms, ignore_diagonals=0)
     pairing_score = HT.get_pairing_score_obs_exp(
         cooler_file, exp_f, 50000, arms=arms, norm=True)
     # Parse the reference with the dtypes of the computed frame so that
     # the comparison is not affected by CSV dtype inference.
     expected = pd.read_csv(
         "testFiles/test_pairingScore_obsExp_genomeWide_Norm.csv",
         dtype={
             name: pairing_score.dtypes[name]
             for name in pairing_score.dtypes.index
         },
     )
     assert_frame_equal(pairing_score, expected)
예제 #2
0
 def test_no_collapse_real_data(self):
     """Run the Obs/Exp pileup on real data without collapsing.

     The uncollapsed pileup is compared to a stored array; NaNs in
     matching positions count as equal.
     """
     regions = pd.read_csv("testFiles/testAssignRegions.csv")
     arms = HT.get_arms_hg19()
     hic = cooler.Cooler(
         "testFiles/test3_realdata.mcool::resolutions/50000")
     expected_counts = HT.get_expected(hic, arms)
     pileup = HT.do_pileup_obs_exp(
         hic, expected_counts, regions, proc=1, collapse=False)
     reference = np.load(
         "testFiles/real_data_obsexp_pileup_not_collapsed.npy")
     self.assertTrue(np.allclose(pileup, reference, equal_nan=True))
예제 #3
0
 def test_center_enrichment(self):
     """Check that the sliding-diamond profile of the gaussian peaks
     at its center.

     The mean of the values where ``|x| < 1`` must be more than five
     times the mean of the first five profile values.
     """
     x_values, y_values = HT.sliding_diamond(self.gaussian, side_len=6)
     near_center = np.abs(x_values) < 1
     center_mean = np.mean(y_values[near_center])
     border_mean = np.mean(y_values[:5])
     self.assertTrue(center_mean > 5 * border_mean)
예제 #4
0
 def test_specific_regions_without_diag(self):  # Simulated cooler
     """Check ICCF pairing scores at given regions with blanked diagonal.

     Scores are computed on the synthetic cooler for the positions in
     the pileup table, with ``norm=False`` and ``blank_diag=True``, and
     compared to the stored reference table.
     """
     regions = pd.read_csv("testFiles/posPileups.csv")
     regions.loc[:, "mid"] = regions["pos"]
     arms = pd.DataFrame(
         {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0])
     hic = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
     scores = HT.get_pairing_score(
         hic,
         50000,
         regions=regions,
         arms=arms,
         norm=False,
         blank_diag=True,
     )
     reference = pd.read_csv(
         "testFiles/test_pairingScore_ICCF_specificRegions_withoutDiag.csv")
     assert_frame_equal(scores, reference)
예제 #5
0
 def test_genome_wide_norm_without_diag(self):
     """Check genome-wide, normalized ICCF pairing scores with blanked
     main diagonal on synthetic Hi-C data.

     The reference table is read with the dtypes of the computed frame
     and with its first column as the index.
     """
     arms = pd.DataFrame(
         {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0])
     hic = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
     pairing_score = HT.get_pairing_score(
         hic, 50000, arms=arms, norm=True, blank_diag=True)
     reference_dtypes = {
         column: dtype for column, dtype in pairing_score.dtypes.items()
     }
     reference = pd.read_csv(
         "testFiles/test_pairingScore_ICCF_genomeWide_withoutDiag_norm.csv",
         dtype=reference_dtypes,
         index_col=0,
     )
     assert_frame_equal(pairing_score, reference)
예제 #6
0
 def test_odd_diamond_xnorm(self):
     """Slide a diamond of odd side length (3) over the small fixed
     matrix with ``center_x=True`` and compare both axes to known values.
     """
     expected_x = np.array([-1.5, -0.5, 0.5, 1.5])
     expected_y = np.array(
         [3.666666666, 4.11111111, 4.77777777, 2.55555555])
     x_values, y_values = HT.sliding_diamond(
         self.test_matrix, side_len=3, center_x=True)
     self.assertTrue(np.allclose(x_values, expected_x))
     self.assertTrue(np.allclose(y_values, expected_y))
예제 #7
0
 def test_even_diamond(self):
     """Slide a diamond of even side length (2) over the small fixed
     matrix with ``center_x=False`` and compare both axes to known values.
     """
     expected_x = np.array([0.5, 1.5, 2.5, 3.5, 4.5])
     expected_y = np.array([3.5, 4.75, 4.75, 3.25, 2.0])
     x_values, y_values = HT.sliding_diamond(
         self.test_matrix, side_len=2, center_x=False)
     self.assertTrue(np.allclose(x_values, expected_x))
     self.assertTrue(np.allclose(y_values, expected_y))
예제 #8
0
 def test_collapse(self):
     """Pile up synthetic Hi-C data (ICCF) with ``collapse=True`` and
     compare the result to the stored array.
     """
     positions = pd.read_csv("testFiles/posPileups.csv")
     arms = pd.DataFrame(
         {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0])
     windows = HT.assign_regions(
         50000, 10000, positions["chrom"], positions["pos"], arms)
     hic = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
     pileup = HT.do_pileup_iccf(hic, windows, proc=1, collapse=True)
     reference = np.load("testFiles/test_pileups_iccf_collapse.npy")
     self.assertTrue(np.allclose(pileup, reference))
예제 #9
0
 def test_odd_diamond(self):
     """Slide a diamond of odd side length (3) over the small fixed
     matrix with ``center_x=False`` and compare both axes to known values.
     """
     expected_x = np.array([1, 2, 3, 4])
     expected_y = np.array(
         [3.666666666, 4.11111111, 4.77777777, 2.55555555])
     x_values, y_values = HT.sliding_diamond(
         self.test_matrix, side_len=3, center_x=False)
     self.assertTrue(np.allclose(x_values, expected_x))
     self.assertTrue(np.allclose(y_values, expected_y))
예제 #10
0
 def test_collapse_real_data(self):
     """Run the collapsed ICCF pileup on real data and compare the
     result to the stored array.
     """
     regions = pd.read_csv("testFiles/testAssignRegions.csv")
     hic = cooler.Cooler(
         "testFiles/test3_realdata.mcool::resolutions/50000")
     pileup = HT.do_pileup_iccf(hic, regions, proc=1, collapse=True)
     reference = np.load("testFiles/real_data_iccf_pileup_collapsed.npy")
     self.assertTrue(np.allclose(pileup, reference))
예제 #11
0
 def test_even_diamond_xnorm(self):
     """Slide a diamond of even side length (2) over the small fixed
     matrix with ``center_x=True`` (center is set to 0) and compare both
     axes to known values.
     """
     expected_x = np.array([-2, -1, 0, 1, 2])
     expected_y = np.array([3.5, 4.75, 4.75, 3.25, 2.0])
     x_values, y_values = HT.sliding_diamond(
         self.test_matrix, side_len=2, center_x=True)
     self.assertTrue(np.allclose(x_values, expected_x))
     self.assertTrue(np.allclose(y_values, expected_y))
예제 #12
0
 def test_expected_real_data(self):
     """Check expected counts per diagonal for real Hi-C data with
     multiple chromosomal arms against a stored table.

     The ``count.sum`` column is dropped from the result, and both
     frames are sorted by region and diagonal before being compared.
     """
     sort_keys = ["region", "diag"]
     arms = HT.get_arms_hg19()
     cooler_file = cooler.Cooler(
         "testFiles/test3_realdata.mcool::/resolutions/50000")
     result = HT.get_expected(
         cooler_file, arms, proc=1, ignore_diagonals=0)
     result = result.drop(columns=["count.sum"])
     check = pd.read_csv("testFiles/test_expected_realdata.csv")
     # The stored table keeps chrom/start/end separately; fold them into
     # the single "region" column used by the result.
     check.loc[:, "region"] = check.apply(
         lambda x: f"{x['chrom']}:{x['start']}-{x['end']}", axis=1)
     check_trimmed = check.drop(columns=["chrom", "start", "end"])
     check_ordered = check_trimmed[result.columns]
     check_final = check_ordered.sort_values(by=sort_keys)
     check_final = check_final.reset_index(drop=True)
     sorted_result = result.sort_values(by=sort_keys)
     sorted_result = sorted_result.reset_index(drop=True)
     assert_frame_equal(sorted_result, check_final)
예제 #13
0
 def test_case1(self):
     """Assign a small, synthetic set of regions and compare the
     resulting frame to the stored reference table.
     """
     reference = pd.read_csv("testFiles/testAssignRegions.csv")
     bed = pd.read_csv("testFiles/testSmall.bed", sep="\t")
     assigned = HT.assign_regions(
         window=500000,
         binsize=50000,
         chroms=bed["chrom"],
         positions=bed["pos"],
         arms=self.arms,
     )
     assert_frame_equal(assigned, reference)
예제 #14
0
 def test_region_spans_multiple_supports(self):
     """Check region assignment when a region spans multiple supports.

     Only the ``region`` column is compared to the stored reference.
     """
     reference = pd.read_csv(
         "testFiles/multiple_support_regions_result.csv")
     positions = pd.read_csv("testFiles/multiple_support_regions.csv")
     assigned = HT.assign_regions(
         window=1000000,
         binsize=20000,
         chroms=positions["chrom"],
         positions=positions["pos"],
         arms=self.arms,
     )
     assert_series_equal(assigned["region"], reference["region"])
예제 #15
0
 def test_equal_sized_windows(self):
     """Test flexible pileup with equally sized windows (obs/exp).

     The extracted windows are compared one by one to the pickled
     reference windows; the number of windows must match as well.
     """
     position_frame = pd.read_csv("testFiles/posPileupSymmetric.csv")
     arms = pd.DataFrame(
         {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0])
     cooler_file = cooler.Cooler(
         "testFiles/test2.mcool::/resolutions/10000")
     expected_counts = HT.get_expected(
         cooler_file, arms, proc=2, ignore_diagonals=0)
     result = HT.extract_windows_different_sizes_obs_exp(
         position_frame, arms, cooler_file, expected_counts)
     # load expected extracted windows (trusted local test fixture)
     with open("testFiles/test_pilesup_symmetric_obs_exp.pickle",
               "rb") as file_pointer:
         reference = pickle.load(file_pointer)
     windows = list(result)
     reference_windows = list(reference)
     # zip() stops at the shorter input, so a missing or empty result
     # would pass unnoticed without this explicit length check.
     self.assertEqual(len(windows), len(reference_windows))
     self.assertTrue(
         all(
             np.allclose(i, j)
             for i, j in zip(windows, reference_windows)))
예제 #16
0
 def test_specific_regions(self):
     """Check Obs/Exp pairing scores at specific regions of the
     synthetic Hi-C data (``norm=False``) against the stored table.
     """
     regions = pd.read_csv("testFiles/posPileups.csv")
     regions.loc[:, "mid"] = regions["pos"]
     arms = pd.DataFrame(
         {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0])
     hic = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
     expected_counts = HT.get_expected(hic, arms, ignore_diagonals=0)
     scores = HT.get_pairing_score_obs_exp(
         hic,
         expected_counts,
         50000,
         regions=regions,
         arms=arms,
         norm=False,
     )
     reference = pd.read_csv(
         "testFiles/test_pairingScore_obsExp_specificRegions.csv")
     assert_frame_equal(scores, reference)
예제 #17
0
 def test_synthetic_data(self):
     """Check expected counts per diagonal for synthetic Hi-C data
     against a stored table of known values.
     """
     result = HT.get_expected(
         self.cooler, self.arms, proc=1, ignore_diagonals=0)
     check = pd.read_csv("testFiles/test_expected_chrSyn.csv")
     # The stored table keeps chrom/start/end separately; fold them into
     # the single "region" column used by the result.
     check.loc[:, "region"] = check.apply(
         lambda x: f"{x['chrom']}:{x['start']}-{x['end']}", axis=1)
     check_trimmed = check.drop(columns=["chrom", "start", "end"])
     check_final = check_trimmed[result.columns]
     assert_frame_equal(result, check_final)
예제 #18
0
 def test_regions_are_close_to_supports(self):
     """Tests assign regions when regions are close to supports.

     Regression test for a bug that was found in this situation. Only
     the ``region`` column is compared to the stored reference.
     """
     bed_file = pd.read_csv(
         "testFiles/regions_close_to_support_boundary.tsv", sep="\t")
     result = HT.assign_regions(
         window=1000000,
         binsize=20000,
         chroms=bed_file["chrom"],
         positions=bed_file["pos"],
         arms=self.arms,
     )
     # sep=r"\s+" is the documented equivalent of the deprecated
     # delim_whitespace=True option of pandas.read_csv.
     expected = pd.read_csv(
         "testFiles/regions_close_to_support_boundary_result.tsv",
         sep=r"\s+",
     )
     assert_series_equal(result["region"], expected["region"])
예제 #19
0
 def test_synthetic_data_mult_chroms(self):
     """Check expected counts per diagonal for synthetic Hi-C data
     split into two chromosomal arms against a stored table.
     """
     arms = pd.DataFrame(
         {
             "chrom": ["chrSyn", "chrSyn"],
             "start": [0, 2000000],
             "end": [2000000, 4990000],
         }
     )
     result = HT.get_expected(
         self.cooler, arms, proc=1, ignore_diagonals=0)
     check = pd.read_csv("testFiles/test_expected_multiple_chroms.csv")
     # The stored table keeps chrom/start/end separately; fold them into
     # the single "region" column used by the result.
     check.loc[:, "region"] = check.apply(
         lambda x: f"{x['chrom']}:{x['start']}-{x['end']}", axis=1)
     check_trimmed = check.drop(columns=["chrom", "start", "end"])
     check_final = check_trimmed[result.columns]
     assert_frame_equal(result, check_final)
예제 #20
0
 def test_differently_sized_windows(self):
     """Test flexible pileup with differently sized windows (ICCF).

     The extracted windows are compared one by one to the pickled
     reference windows; the number of windows must match as well.
     """
     position_frame = pd.read_csv("testFiles/posPileupAsymmetric.csv")
     arms = pd.DataFrame(
         {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0])
     cooler_file = cooler.Cooler(
         "testFiles/test3.mcool::/resolutions/10000")
     result = HT.extract_windows_different_sizes_iccf(
         position_frame, arms, cooler_file)
     # load expected extracted windows (trusted local test fixture)
     with open("testFiles/test_pilesup_asymmetric.pickle",
               "rb") as file_pointer:
         expected = pickle.load(file_pointer)
     windows = list(result)
     reference_windows = list(expected)
     # zip() stops at the shorter input, so a missing or empty result
     # would pass unnoticed without this explicit length check.
     self.assertEqual(len(windows), len(reference_windows))
     self.assertTrue(
         all(
             np.allclose(i, j)
             for i, j in zip(windows, reference_windows)))
예제 #21
0
 def test_differently_sized_windows_different_arms(self):
     """Test pileup on differently sized windows that are on different
     chromosomal arms.

     The extracted windows are compared one by one to the pickled
     reference windows; the number of windows must match as well.
     """
     position_frame = pd.read_csv("testFiles/posPileupAsymmetric.csv")
     arms = pd.DataFrame({
         "chrom": ["chrSyn", "chrSyn"],
         "start": [0, 250000],
         "end": [250000, 4990000],
     })
     cooler_file = cooler.Cooler(
         "testFiles/test3.mcool::/resolutions/10000")
     result = HT.extract_windows_different_sizes_iccf(
         position_frame, arms, cooler_file)
     # load expected extracted windows (trusted local test fixture)
     with open("testFiles/test_pilesup_asymmetric.pickle",
               "rb") as file_pointer:
         expected = pickle.load(file_pointer)
     windows = list(result)
     reference_windows = list(expected)
     # zip() stops at the shorter input, so a missing or empty result
     # would pass unnoticed without this explicit length check.
     self.assertEqual(len(windows), len(reference_windows))
     self.assertTrue(
         all(
             np.allclose(i, j)
             for i, j in zip(windows, reference_windows)))
예제 #22
0
 def test_wrong_parameters(self):
     """Check that requesting Obs/Exp pairing scores at specific
     regions together with ``norm=True`` raises ``ValueError``.
     """
     regions = pd.read_csv("testFiles/posPileups.csv")
     regions.loc[:, "mid"] = regions["pos"]
     arms = pd.DataFrame(
         {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0])
     hic = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
     expected_counts = HT.get_expected(hic, arms, ignore_diagonals=0)
     # Only the pairing-score call itself is expected to raise.
     with self.assertRaises(ValueError):
         HT.get_pairing_score_obs_exp(
             hic,
             expected_counts,
             50000,
             regions=regions,
             arms=arms,
             norm=True,
         )
예제 #23
0
 def test_arms(self):
     """Check that the supports returned for the chromosomal arms of
     hg19 match the stored reference table.
     """
     fetched_arms = HT.get_arms_hg19()
     reference = pd.read_csv("./testFiles/arms.csv")
     assert_frame_equal(reference, fetched_arms)