Ejemplo n.º 1
0
 def test_calculate_with_p_value_without_nans(self) -> None:
     """Compare ``calculate_with_p_value`` on ``self.X`` with scipy's ``pearsonr``.

     The expected correlation and p-value frames are filled one cell at a
     time by calling ``pearsonr`` on every ordered pair of columns of
     ``self.X``; both frames must then match the returned frames.
     """
     correlations, p_values = NaNCorrMp.calculate_with_p_value(self.X, n_jobs=2, chunks=1)
     # Start from all-NaN float64 frames that share the result's labels.
     expected_correlations = pd.DataFrame(
         index=correlations.index, columns=correlations.columns, dtype=np.float64
     )
     expected_p_values = expected_correlations.copy()
     for column in self.X.columns:
         for other_column in self.X.columns:
             expected_correlation, expected_p_value = pearsonr(self.X[column], self.X[other_column])
             # Write with one .loc[row, column] call: chained indexing
             # (frame[column][row] = value) may assign into a temporary object
             # and never reach the frame when pandas Copy-on-Write is active.
             expected_correlations.loc[other_column, column] = expected_correlation
             expected_p_values.loc[other_column, column] = expected_p_value
     assert_frame_equal(correlations, expected_correlations)
     assert_frame_equal(p_values, expected_p_values)
Ejemplo n.º 2
0
 def test_benchmark_with_numpy_without_nans(self) -> None:
     """Time ``NaNCorrMp.calculate`` against ``np.corrcoef`` on random data.

     A seeded 1200x1200 uniform random frame is correlated by both
     implementations. The results must agree element-wise, and the
     NaNCorrMp wall-clock time must be lower than numpy's.

     NOTE(review): the timing assertion depends on the machine and its
     load, so this test may be flaky on slow or busy hosts.
     """
     size = 1200
     n_jobs = 4
     np.random.seed(0)
     random_dataframe = pd.DataFrame(np.random.rand(size, size))

     t_start = time.perf_counter()
     nancorrmp_result = NaNCorrMp.calculate(random_dataframe, n_jobs=n_jobs, chunks=1000)
     nancorrmp_time = time.perf_counter() - t_start
     print(f'NaNCorrMp time with {n_jobs} jobs: {nancorrmp_time}')

     t_start = time.perf_counter()
     # np.corrcoef treats rows as variables, hence the transpose of the frame.
     numpy_result = np.corrcoef(random_dataframe.to_numpy().transpose())
     numpy_time = time.perf_counter() - t_start
     print(f'numpy time: {numpy_time}')
     print(f'nancorrmp_time / numpy_time ratio: {nancorrmp_time/numpy_time}')

     assert_array_almost_equal(nancorrmp_result.to_numpy(), numpy_result)
     # assertLess reports both timings on failure, unlike assertTrue(a < b).
     self.assertLess(nancorrmp_time, numpy_time)
Ejemplo n.º 3
0
 def test_benchmark_with_pandas_without_nans(self) -> None:
     """Time ``NaNCorrMp.calculate`` against ``DataFrame.corr`` on random data.

     A seeded 1200x1200 uniform random frame is correlated by both
     implementations. The results must agree element-wise, and the
     NaNCorrMp wall-clock time must be lower than pandas'.

     NOTE(review): the timing assertion depends on the machine and its
     load, so this test may be flaky on slow or busy hosts.
     """
     # faster when size > 1200, n_jobs=4
     size = 1200
     n_jobs = 4
     np.random.seed(0)
     random_dataframe = pd.DataFrame(np.random.rand(size, size))

     t_start = time.perf_counter()
     nancorrmp_result = NaNCorrMp.calculate(random_dataframe, n_jobs=n_jobs, chunks=1000)
     nancorrmp_time = time.perf_counter() - t_start
     print(f'NaNCorrMp time with {n_jobs} jobs: {nancorrmp_time}')

     t_start = time.perf_counter()
     pandas_result = random_dataframe.corr()
     pandas_time = time.perf_counter() - t_start
     print(f'pandas time: {pandas_time}')
     print(f'nancorrmp_time / pandas_time ratio: {nancorrmp_time/pandas_time}')

     assert_array_almost_equal(nancorrmp_result.to_numpy(), pandas_result.to_numpy())
     # assertLess reports both timings on failure, unlike assertTrue(a < b).
     self.assertLess(nancorrmp_time, pandas_time)
Ejemplo n.º 4
0
 def test_calculate_with_p_value_with_nans(self) -> None:
     """Neither frame returned for ``self.X_nans`` may contain a null cell."""
     correlations, p_values = NaNCorrMp.calculate_with_p_value(
         self.X_nans, n_jobs=2, chunks=1
     )
     # Same null check for each returned frame, correlations first.
     for frame in (correlations, p_values):
         has_null = frame.isnull().values.any()
         self.assertFalse(has_null)
Ejemplo n.º 5
0
 def test_single_core_with_infs(self) -> None:
     """With one job, ``calculate`` on ``self.X_infs`` must equal ``DataFrame.corr``."""
     frame = self.X_infs
     actual = NaNCorrMp.calculate(frame, n_jobs=1, chunks=1)
     # pandas' own correlation of the same frame is the reference.
     assert_frame_equal(actual, frame.corr())
Ejemplo n.º 6
0
 def test_single_core_without_nans(self) -> None:
     """With one job, ``calculate`` on ``self.X`` must equal ``DataFrame.corr``."""
     frame = self.X
     actual = NaNCorrMp.calculate(frame, n_jobs=1)
     # pandas' own correlation of the same frame is the reference.
     assert_frame_equal(actual, frame.corr())
Ejemplo n.º 7
0
 def test_with_numpy_input(self) -> None:
     """``calculate`` given a numpy array must return a numpy array of correlations.

     The input is the transposed array form of ``self.X_nans``; the result
     is compared with the transposed array form of ``self.X_nans.corr()``.
     """
     result = NaNCorrMp.calculate(self.X_nans.to_numpy().transpose(), n_jobs=2, chunks=1)
     # assertIsInstance is the idiomatic type check and names the actual
     # type on failure; it also accepts ndarray subclasses.
     self.assertIsInstance(result, np.ndarray)
     expected_result = self.X_nans.corr()
     assert_array_almost_equal(result, expected_result.to_numpy().transpose())
Ejemplo n.º 8
0
 def test_with_nans(self) -> None:
     """With two jobs, ``calculate`` on ``self.X_nans`` must equal ``DataFrame.corr``."""
     frame = self.X_nans
     actual = NaNCorrMp.calculate(frame, n_jobs=2, chunks=1)
     # pandas' own correlation of the same frame is the reference.
     assert_frame_equal(actual, frame.corr())