Example #1
0
    def test_non_linear_correlations_df_minmax(self):
        """Check ``non_linear_correlations`` on a DataFrame with ``minmax=True``.

        The call is expected to return three objects (``cor``, ``mini``,
        ``maxi``). Each one must be a 4x4 frame labelled ``X1``..``X4`` on
        both axes with a diagonal equal to 1, and the extreme values of
        ``mini`` / ``maxi`` are compared against those of ``cor``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        labels = ["X1", "X2", "X3", "X4"]
        size = len(labels)
        ones = [1] * size

        # Use the first four columns of the iris data as input features.
        features = datasets.load_iris().data[:, :4]
        frame = pandas.DataFrame(features, columns=labels)
        model = LinearRegression(fit_intercept=False)
        cor, mini, maxi = non_linear_correlations(frame, model, minmax=True)

        def diagonal(matrix):
            # Read the diagonal cell by cell through positional indexing.
            return [matrix.iloc[k, k] for k in range(size)]

        # Shape and labels of the main correlation frame.
        self.assertEqual(cor.shape, (size, size))
        self.assertEqual(list(cor.columns), labels)
        self.assertEqual(list(cor.index), labels)

        # Every returned frame has a diagonal made of ones.
        for matrix in (cor, mini, maxi):
            self.assertEqual(diagonal(matrix), ones)

        self.assertGreater(cor.values.min(), 0)

        # The bound frames carry the same labels as the input.
        for matrix in (mini, maxi):
            self.assertEqual(list(matrix.columns), labels)
            self.assertEqual(list(matrix.index), labels)

        lowest = cor.values.min()
        highest = cor.values.max()

        self.assertEqual(mini.shape, (size, size))
        # NOTE(review): assertLesser is not a standard unittest.TestCase
        # method; presumably supplied by the enclosing class' base -- confirm.
        self.assertLesser(mini.values.min(), lowest)
        self.assertEqual(maxi.shape, (size, size))
        self.assertGreater(maxi.values.max(), highest)
Example #2
0
    def test_non_linear_correlations_array(self):
        """Check ``non_linear_correlations`` when the input is a plain array.

        The result is indexed like an array (``cor[i, i]``) and must be 4x4,
        have a diagonal equal to 1, and pass the ``assertGreater`` check of
        its minimum against 0.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        # First four iris columns, passed through a DataFrame and read back
        # as raw values so that the function receives an array, not a frame.
        features = datasets.load_iris().data[:, :4]
        values = pandas.DataFrame(features).values
        model = LinearRegression(fit_intercept=False)
        cor = non_linear_correlations(values, model)

        self.assertEqual(cor.shape, (4, 4))
        diagonal = [cor[k, k] for k in range(4)]
        self.assertEqual(diagonal, [1] * 4)
        self.assertGreater(cor.min(), 0)
Example #3
0
    def test_non_linear_correlations_df_tree(self):
        """Check ``non_linear_correlations`` with a ``RandomForestRegressor``.

        The result must be a 4x4 frame labelled ``X1``..``X4`` on both axes.
        The largest diagonal value is compared against 0.98 and the overall
        minimum against 0.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        labels = ["X1", "X2", "X3", "X4"]
        size = len(labels)

        # Use the first four columns of the iris data as input features.
        features = datasets.load_iris().data[:, :4]
        frame = pandas.DataFrame(features, columns=labels)
        cor = non_linear_correlations(frame, RandomForestRegressor())

        self.assertEqual(cor.shape, (size, size))
        self.assertEqual(list(cor.columns), labels)
        self.assertEqual(list(cor.index), labels)

        # Only the largest diagonal entry is checked against the threshold.
        best_diagonal = max(cor.iloc[k, k] for k in range(size))
        self.assertGreater(best_diagonal, 0.98)
        self.assertGreater(cor.values.min(), 0)