Ejemplo n.º 1
0
    def test_orthogonal_features(self):
        """
        Exercise the orthogonal (PCA) feature pipeline on ``self.X``.

        Covers three things in order: summary statistics of selected PCA
        columns, the weighted Kendall rank reported by the PCA-vs-MDI
        importance analysis, and the column count when ``num_features`` is set.
        """

        orthogonal = get_orthogonal_features(self.X)

        # Selected PCA columns are expected to be centred on zero
        for column in (2, 5, 6):
            self.assertAlmostEqual(np.mean(orthogonal[:, column]), 0, delta=1e-7)

        # Spot-check the spread of a few PCA columns against reference values
        reference_std = ((1, 1.3813), (3, 1.0255), (4, 1.0011))
        for column, expected_std in reference_std:
            self.assertAlmostEqual(np.std(orthogonal[:, column]), expected_std, delta=0.2)

        # Compare MDI importance of the fitted classifier against the PCA analysis
        importance = mean_decrease_impurity(self.fit_clf, self.X.columns)
        analysis = feature_pca_analysis(self.X, importance)

        kendall_rank = analysis['Weighted_Kendall_Rank'][0]
        self.assertAlmostEqual(kendall_rank, 0.7424, delta=1e-1)

        # Requesting an explicit number of features must yield that many columns
        for requested in (10, 5):
            reduced = get_orthogonal_features(self.X, num_features=requested)
            self.assertEqual(reduced.shape[1], requested)
    def test_orthogonal_features(self):
        """
        Exercise the orthogonal (PCA) feature pipeline on ``self.X_train``.

        Builds a sequentially bootstrapped bagging classifier, checks summary
        statistics of selected PCA columns, then fits the classifier and checks
        the weighted Kendall rank reported by the PCA-vs-MDI importance analysis.
        """

        # Base learner: a single-tree, entropy-criterion random forest
        base_forest = RandomForestClassifier(
            n_estimators=1,
            criterion='entropy',
            bootstrap=False,
            class_weight='balanced_subsample',
        )

        # Ensemble wrapping the base learner; seeded via random_state=1
        bagging_clf = SequentiallyBootstrappedBaggingClassifier(
            base_estimator=base_forest,
            max_features=1.0,
            n_estimators=100,
            samples_info_sets=self.samples_info_sets,
            price_bars=self.price_bars_trim,
            oob_score=True,
            random_state=1,
        )

        orthogonal = get_orthogonal_features(self.X_train)

        # Selected PCA columns are expected to be centred on zero
        for column in (2, 5, 6):
            self.assertAlmostEqual(np.mean(orthogonal[:, column]), 0, delta=1e-7)

        # Spot-check the spread of a few PCA columns against reference values
        reference_std = ((1, 1.499), (3, 1.047), (4, 0.948))
        for column, expected_std in reference_std:
            self.assertAlmostEqual(np.std(orthogonal[:, column]), expected_std, delta=0.2)

        # Fit the ensemble, then compare its MDI importance against the PCA analysis
        bagging_clf.fit(self.X_train, self.y_train_clf)
        importance = feature_importance_mean_decrease_impurity(bagging_clf, self.X_train.columns)
        analysis = feature_pca_analysis(self.X_train, importance)

        kendall_rank = analysis['Weighted_Kendall_Rank'][0]
        self.assertAlmostEqual(kendall_rank, 0.26, delta=1e-1)
Ejemplo n.º 3
0
    def test_orthogonal_features(self):
        """
        Exercise the orthogonal (PCA) feature pipeline on ``self.X``.

        Checks summary statistics of selected PCA columns, then the weighted
        Kendall rank reported by the PCA-vs-MDI importance analysis.
        """

        orthogonal = get_orthogonal_features(self.X)

        # Selected PCA columns are expected to be centred on zero
        for column in (2, 5, 6):
            self.assertAlmostEqual(np.mean(orthogonal[:, column]), 0, delta=1e-7)

        # Spot-check the spread of a few PCA columns against reference values
        reference_std = ((1, 1.2503), (3, 1.0292), (4, 1.0134))
        for column, expected_std in reference_std:
            self.assertAlmostEqual(np.std(orthogonal[:, column]), expected_std, delta=0.2)

        # Compare MDI importance of the fitted classifier against the PCA analysis
        importance = mean_decrease_impurity(self.fit_clf, self.X.columns)
        analysis = feature_pca_analysis(self.X, importance)

        kendall_rank = analysis['Weighted_Kendall_Rank'][0]
        self.assertAlmostEqual(kendall_rank, -0.0724, delta=1e-1)