def test_orthogonal_features(self):
    """
    Exercise get_orthogonal_features and feature_pca_analysis on self.X.

    Verifies, in order: selected PCA columns have (near) zero mean, selected PCA
    columns have the reference standard deviation, the first element of the
    'Weighted_Kendall_Rank' entry returned by feature_pca_analysis is close to
    the reference value, and num_features fixes the number of returned columns.
    """
    orthogonal = get_orthogonal_features(self.X)

    # PCA features should have mean of 0
    for column in (2, 5, 6):
        column_mean = np.mean(orthogonal[:, column])
        self.assertAlmostEqual(column_mean, 0, delta=1e-7)

    # Reference standard deviations for a few PCA columns (loose tolerance)
    reference_std = ((1, 1.3813), (3, 1.0255), (4, 1.0011))
    for column, expected in reference_std:
        column_std = np.std(orthogonal[:, column])
        self.assertAlmostEqual(column_std, expected, delta=0.2)

    # Compare MDI importance of the fitted classifier against the PCA analysis
    importance = mean_decrease_impurity(self.fit_clf, self.X.columns)
    correlation = feature_pca_analysis(self.X, importance)
    weighted_kendall = correlation['Weighted_Kendall_Rank'][0]
    self.assertAlmostEqual(weighted_kendall, 0.7424, delta=1e-1)

    # Requesting an explicit number of PCA features must yield that many columns
    for requested in (10, 5):
        reduced = get_orthogonal_features(self.X, num_features=requested)
        self.assertEqual(reduced.shape[1], requested)
def test_orthogonal_features(self):
    """
    Exercise get_orthogonal_features and feature_pca_analysis on self.X_train.

    A sequentially bootstrapped bagging classifier is configured up front and
    fitted after the PCA checks; its MDI feature importance is then passed to
    feature_pca_analysis and the first element of the 'Weighted_Kendall_Rank'
    entry is compared with the reference value.
    """
    # Init classifiers
    base_forest = RandomForestClassifier(
        n_estimators=1,
        criterion='entropy',
        bootstrap=False,
        class_weight='balanced_subsample',
    )
    bagging_clf = SequentiallyBootstrappedBaggingClassifier(
        base_estimator=base_forest,
        max_features=1.0,
        n_estimators=100,
        samples_info_sets=self.samples_info_sets,
        price_bars=self.price_bars_trim,
        oob_score=True,
        random_state=1,
    )

    orthogonal = get_orthogonal_features(self.X_train)

    # PCA features should have mean of 0
    for column in (2, 5, 6):
        column_mean = np.mean(orthogonal[:, column])
        self.assertAlmostEqual(column_mean, 0, delta=1e-7)

    # Reference standard deviations for a few PCA columns (loose tolerance)
    reference_std = ((1, 1.499), (3, 1.047), (4, 0.948))
    for column, expected in reference_std:
        column_std = np.std(orthogonal[:, column])
        self.assertAlmostEqual(column_std, expected, delta=0.2)

    # Fit only now, keeping the same statement order as the PCA checks above
    bagging_clf.fit(self.X_train, self.y_train_clf)
    importance = feature_importance_mean_decrease_impurity(
        bagging_clf, self.X_train.columns)
    correlation = feature_pca_analysis(self.X_train, importance)

    # Check correlation metrics results
    weighted_kendall = correlation['Weighted_Kendall_Rank'][0]
    self.assertAlmostEqual(weighted_kendall, 0.26, delta=1e-1)
def test_orthogonal_features(self):
    """
    Exercise get_orthogonal_features and feature_pca_analysis on self.X.

    Verifies, in order: selected PCA columns have (near) zero mean, selected PCA
    columns have the reference standard deviation, and the first element of the
    'Weighted_Kendall_Rank' entry returned by feature_pca_analysis is close to
    the reference value.
    """
    orthogonal = get_orthogonal_features(self.X)

    # PCA features should have mean of 0
    for column in (2, 5, 6):
        column_mean = np.mean(orthogonal[:, column])
        self.assertAlmostEqual(column_mean, 0, delta=1e-7)

    # Reference standard deviations for a few PCA columns (loose tolerance)
    reference_std = ((1, 1.2503), (3, 1.0292), (4, 1.0134))
    for column, expected in reference_std:
        column_std = np.std(orthogonal[:, column])
        self.assertAlmostEqual(column_std, expected, delta=0.2)

    # Compare MDI importance of the fitted classifier against the PCA analysis
    importance = mean_decrease_impurity(self.fit_clf, self.X.columns)
    correlation = feature_pca_analysis(self.X, importance)
    weighted_kendall = correlation['Weighted_Kendall_Rank'][0]
    self.assertAlmostEqual(weighted_kendall, -0.0724, delta=1e-1)