def test_average_ranks():
    """utils.average_ranks merges two FeatureRanks into per-feature mean ranks."""
    first = FeatureRanks(features=[1, 2, 3], ranks=[1, 2, 3], n_feats=10)
    second = FeatureRanks(features=[0, 1, 4, 5], ranks=[1, 2, 3, 4], n_feats=10)

    averaged = utils.average_ranks([first, second])

    assert isinstance(averaged, FeatureRanks)
    # Features absent from one input fall back to that input's default rank,
    # so the averages below mix explicit and default ranks.
    assert averaged[0] == 5.5
    assert averaged[1] == 1.5
    assert averaged[2] == 6
def inner_loop_results_3():
    """Fixture: two feature-evaluation results over features [2, 4].

    NOTE(review): each ranks list has 3 entries for 2 features — confirm
    FeatureRanks tolerates (or truncates) the extra value.
    """
    evaluations = [
        FeatureEvaluationResults(
            ranks=FeatureRanks(features=[2, 4], ranks=[3, 2, 1]),
            test_score=0.3,
            model="model",
        ),
        FeatureEvaluationResults(
            ranks=FeatureRanks(features=[2, 4], ranks=[1.5, 1.5, 3]),
            test_score=0.25,
            model="model",
        ),
    ]
    return evaluations
def inner_loop_results():
    """Fixture: two equally-scored evaluation results over features 1-4."""
    evaluations = [
        FeatureEvaluationResults(
            ranks=FeatureRanks(features=[1, 2, 3, 4], ranks=[3, 2, 1, 4]),
            test_score=0.2,
            model="estimator",
        ),
        FeatureEvaluationResults(
            ranks=FeatureRanks(features=[1, 2, 3, 4], ranks=[1.5, 1.5, 3, 4]),
            test_score=0.2,
            model="estimator",
        ),
    ]
    return evaluations
def _get_feature_ranks(
    self, estimator: Estimator, features: Union[List[int], NumpyArray]
) -> FeatureRanks:
    """Rank `features` by the estimator's importances (rank 1 = most important)."""
    importances = estimator.feature_importances
    # Negate so that rankdata gives the smallest rank to the largest importance.
    importance_ranks = rankdata(-importances)
    return FeatureRanks(
        features=features,
        ranks=importance_ranks,
        n_feats=self._n_initial_features,
    )
def average_ranks(ranks: Iterable[FeatureRanks]) -> FeatureRanks:
    """Average several FeatureRanks element-wise into one FeatureRanks.

    All inputs must agree on `n_feats`; the result covers every feature
    index in [0, n_feats) with the mean of its per-input ranks.

    Raises:
        ValueError: if `ranks` is empty or the inputs disagree on `n_feats`.
    """
    # Materialize once: `ranks` may be a one-shot iterator, and it is
    # consumed both for the n_feats check and again for the averaging —
    # a generator argument would otherwise be silently exhausted.
    ranks = list(ranks)
    if not ranks:
        raise ValueError("Input ranks refer to different features")
    n_feats_values = {r.n_feats for r in ranks}
    if len(n_feats_values) > 1:
        raise ValueError("Input ranks refer to different features")
    n_feats = n_feats_values.pop()
    features = np.arange(n_feats)
    avg_ranks = [np.average([rank[f] for rank in ranks]) for f in features]
    return FeatureRanks(features=features, ranks=avg_ranks)
def outer_loop_results():
    """Fixture: two OuterLoopResults with U-shaped n_features-to-score maps."""
    first = OuterLoopResults(
        n_features_to_score_map={5: 100, 4: 5, 3: 4, 2: 5, 1: 100},
        min_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[1, 2], ranks=[1, 2], n_feats=5),
            model="model",
        ),
        max_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[1, 2, 3, 4], ranks=[2, 1, 3, 4], n_feats=5),
            model="model",
        ),
        mid_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[1, 2, 3], ranks=[1, 2, 3], n_feats=5),
            model="model",
        ),
    )
    second = OuterLoopResults(
        n_features_to_score_map={5: 300, 4: 6, 3: 4, 2: 7, 1: 250},
        min_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[1, 2], ranks=[1.5, 1.5], n_feats=5),
            model="model",
        ),
        max_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[0, 1, 2, 3], ranks=[1, 2, 3, 4], n_feats=5),
            model="model",
        ),
        mid_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[0, 1, 2], ranks=[3, 1, 2], n_feats=5),
            model="model",
        ),
    )
    return [first, second]
def outer_loop_results2():
    """Fixture: a second pair of OuterLoopResults with differing score maps."""
    first = OuterLoopResults(
        n_features_to_score_map={5: 150, 4: 4, 3: 4, 2: 5, 1: 120},
        min_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[2, 3], ranks=[1, 2], n_feats=5),
            model="model",
        ),
        max_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[0, 1, 2, 3], ranks=[3, 1, 2, 4], n_feats=5),
            model="model",
        ),
        mid_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[0, 1, 2], ranks=[3, 1, 2], n_feats=5),
            model="model",
        ),
    )
    second = OuterLoopResults(
        n_features_to_score_map={5: 200, 4: 7, 3: 1, 2: 6, 1: 220},
        min_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[0, 1], ranks=[1, 2], n_feats=5),
            model="model",
        ),
        max_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[0, 1, 2, 4], ranks=[4, 3, 1, 2], n_feats=5),
            model="model",
        ),
        mid_eval=FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=[1, 2, 3], ranks=[3, 1, 2], n_feats=5),
            model="model",
        ),
    )
    return [first, second]
def get_best_n_features(ranks: FeatureRanks, n_to_keep: int) -> List[int]:
    """Return the `n_to_keep` features with the best (lowest) ranks.

    Features absent from `ranks` are used as shuffled padding when the
    explicitly ranked features are fewer than requested.

    Raises:
        ValueError: if fewer than `n_to_keep` features exist in total.
    """
    rank_by_feature = ranks.get_data()
    ordered = sorted(rank_by_feature.items(), key=lambda item: item[1])
    best = [feature for feature, _ in ordered[:n_to_keep]]
    if len(best) == n_to_keep:
        return best
    # Pad with non-present features; shuffle so the padding order carries
    # no bias. NOTE: this mutates the global numpy RNG state.
    candidates = np.arange(ranks.n_feats)
    np.random.shuffle(candidates)
    for feature in candidates:
        if feature not in rank_by_feature:
            best.append(feature)
            if len(best) == n_to_keep:
                return best
    raise ValueError("Impossible to return so many best features")
def raw_results():
    """Fixture: two repetitions, each holding two OuterLoopResults."""

    def _eval(score, features, feat_ranks):
        # Small builder to keep the nested fixture literal readable.
        return FeatureEvaluationResults(
            test_score=score,
            model="model",
            ranks=FeatureRanks(features=features, ranks=feat_ranks, n_feats=10),
        )

    repetition_one = [
        OuterLoopResults(
            min_eval=_eval(4, [0, 1], [1, 2]),
            max_eval=_eval(5, [0, 1, 2, 3], [1, 2, 4, 3]),
            mid_eval=_eval(5, [0, 1, 3], [1, 2, 3]),
            n_features_to_score_map={5: 4, 4: 3, 3: 3, 2: 3},
        ),
        OuterLoopResults(
            min_eval=_eval(3, [0, 1, 4, 3], [1, 2, 3, 4]),
            max_eval=_eval(3, [0, 1, 4, 3], [1, 2, 3, 4]),
            mid_eval=_eval(2, [0, 1, 4, 3], [1, 2, 3, 4]),
            n_features_to_score_map={5: 5, 4: 4, 3: 5, 2: 5},
        ),
    ]
    repetition_two = [
        OuterLoopResults(
            min_eval=_eval(4, [0, 1], [1, 2]),
            max_eval=_eval(5, [0, 1, 4, 2], [1, 2, 3, 4]),
            mid_eval=_eval(5, [0, 1, 4], [2, 1, 3]),
            n_features_to_score_map={5: 5, 4: 3, 3: 5, 2: 3},
        ),
        OuterLoopResults(
            min_eval=_eval(2, [0, 1], [1, 2]),
            max_eval=_eval(2, [0, 1, 2, 3, 4], [1, 2, 5, 4, 3]),
            mid_eval=_eval(2, [0, 1, 4], [1, 2, 3]),
            n_features_to_score_map={5: 5, 4: 6, 3: 5, 2: 5},
        ),
    ]
    return [repetition_one, repetition_two]
def test_get_best_ranks(n, best):
    """get_best_n_features returns the expected feature set for each n."""
    feature_ranks = FeatureRanks(features=[5, 0, 1, 4], ranks=[1, 2, 3, 4], n_feats=10)
    selected = utils.get_best_n_features(feature_ranks, n)
    # Order is not guaranteed (padding is shuffled), so compare as sorted lists.
    assert sorted(selected) == sorted(best)