Ejemplo n.º 1
0
def test_average_ranks():
    """Check the per-feature values produced by averaging two rankings."""
    first = FeatureRanks(features=[1, 2, 3], ranks=[1, 2, 3], n_feats=10)
    second = FeatureRanks(
        features=[0, 1, 4, 5], ranks=[1, 2, 3, 4], n_feats=10
    )

    averaged = utils.average_ranks([first, second])

    assert isinstance(averaged, FeatureRanks)
    # Feature 1 appears in both rankings; features 0 and 2 appear in only
    # one of them each.
    expected_by_feature = {0: 5.5, 1: 1.5, 2: 6}
    for feature, expected in expected_by_feature.items():
        assert averaged[feature] == expected
Ejemplo n.º 2
0
def inner_loop_results_3():
    """Return two evaluation results that both rank features 2 and 4.

    The results differ only in their rank values and test scores; both use
    the placeholder string ``"model"`` as the model.
    """
    # NOTE(review): each ranking names two features but carries three rank
    # values -- confirm whether this length mismatch is intentional.
    rank_values_and_scores = [
        ([3, 2, 1], 0.3),
        ([1.5, 1.5, 3], 0.25),
    ]
    return [
        FeatureEvaluationResults(
            ranks=FeatureRanks(features=[2, 4], ranks=rank_values),
            test_score=score,
            model="model",
        )
        for rank_values, score in rank_values_and_scores
    ]
Ejemplo n.º 3
0
def inner_loop_results():
    """Return two evaluation results that both rank features 1 through 4.

    Both results share a test score of 0.2 and the placeholder model string
    ``"estimator"``; only the rank values differ (the second has a tie).
    """
    rank_values_per_result = [
        [3, 2, 1, 4],
        [1.5, 1.5, 3, 4],
    ]
    return [
        FeatureEvaluationResults(
            ranks=FeatureRanks(features=[1, 2, 3, 4], ranks=rank_values),
            test_score=0.2,
            model="estimator",
        )
        for rank_values in rank_values_per_result
    ]
Ejemplo n.º 4
0
 def _get_feature_ranks(
         self, estimator: Estimator,
         features: Union[List[int], NumpyArray]) -> FeatureRanks:
     """Build a ``FeatureRanks`` for ``features`` from a fitted estimator.

     The estimator's ``feature_importances`` are negated and passed to
     ``rankdata``; the resulting ranks are stored together with the number
     of initial features kept on this object.

     NOTE(review): ``rankdata`` is presumably ``scipy.stats.rankdata``, in
     which case the most important feature receives the lowest rank --
     confirm against the file's imports.
     """
     negated_importances = -estimator.feature_importances
     return FeatureRanks(
         features=features,
         ranks=rankdata(negated_importances),
         n_feats=self._n_initial_features,
     )
Ejemplo n.º 5
0
def average_ranks(ranks: Iterable[FeatureRanks]) -> FeatureRanks:
    """Average several feature rankings into a single ranking.

    Args:
        ranks: Rankings to average. Every element must report the same
            ``n_feats``. Any iterable is accepted, including one-shot
            iterators such as generators.

    Returns:
        A ``FeatureRanks`` over features ``0 .. n_feats - 1`` in which the
        rank of each feature is the mean of ``rank[feature]`` over all
        input rankings.

    Raises:
        ValueError: If ``ranks`` is empty, or if its elements disagree on
            ``n_feats``.
    """
    # Materialize once: the input is traversed for the consistency check and
    # then again for every feature, which would exhaust a generator after
    # the first pass and silently average empty lists.
    all_ranks = list(ranks)
    if not all_ranks:
        raise ValueError("Cannot average an empty collection of ranks")

    distinct_n_feats = {r.n_feats for r in all_ranks}
    if len(distinct_n_feats) > 1:
        raise ValueError("Input ranks refer to different features")
    n_feats = distinct_n_feats.pop()

    features = np.arange(n_feats)
    avg_ranks = [
        np.average([rank[f] for rank in all_ranks]) for f in features
    ]
    return FeatureRanks(features=features, ranks=avg_ranks)
Ejemplo n.º 6
0
def outer_loop_results():
    """Return two hand-written ``OuterLoopResults`` for a 5-feature problem.

    Every evaluation uses ``test_score=0``, ``model="model"`` and
    ``n_feats=5``; only the ranked features, their ranks and the
    feature-count-to-score maps differ between entries.
    """

    def make_eval(features, ranks):
        # All evaluations in this fixture share score, model and n_feats.
        return FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=features, ranks=ranks, n_feats=5),
            model="model",
        )

    first = OuterLoopResults(
        n_features_to_score_map={5: 100, 4: 5, 3: 4, 2: 5, 1: 100},
        min_eval=make_eval([1, 2], [1, 2]),
        max_eval=make_eval([1, 2, 3, 4], [2, 1, 3, 4]),
        mid_eval=make_eval([1, 2, 3], [1, 2, 3]),
    )
    second = OuterLoopResults(
        n_features_to_score_map={5: 300, 4: 6, 3: 4, 2: 7, 1: 250},
        min_eval=make_eval([1, 2], [1.5, 1.5]),
        max_eval=make_eval([0, 1, 2, 3], [1, 2, 3, 4]),
        mid_eval=make_eval([0, 1, 2], [3, 1, 2]),
    )
    return [first, second]
Ejemplo n.º 7
0
def outer_loop_results2():
    """Return a second pair of hand-written ``OuterLoopResults``.

    As in the sibling fixture, every evaluation uses ``test_score=0``,
    ``model="model"`` and ``n_feats=5``; the ranked features, ranks and
    feature-count-to-score maps are specific to this fixture.
    """

    def make_eval(features, ranks):
        # All evaluations in this fixture share score, model and n_feats.
        return FeatureEvaluationResults(
            test_score=0,
            ranks=FeatureRanks(features=features, ranks=ranks, n_feats=5),
            model="model",
        )

    first = OuterLoopResults(
        n_features_to_score_map={5: 150, 4: 4, 3: 4, 2: 5, 1: 120},
        min_eval=make_eval([2, 3], [1, 2]),
        max_eval=make_eval([0, 1, 2, 3], [3, 1, 2, 4]),
        mid_eval=make_eval([0, 1, 2], [3, 1, 2]),
    )
    second = OuterLoopResults(
        n_features_to_score_map={5: 200, 4: 7, 3: 1, 2: 6, 1: 220},
        min_eval=make_eval([0, 1], [1, 2]),
        max_eval=make_eval([0, 1, 2, 4], [4, 3, 1, 2]),
        mid_eval=make_eval([1, 2, 3], [3, 1, 2]),
    )
    return [first, second]
Ejemplo n.º 8
0
def get_best_n_features(ranks: FeatureRanks, n_to_keep: int) -> List[int]:
    """Return the ``n_to_keep`` best-ranked features, padding if necessary.

    Features are ordered by ascending rank value as reported by
    ``ranks.get_data()``. When fewer than ``n_to_keep`` features are ranked,
    the result is completed with features from ``range(ranks.n_feats)`` that
    have no rank, drawn in random order.

    Args:
        ranks: Ranking to select from; its ``get_data()`` result is used as
            a feature-to-rank mapping.
        n_to_keep: Number of features to return.

    Returns:
        A list of ``n_to_keep`` feature indices: ranked features first (best
        rank first), followed by any randomly chosen padding features.

    Raises:
        ValueError: If ``n_to_keep`` features cannot be gathered even after
            padding with every unranked feature.
    """
    ranks_data = ranks.get_data()
    sorted_data = sorted(ranks_data.items(), key=lambda x: x[1])
    feats = [feat for feat, _ in sorted_data[:n_to_keep]]

    if len(feats) == n_to_keep:
        return feats

    # Pad with non-present features, scrambled so that no bias is introduced.
    all_feats = np.arange(ranks.n_feats)
    np.random.shuffle(all_feats)
    # tolist() yields built-in ints, so the result honours List[int] instead
    # of mixing in numpy.int64 scalars from the shuffled array.
    for f in all_feats.tolist():
        if f not in ranks_data:
            feats.append(f)
            if len(feats) == n_to_keep:
                return feats

    raise ValueError("Impossible to return so many best features")
Ejemplo n.º 9
0
def raw_results():
    """Return a 2x2 nested list of hand-written ``OuterLoopResults``.

    Every evaluation uses ``model="model"`` and ``n_feats=10``; test scores,
    ranked features, ranks and feature-count-to-score maps vary per entry.
    """

    def make_eval(score, features, ranks):
        # Model and n_feats are the same for every evaluation in the fixture.
        return FeatureEvaluationResults(
            test_score=score,
            model="model",
            ranks=FeatureRanks(features=features, ranks=ranks, n_feats=10),
        )

    first_group = [
        OuterLoopResults(
            min_eval=make_eval(4, [0, 1], [1, 2]),
            max_eval=make_eval(5, [0, 1, 2, 3], [1, 2, 4, 3]),
            mid_eval=make_eval(5, [0, 1, 3], [1, 2, 3]),
            n_features_to_score_map={5: 4, 4: 3, 3: 3, 2: 3},
        ),
        OuterLoopResults(
            min_eval=make_eval(3, [0, 1, 4, 3], [1, 2, 3, 4]),
            max_eval=make_eval(3, [0, 1, 4, 3], [1, 2, 3, 4]),
            mid_eval=make_eval(2, [0, 1, 4, 3], [1, 2, 3, 4]),
            n_features_to_score_map={5: 5, 4: 4, 3: 5, 2: 5},
        ),
    ]
    second_group = [
        OuterLoopResults(
            min_eval=make_eval(4, [0, 1], [1, 2]),
            max_eval=make_eval(5, [0, 1, 4, 2], [1, 2, 3, 4]),
            mid_eval=make_eval(5, [0, 1, 4], [2, 1, 3]),
            n_features_to_score_map={5: 5, 4: 3, 3: 5, 2: 3},
        ),
        OuterLoopResults(
            min_eval=make_eval(2, [0, 1], [1, 2]),
            max_eval=make_eval(2, [0, 1, 2, 3, 4], [1, 2, 5, 4, 3]),
            mid_eval=make_eval(2, [0, 1, 4], [1, 2, 3]),
            n_features_to_score_map={5: 5, 4: 6, 3: 5, 2: 5},
        ),
    ]
    return [first_group, second_group]
Ejemplo n.º 10
0
def test_get_best_ranks(n, best):
    """Check that the ``n`` selected features equal ``best``, ignoring order."""
    feature_ranks = FeatureRanks(
        features=[5, 0, 1, 4],
        ranks=[1, 2, 3, 4],
        n_feats=10,
    )
    selected = utils.get_best_n_features(feature_ranks, n)
    # Compare sorted copies: only membership matters, not ordering.
    assert sorted(selected) == sorted(best)