Ejemplo n.º 1
0
    def evaluate_new_feature(self, prev_subset: list, new_feature, X_f: dict,
                             X_t: dict, y: np.ndarray) -> float:
        """Return the mean score of ``prev_subset + [new_feature]``.

        For every ``i`` in ``range(self.n_cv_ffs)`` the data are split with
        ``split_dataset`` using ``self.seeds[i]`` and a test share of
        ``1 - self.train_share``; the train/test parts of that split are
        handed to ``self.score_function``.  The per-split scores are then
        averaged.

        Args:
            prev_subset: Features selected so far.  Not modified; a new list
                is built for the candidate subset.
            new_feature: Candidate feature appended to ``prev_subset``.
            X_f: Passed to ``split_dataset`` as its second argument.
                NOTE(review): presumably the data that comes back under the
                ``'transformed'`` key -- confirm against ``split_dataset``.
            X_t: Passed to ``split_dataset`` as its first argument.
                NOTE(review): presumably the data that comes back under the
                ``'plain'`` key -- confirm against ``split_dataset``.
            y: Target values, passed to ``split_dataset``.

        Returns:
            The arithmetic mean of the per-split scores as a Python float.
            With ``self.n_cv_ffs == 0`` this is ``nan`` (mean of an empty
            list).

        Raises:
            IndexError: If ``self.seeds`` has fewer than ``self.n_cv_ffs``
                entries.
        """
        A = prev_subset + [new_feature]

        def fold_arguments(seed) -> dict:
            """Build the keyword arguments of one scoring call."""
            result = split_dataset(X_t, X_f, y, seed, 1 - self.train_share)
            return {
                'A': A,
                'X_f': result['train']['transformed'],
                'X_f_test': result['test']['transformed'],
                'X_t': result['train']['plain'],
                'X_t_test': result['test']['plain'],
                'y': result['train']['target'],
                'y_test': result['test']['target'],
                # Every split gets its own clone so that whatever the score
                # function does to the estimator cannot carry over to the
                # next split or to self.decision_function.  Previously only
                # the parallel branch cloned, so the two branches were not
                # equivalent.
                'decision_function': clone(self.decision_function),
            }

        # Lazy on purpose: splits are produced one at a time as they are
        # consumed by either branch below.
        folds = (fold_arguments(self.seeds[i]) for i in range(self.n_cv_ffs))

        if self.n_jobs > 1:
            scores = Parallel(n_jobs=self.n_jobs)(
                delayed(self.score_function)(**kwargs) for kwargs in folds)
        else:
            scores = [self.score_function(**kwargs) for kwargs in folds]

        return float(np.mean(scores))
Ejemplo n.º 2
0
async def eval_new_feature(subset: list, new_feature: str, X_f: dict,
                           X_t: dict, y, n_cv_ffs: int, n_jobs: int,
                           seeds: list, train_share: float,
                           score_function_components, decision_function):
    """Average the score components of ``subset + [new_feature]``.

    For every ``i`` in ``range(n_cv_ffs)`` the data are split with
    ``split_dataset`` using ``seeds[i]`` and a test share of
    ``1 - train_share``, and ``score_function_components`` is called on the
    train/test parts of that split.  Each call is expected to return a
    mapping; the values are averaged key by key over all splits.

    Although declared ``async``, the body contains no ``await``: all work
    runs synchronously once the coroutine is awaited.

    Args:
        subset: Features selected so far (not modified).
        new_feature: Candidate feature appended to ``subset``.
        X_f: Second argument of ``split_dataset``.
        X_t: First argument of ``split_dataset``.
        y: Target values, third argument of ``split_dataset``.
        n_cv_ffs: Number of splits to evaluate.
        n_jobs: When greater than 1 the calls are dispatched through
            ``Parallel(n_jobs=n_jobs)``; otherwise they run in a plain loop.
        seeds: Indexable collection of seeds, one per split.
        train_share: Share of the data used for training; the complement
            is passed to ``split_dataset``.
        score_function_components: Callable evaluated once per split.
        decision_function: Forwarded unchanged to every call.

    Returns:
        A dict with the keys of the first split's result, each mapped to
        the mean (as a Python float) of that key over all splits.

    Raises:
        IndexError: If no split was evaluated (``n_cv_ffs == 0``) or
            ``seeds`` is shorter than ``n_cv_ffs``.
    """
    candidate = subset + [new_feature]

    def call_arguments(fold: int) -> dict:
        """Split the data for one fold and assemble the scoring kwargs."""
        parts = split_dataset(X_t, X_f, y, seeds[fold], 1 - train_share)
        train, test = parts['train'], parts['test']
        return dict(
            A=candidate,
            X_f=train['transformed'],
            X_f_test=test['transformed'],
            X_t=train['plain'],
            X_t_test=test['plain'],
            y=train['target'],
            y_test=test['target'],
            decision_function=decision_function,
        )

    # Generator: each split is created only when its scoring call is built.
    pending = (call_arguments(fold) for fold in range(n_cv_ffs))

    if n_jobs > 1:
        fold_results = Parallel(n_jobs=n_jobs)(
            delayed(score_function_components)(**kwargs)
            for kwargs in pending)
    else:
        fold_results = [
            score_function_components(**kwargs) for kwargs in pending
        ]

    averaged = {}
    for key in fold_results[0].keys():
        averaged[key] = float(np.mean([res[key] for res in fold_results]))
    return averaged
Ejemplo n.º 3
0
    def evaluate_new_feature(self, prev_subset, new_feature, X_f, X_t,
                             y) -> dict:
        """Return the averaged score components of the candidate subset.

        The candidate subset is ``prev_subset + [new_feature]``.  For every
        ``i`` in ``range(self.n_cv_ffs)`` the data are split with
        ``split_dataset`` using ``self.seeds[i]`` and a test share of
        ``1 - self.train_share``; ``self.score_function_components`` is
        called on the train/test parts of that split and is expected to
        return a mapping.  The mappings are averaged key by key.

        Args:
            prev_subset: Features selected so far (not modified).
            new_feature: Candidate feature appended to ``prev_subset``.
            X_f: Second argument of ``split_dataset``.
                NOTE(review): presumably what comes back under the
                ``'transformed'`` key -- confirm against ``split_dataset``.
            X_t: First argument of ``split_dataset``.
                NOTE(review): presumably what comes back under the
                ``'plain'`` key -- confirm against ``split_dataset``.
            y: Target values, third argument of ``split_dataset``.

        Returns:
            A dict with the keys of the first split's result, each mapped
            to ``np.mean`` of that key over all splits.

        Raises:
            IndexError: If no split was evaluated (``self.n_cv_ffs == 0``)
                or ``self.seeds`` is shorter than ``self.n_cv_ffs``.
        """
        A = prev_subset + [new_feature]

        def fold_arguments(seed) -> dict:
            """Build the keyword arguments of one scoring call."""
            result = split_dataset(X_t, X_f, y, seed, 1 - self.train_share)
            return {
                'A': A,
                'X_f': result['train']['transformed'],
                'X_f_test': result['test']['transformed'],
                'X_t': result['train']['plain'],
                'X_t_test': result['test']['plain'],
                'y': result['train']['target'],
                'y_test': result['test']['target'],
                # Clone per split in BOTH execution modes.  The serial
                # branch used to pass the shared self.decision_function,
                # so anything the score function did to it could carry
                # over between splits and results could depend on n_jobs.
                'decision_function': clone(self.decision_function),
            }

        # Lazy on purpose: one split is materialised per scoring call.
        folds = (fold_arguments(self.seeds[i]) for i in range(self.n_cv_ffs))

        if self.n_jobs > 1:
            likelihoods = Parallel(n_jobs=self.n_jobs)(
                delayed(self.score_function_components)(**kwargs)
                for kwargs in folds)
        else:
            likelihoods = [
                self.score_function_components(**kwargs) for kwargs in folds
            ]

        return {
            k: np.mean([l[k] for l in likelihoods])
            for k in likelihoods[0].keys()
        }
Ejemplo n.º 4
0
    def score(self, numeric_features: list) -> np.floating:
        """Return the mean score of a feature subset over ``self.n_cv`` splits.

        For every ``i`` in ``range(self.n_cv)`` the stored data
        (``self.X_t``, ``self.X_f``, ``self.y``) are split with
        ``split_dataset`` using ``self.seeds[i]`` and a test share of
        ``1 - self.train_share``; ``self._score_function`` is evaluated on
        the train/test parts of that split.

        Args:
            numeric_features: Indices into ``self.feature_list`` that
                identify the features of the subset to score.

        Returns:
            ``np.mean`` of the per-split scores.  With ``self.n_cv == 0``
            this is ``nan`` (mean of an empty list).

        Raises:
            IndexError: If ``self.seeds`` has fewer than ``self.n_cv``
                entries.
        """
        # NOTE: the return annotation was ``np.float128``.  That alias only
        # exists on platforms whose C ``long double`` is wider than 64 bits,
        # and annotations are evaluated when the function is defined, so the
        # enclosing class could not even be imported elsewhere.
        # ``np.floating`` covers whatever float type ``np.mean`` yields.

        # The subset does not depend on the split: resolve it once.
        A = [self.feature_list[f] for f in numeric_features]

        scores = []
        for i in range(self.n_cv):
            result = split_dataset(self.X_t, self.X_f, self.y, self.seeds[i],
                                   1 - self.train_share)
            train, test = result['train'], result['test']

            scores.append(
                self._score_function(
                    A=A,
                    X_f=train['transformed'],
                    X_f_test=test['transformed'],
                    X_t=train['plain'],
                    X_t_test=test['plain'],
                    y=train['target'],
                    y_test=test['target'],
                ))

        return np.mean(scores)
Ejemplo n.º 5
0
    def evaluate(self, dataset: HoldoutDataset, seed):
        """Select a feature subset on the training part and predict the rest.

        The dataset is split once with ``split_dataset``.  The optimizer
        picks a subset from the training part, ``self.train_and_fit`` is
        then called with that subset, the training data and the test
        features, and its result is reported next to the test targets.

        Args:
            dataset: Source of features, target and continuous feature
                names.
            seed: Seed forwarded to ``split_dataset``.

        Returns:
            A dict with three entries: ``'target'`` (test targets passed
            through ``np.squeeze``), ``'pred'`` (the value returned by
            ``self.train_and_fit``) and ``'subset'`` (the value returned by
            ``self.optimizer.select``).
        """
        plain_features = dataset.get_features(False)
        model_features = dataset.get_features(self.requires_linearisation)
        target = dataset.get_target()

        # The last argument is the integer 1, not a float share.
        # NOTE(review): presumably split_dataset treats an integer as an
        # absolute number of test samples -- confirm.
        split = split_dataset(plain_features, model_features, target, seed, 1)

        train = split['train']
        subset = self.optimizer.select(
            train['plain'],
            train['transformed'],
            train['target'],
            dataset.get_continuous_feature_names(),
        )

        test = split['test']
        y_pred = self.train_and_fit(
            subset,
            train['transformed'],
            train['target'],
            test['transformed'],
        )

        outcome = {'target': np.squeeze(test['target'])}
        outcome['pred'] = y_pred
        outcome['subset'] = subset
        return outcome