예제 #1
0
def _train(train_data: DataFrame,
           regressor: RegressorMixin,
           clusterer: Clustering,
           do_cv=False) -> dict:
    """Fit one regressor per non-empty cluster of the training data.

    ``train_data`` is partitioned with ``clusterer.cluster_data``; the result
    is indexed by cluster number. For each non-empty cluster a regressor is
    trained on every column except ``'label'``, using ``'label'`` as target.

    Args:
        train_data: Training frame; must contain a ``'label'`` column.
        regressor: Estimator to train. When ``do_cv`` is false this very
            instance is fitted for the first non-empty cluster; a fresh
            clone is used for every following cluster.
        clusterer: Object exposing ``cluster_data(df)`` and ``n_clusters``.
        do_cv: When true, run 10-fold cross-validation per cluster and keep
            the fold estimator with the highest test score instead of
            fitting once on the whole cluster.

    Returns:
        A dict holding the clusterer under ``ModelType.CLUSTERER.value`` and
        a ``{cluster index: fitted regressor}`` dict under
        ``ModelType.REGRESSOR.value``. Empty clusters have no entry.
    """
    models = {}

    clustered_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = clustered_data[cluster]
        if cluster_train_df.empty:
            # Nothing to learn from; leave this cluster out of ``models``.
            continue

        # ``columns=`` keyword: the positional ``axis`` form (``drop('label', 1)``)
        # is rejected by pandas >= 2.0.
        cluster_features = cluster_train_df.drop(columns='label')
        cluster_targets = cluster_train_df['label'].values.ravel()

        if do_cv:
            cross_validation_result = cross_validate(
                regressor,
                cluster_features,
                cluster_targets,
                return_estimator=True,
                cv=10  # TODO(Chiara): confirm that 10 folds is what you want
            )

            validation_scores = cross_validation_result['test_score']
            fold_estimators = cross_validation_result['estimator']
            # Highest test score wins; on ties the last fold is kept, which
            # matches the previous dict-based lookup.
            # TODO(Chiara): confirm whether to keep the max, the min or the
            # middle-scoring estimator.
            best_fold = max(
                range(len(validation_scores)),
                key=lambda fold: (validation_scores[fold], fold)
            )
            regressor = fold_estimators[best_fold]
        else:
            regressor.fit(cluster_features, cluster_targets)

        models[cluster] = regressor
        # Continue with an unfitted copy so the next cluster does not refit
        # (and thereby overwrite) the model that was just stored.
        try:
            regressor = clone(regressor)
        except TypeError:
            # Estimator is not clonable the strict way; let ``clone`` fall
            # back to its non-safe mode.
            regressor = clone(regressor, safe=False)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.REGRESSOR.value: models
    }
예제 #2
0
def _train(train_data: DataFrame, regressor: RegressorMixin, clusterer: Clustering) -> dict:
    """Fit one regressor per non-empty cluster of the training data.

    ``train_data`` is partitioned with ``clusterer.cluster_data``; the result
    is indexed by cluster number. For each non-empty cluster a regressor is
    trained on every column except ``'label'``, using ``'label'`` as target.

    Args:
        train_data: Training frame; must contain a ``'label'`` column.
        regressor: Estimator to train. This very instance is fitted for the
            first non-empty cluster; a fresh clone is used for each
            following cluster.
        clusterer: Object exposing ``cluster_data(df)`` and ``n_clusters``.

    Returns:
        A dict holding the clusterer under ``'clusterer'`` and a
        ``{cluster index: fitted regressor}`` dict under
        ``PredictiveModels.REGRESSION.value``. Empty clusters have no entry.
    """
    models = {}

    clustered_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = clustered_data[cluster]
        if cluster_train_df.empty:
            # Nothing to learn from; leave this cluster out of ``models``.
            continue

        # ``columns=`` keyword: the positional ``axis`` form (``drop('label', 1)``)
        # is rejected by pandas >= 2.0.
        cluster_features = cluster_train_df.drop(columns='label')
        cluster_targets = cluster_train_df['label'].values.ravel()
        regressor.fit(cluster_features, cluster_targets)

        models[cluster] = regressor
        # Continue with an unfitted copy so the next cluster does not refit
        # (and thereby overwrite) the model that was just stored.
        try:
            regressor = clone(regressor)
        except TypeError:
            # Estimator is not clonable the strict way; let ``clone`` fall
            # back to its non-safe mode.
            regressor = clone(regressor, safe=False)

    return {'clusterer': clusterer, PredictiveModels.REGRESSION.value: models}
예제 #3
0
def _cv_estimate(model: RegressorMixin,
                 train_data: pd.DataFrame,
                 features: List[str],
                 y: str,
                 n_splits: int) -> Tuple[pd.Series, List[RegressorMixin]]:
    """Produce out-of-fold predictions and the per-fold fitted models.

    The rows of ``train_data`` are split with ``KFold(n_splits=n_splits)``.
    For every split an independent copy of ``model`` is fitted on the
    training rows and used to predict the held-out rows.

    Args:
        model: Estimator template. It is deep-copied for every fold, so the
            instance passed in is not fitted by this function.
        train_data: Frame containing the feature columns and the target.
        features: Names of the feature columns.
        y: Name of the target column.
        n_splits: Number of folds.

    Returns:
        A tuple ``(cv_pred, models)``: ``cv_pred`` is a Series indexed like
        ``train_data`` holding each row's prediction from the fold in which
        it was held out, and ``models`` lists the fitted estimator of each
        fold, in fold order.
    """
    import copy

    cv = KFold(n_splits=n_splits)
    models = []
    cv_pred = pd.Series(np.nan, index=train_data.index)

    feature_frame = train_data[features]
    target = train_data[y]

    for train, test in cv.split(train_data):
        # Fit a separate copy per fold: refitting ``model`` itself would make
        # every entry of ``models`` alias one object holding only the
        # last fold's state.
        fold_model = copy.deepcopy(model)
        fitted = fold_model.fit(feature_frame.iloc[train], target.iloc[train])
        cv_pred.iloc[test] = fitted.predict(feature_frame.iloc[test])
        models.append(fitted)

    return cv_pred, models