Example #1
    # Requires numpy as np, plus sklearn's StandardScaler, Pipeline,
    # BaggingRegressor, cross_val_score, and KFold.
    def get_hypotheses(self, candidate_data, seed_data=None):
        X_cand, X_seed, y_seed = self.update_data(candidate_data, seed_data)

        steps = [('scaler', StandardScaler()), ('GP', self.GP)]
        pipeline = Pipeline(steps)

        # NOTE: base_estimator was renamed to estimator in scikit-learn 1.2.
        bag_reg = BaggingRegressor(base_estimator=pipeline,
                                   n_estimators=self.n_estimators,
                                   max_samples=self.max_samples,
                                   bootstrap=self.bootstrap,
                                   verbose=True,
                                   n_jobs=self.n_jobs)
        # 3-fold CV estimate of the mean absolute error (sign-flipped,
        # since sklearn reports neg_mean_absolute_error)
        self.cv_score = np.mean(
            -1.0 * cross_val_score(pipeline,
                                   X_seed,
                                   y_seed,
                                   cv=KFold(3, shuffle=True),
                                   scoring='neg_mean_absolute_error'))
        bag_reg.fit(X_seed, y_seed)

        # TODO: make this a static method
        def _get_unc(bagging_regressor, X_test):
            """Mean prediction and minimum per-sample std across the bag."""
            stds = []
            pres = []
            for est in bagging_regressor.estimators_:
                _p, _s = est.predict(X_test, return_std=True)
                stds.append(_s)
                pres.append(_p)
            return (np.mean(np.array(pres), axis=0),
                    np.min(np.array(stds), axis=0))

        # The GP ensemble predicts formation energy (Hf) on the candidates;
        # subtracting alpha * std gives an optimistic lower confidence bound
        preds, stds = _get_unc(bag_reg, X_cand)
        expected = preds - stds * self.alpha

        # Update candidate data dataframe with predictions
        self.update_candidate_stabilities(expected, sort=True, floor=-6.0)

        # Find the most stable ones up to n_query within hull_distance
        stability_filter = self.candidate_data[
            'pred_stability'] < self.hull_distance
        within_hull = self.candidate_data[stability_filter]

        self.indices_to_compute = within_hull.head(self.n_query).index.tolist()
        return self.indices_to_compute
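
For orientation, here is a minimal, self-contained sketch of the same bagged-GP uncertainty pattern on toy data. The pipeline layout mirrors the method above; the synthetic data, n_estimators=5, and alpha=0.5 are illustrative assumptions rather than values from the original class.

import numpy as np
from sklearn.ensemble import BaggingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(60, 1))
y = np.sin(X).ravel() + 0.1 * rng.randn(60)

pipeline = Pipeline([('scaler', StandardScaler()),
                     ('GP', GaussianProcessRegressor())])
bag = BaggingRegressor(pipeline, n_estimators=5, max_samples=0.8,
                       bootstrap=True, random_state=0)
bag.fit(X, y)

X_test = np.linspace(-3, 3, 21).reshape(-1, 1)
preds, stds = [], []
for est in bag.estimators_:
    # Each estimator is a fitted Pipeline; return_std reaches the GP step
    p, s = est.predict(X_test, return_std=True)
    preds.append(p)
    stds.append(s)

mean_pred = np.mean(preds, axis=0)  # ensemble mean, as in _get_unc
min_std = np.min(stds, axis=0)      # minimum per-sample std, as in _get_unc
lcb = mean_pred - 0.5 * min_std     # expected = preds - alpha * stds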
Example #2
# Requires numpy as np; sklearn's StandardScaler, BaggingRegressor, and SVR;
# and the project helpers splitDataForXValidation and rmseEval.
def evalOne(parameters):
    all_obs = []
    all_pred = []
    for location in locations:
        trainX, testX, trainY, testY = splitDataForXValidation(location, "location", data, all_features, "target")
        normalizer_X = StandardScaler()
        trainX = normalizer_X.fit_transform(trainX)
        testX = normalizer_X.transform(testX)
        # StandardScaler expects 2D input; reshape the 1D targets, then
        # flatten back so the regressor sees a 1D y
        normalizer_Y = StandardScaler()
        trainY = normalizer_Y.fit_transform(np.array(trainY).reshape(-1, 1)).ravel()
        testY = normalizer_Y.transform(np.array(testY).reshape(-1, 1)).ravel()
        model = BaggingRegressor(base_estimator=SVR(kernel='rbf',
                                                    C=parameters["C"],
                                                    cache_size=5000),
                                 max_samples=parameters["max_samples"],
                                 n_estimators=parameters["n_estimators"],
                                 verbose=0,
                                 n_jobs=-1)
        model.fit(trainX, trainY)
        prediction = model.predict(testX)
        prediction = normalizer_Y.inverse_transform(prediction.reshape(-1, 1)).ravel()
        testY = normalizer_Y.inverse_transform(testY.reshape(-1, 1)).ravel()
        all_obs.extend(testY)
        all_pred.extend(prediction)
        
    # rmseEval is assumed to return a (label, value) pair; keep the value
    return rmseEval(all_obs, all_pred)[1]
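
As a usage sketch, a hypothetical grid-search driver for evalOne follows; the grid values are invented for illustration, and it assumes the same globals (locations, data, all_features) and helpers as the function above.

best_rmse = float("inf")
best_params = None
for C in (1e2, 1e3, 1e4):                  # illustrative grid values
    for max_samples in (2000, 4000):
        for n_estimators in (5, 10):
            params = {"C": C,
                      "max_samples": max_samples,
                      "n_estimators": n_estimators}
            rmse = evalOne(params)
            if rmse < best_rmse:
                best_rmse, best_params = rmse, params
print(best_params, best_rmse)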
Example #3
def trainSVM(data, columns, targetColumn, parameters):

    # Use every column except the target itself as a model feature
    modelColumns = []
    for column in columns:
        if column != targetColumn:
            modelColumns.append(column)
            
    # Re-shape the column-oriented data into one row per record
    modelData = []
    for i in range(len(data[targetColumn])):
        record = []
        for column in modelColumns:
            record.append(data[column][i])
        modelData.append(record)
    
    # Earlier hard-coded configuration, kept for reference:
    # model = BaggingRegressor(base_estimator=SVR(kernel='rbf', C=1e4, cache_size=5000),
    #                          max_samples=4000, n_estimators=10, verbose=0, n_jobs=-1)
    model = BaggingRegressor(base_estimator=SVR(kernel='rbf',
                                                C=parameters["C"],
                                                cache_size=5000),
                             max_samples=parameters["max_samples"],
                             n_estimators=parameters["n_estimators"],
                             verbose=0,
                             n_jobs=-1)

    model.fit(modelData, data[targetColumn])
    
    return SVMModel(model, modelColumns)
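
A hypothetical call site for trainSVM, assuming data is a dict-like mapping of column names to equal-length value lists (as the indexing above implies); the column names and parameter values are invented for illustration.

params = {"C": 1e4, "max_samples": 4000, "n_estimators": 10}
svm_model = trainSVM(data, ["temperature", "wind_speed", "target"],
                     "target", params)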
Example #4
# The original snippet begins mid-loop; the output file handle and the
# location loop below are reconstructed from context (the file name is an
# assumption).
output = open("svr_predictions.csv", "w")

for location in locations:
    print(str(location))
    trainX, testX, trainY, testY = splitDataForXValidation(
        location, "location", data, all_features, "target")
    normalizer_X = StandardScaler()
    trainX = normalizer_X.fit_transform(trainX)
    testX = normalizer_X.transform(testX)
    # StandardScaler expects 2D input; reshape the 1D targets, then
    # flatten back so the regressor sees a 1D y
    normalizer_Y = StandardScaler()
    trainY = normalizer_Y.fit_transform(np.array(trainY).reshape(-1, 1)).ravel()
    testY = normalizer_Y.transform(np.array(testY).reshape(-1, 1)).ravel()
    model = BaggingRegressor(base_estimator=SVR(kernel='rbf',
                                                C=40,
                                                cache_size=5000),
                             max_samples=4200,
                             n_estimators=10,
                             verbose=0,
                             n_jobs=-1)
    model.fit(trainX, trainY)
    prediction = model.predict(testX)
    prediction = normalizer_Y.inverse_transform(prediction.reshape(-1, 1)).ravel()
    testY = normalizer_Y.inverse_transform(testY.reshape(-1, 1)).ravel()

    # One CSV row per test point: location,observed,predicted
    for i in range(len(testY)):
        output.write(str(location))
        output.write(",")
        output.write(str(testY[i]))
        output.write(",")
        output.write(str(prediction[i]))
        output.write("\n")

output.close()
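
After the loop over locations completes, the file contains one location,observed,predicted row per held-out sample.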