# Agent method, shown with the module-level imports it relies on.
import numpy as np
from sklearn.ensemble import BaggingRegressor
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


def get_hypotheses(self, candidate_data, seed_data=None):
    X_cand, X_seed, y_seed = self.update_data(candidate_data, seed_data)

    # Bag of (scaler -> GP) pipelines; the bagging ensemble supplies spread
    # on top of each GP's own predictive std.
    steps = [('scaler', StandardScaler()), ('GP', self.GP)]
    pipeline = Pipeline(steps)
    bag_reg = BaggingRegressor(
        base_estimator=pipeline,  # renamed to "estimator" in scikit-learn 1.2
        n_estimators=self.n_estimators,
        max_samples=self.max_samples,
        bootstrap=self.bootstrap,
        verbose=True,
        n_jobs=self.n_jobs,
    )

    # 3-fold CV MAE of a single pipeline on the seed data, kept for bookkeeping
    self.cv_score = np.mean(
        -1.0 * cross_val_score(
            pipeline, X_seed, y_seed,
            cv=KFold(3, shuffle=True),
            scoring='neg_mean_absolute_error',
        )
    )
    bag_reg.fit(X_seed, y_seed)

    # TODO: make this a static method
    def _get_unc(bagging_regressor, X_test):
        stds = []
        pres = []
        for est in bagging_regressor.estimators_:
            _p, _s = est.predict(X_test, return_std=True)
            stds.append(_s)
            pres.append(_p)
        return np.mean(np.array(pres), axis=0), np.min(np.array(stds), axis=0)

    # GP makes predictions for Hf and uncertainty*alpha on candidate data
    preds, stds = _get_unc(bag_reg, X_cand)
    expected = preds - stds * self.alpha

    # Update candidate data dataframe with predictions
    self.update_candidate_stabilities(expected, sort=True, floor=-6.0)

    # Find the most stable ones up to n_query within hull_distance
    stability_filter = self.candidate_data['pred_stability'] < self.hull_distance
    within_hull = self.candidate_data[stability_filter]
    self.indices_to_compute = within_hull.head(self.n_query).index.tolist()
    return self.indices_to_compute
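# Per the TODO above, a minimal sketch of _get_unc hoisted out of the method
# body as a @staticmethod on the same (assumed) agent class. Behavior is
# unchanged; the call site would become self._get_unc(bag_reg, X_cand).
@staticmethod
def _get_unc(bagging_regressor, X_test):
    """Return (mean prediction, min per-point std) over the bag's estimators."""
    pres, stds = [], []
    for est in bagging_regressor.estimators_:
        _p, _s = est.predict(X_test, return_std=True)
        pres.append(_p)
        stds.append(_s)
    return np.mean(np.array(pres), axis=0), np.min(np.array(stds), axis=0)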
import numpy as np
from sklearn.ensemble import BaggingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# splitDataForXValidation, rmseEval, locations, data, and all_features are
# defined elsewhere in the surrounding project.


def evalOne(parameters):
    all_obs = []
    all_pred = []
    for location in locations:
        trainX, testX, trainY, testY = splitDataForXValidation(
            location, "location", data, all_features, "target")

        # Standardize features and target; StandardScaler expects 2-D input,
        # so the 1-D targets are reshaped, then flattened back.
        normalizer_X = StandardScaler()
        trainX = normalizer_X.fit_transform(trainX)
        testX = normalizer_X.transform(testX)
        normalizer_Y = StandardScaler()
        trainY = normalizer_Y.fit_transform(np.reshape(trainY, (-1, 1))).ravel()
        testY = normalizer_Y.transform(np.reshape(testY, (-1, 1))).ravel()

        model = BaggingRegressor(
            base_estimator=SVR(kernel='rbf', C=parameters["C"], cache_size=5000),
            max_samples=parameters["max_samples"],
            n_estimators=parameters["n_estimators"],
            verbose=0,
            n_jobs=-1)
        model.fit(trainX, trainY)

        prediction = model.predict(testX)
        # Undo the target scaling before scoring
        prediction = normalizer_Y.inverse_transform(prediction.reshape(-1, 1)).ravel()
        testY = normalizer_Y.inverse_transform(testY.reshape(-1, 1)).ravel()
        all_obs.extend(testY)
        all_pred.extend(prediction)
    return rmseEval(all_obs, all_pred)[1]
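# A hypothetical driver for evalOne: a plain grid sweep over the three
# hyperparameters it reads from "parameters". The value grids below are
# illustrative, not taken from the original experiments.
best_rmse, best_params = float("inf"), None
for C in (1.0, 10.0, 40.0):
    for max_samples in (1000, 2000, 4200):
        for n_estimators in (5, 10, 20):
            params = {"C": C, "max_samples": max_samples,
                      "n_estimators": n_estimators}
            rmse = evalOne(params)
            if rmse < best_rmse:
                best_rmse, best_params = rmse, params
print(best_params, best_rmse)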
from sklearn.ensemble import BaggingRegressor
from sklearn.svm import SVR


def trainSVM(data, columns, targetColumn, parameters):
    # Use every column except the target as a feature
    modelColumns = [column for column in columns if column != targetColumn]

    # Transpose the column-oriented dict into a list of row records
    modelData = []
    for i in range(len(data[targetColumn])):
        modelData.append([data[column][i] for column in modelColumns])

    # model = BaggingRegressor(base_estimator=SVR(kernel='rbf', C=1e4, cache_size=5000),
    #                          max_samples=4000, n_estimators=10, verbose=0, n_jobs=-1)
    model = BaggingRegressor(
        base_estimator=SVR(kernel='rbf', C=parameters["C"], cache_size=5000),
        max_samples=parameters["max_samples"],
        n_estimators=parameters["n_estimators"],
        verbose=0,
        n_jobs=-1)
    model.fit(modelData, data[targetColumn])
    return SVMModel(model, modelColumns)
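# A minimal usage sketch for trainSVM. The parameter values and the "target"
# column name are illustrative; data is assumed to be a dict mapping column
# name -> list of values, as the record-building loop above implies.
parameters = {"C": 1e4, "max_samples": 4000, "n_estimators": 10}
svm = trainSVM(data, list(data.keys()), "target", parameters)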
print(location)
trainX, testX, trainY, testY = splitDataForXValidation(
    location, "location", data, all_features, "target")

# Standardize features and target; reshape the 1-D targets for StandardScaler
normalizer_X = StandardScaler()
trainX = normalizer_X.fit_transform(trainX)
testX = normalizer_X.transform(testX)
normalizer_Y = StandardScaler()
trainY = normalizer_Y.fit_transform(np.reshape(trainY, (-1, 1))).ravel()
testY = normalizer_Y.transform(np.reshape(testY, (-1, 1))).ravel()

model = BaggingRegressor(
    base_estimator=SVR(kernel='rbf', C=40, cache_size=5000),
    max_samples=4200, n_estimators=10, verbose=0, n_jobs=-1)
model.fit(trainX, trainY)

prediction = model.predict(testX)
# Undo the target scaling before writing results
prediction = normalizer_Y.inverse_transform(prediction.reshape(-1, 1)).ravel()
testY = normalizer_Y.inverse_transform(testY.reshape(-1, 1)).ravel()

# Write one "location,observation,prediction" row per test point
for i in range(len(testY)):
    output.write("{},{},{}\n".format(location, testY[i], prediction[i]))
output.close()
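# The fragment above references names bound elsewhere in the script (output,
# locations, data, all_features, and the imports from the earlier snippets).
# A hypothetical scaffold it could sit inside; the output file name and CSV
# header are illustrative:
output = open("svr_predictions.csv", "w")
output.write("location,observation,prediction\n")
for location in locations:
    pass  # per-location block above goes here; output.close() follows the loop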