def SVR_ST(trainFileName, testFileName):
    """Fit one linear SVR per store key and predict the rows of the test file.

    Both files are read with ``ld.LoadData_DATA_ST``; the loader result is
    indexed with the keys ``'1'`` .. ``'5'`` and every entry is iterated as
    a sequence of rows.  The first two fields of a row are kept as
    identifiers, fields from index 2 on are parsed as floats, and the first
    two parsed values are dropped.  A training row carries its target in the
    last position.

    Args:
        trainFileName: Passed to the loader to obtain the training rows.
        testFileName: Passed to the loader to obtain the rows to predict.

    Returns:
        A list with one entry per test row, over all store keys:
        ``[id0, id1, prediction, column -4, 2 * column -5]`` where the three
        numbers are formatted with ``'%.4f'`` and the prediction is clipped
        at zero.
    """
    train_by_store = ld.LoadData_DATA_ST(trainFileName)
    test_by_store = ld.LoadData_DATA_ST(testFileName)
    res = []
    for store_id in ('1', '2', '3', '4', '5'):
        train_rows = []
        train_y = []
        for raw in train_by_store[store_id]:
            values = [float(x) for x in raw[2:]]
            train_rows.append(values[2:-1])
            train_y.append(values[-1])

        test_rows = []
        items = []
        for raw in test_by_store[store_id]:
            items.append((raw[0], raw[1]))
            values = [float(x) for x in raw[2:]]
            test_rows.append(values[2:])

        # Plain ndarrays instead of np.matrix: scikit-learn 1.2+ rejects
        # np.matrix input with a TypeError and NumPy no longer recommends
        # the class.  Slicing and scalar indexing behave the same here.
        train_X = np.asarray(train_rows)
        test_X = np.asarray(test_rows)

        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        # Training rows lost their last value (the target) above while test
        # rows kept theirs, hence the window shifted by one column.
        # NOTE(review): assumes test rows carry one extra trailing field -
        # confirm against the loader.
        pred_y = svr.fit(train_X[:, -8:-3], train_y).predict(test_X[:, -7:-2])

        # A dedicated index name: the original reused the store variable.
        for row_idx, features in enumerate(test_X):
            res.append([
                items[row_idx][0],
                items[row_idx][1],
                '%.4f' % max(pred_y[row_idx], 0),
                '%.4f' % features[-4],
                '%.4f' % (float(features[-5]) * 2),
            ])
    return res
def SVR_ST(trainFileName, testFileName):
    """Fit one linear SVR per store key and predict the rows of the test file.

    Both files are read with ``ld.LoadData_DATA_ST``; the loader result is
    indexed with the keys ``'1'`` .. ``'5'`` and every entry is iterated as
    a sequence of rows.  The first two fields of a row are kept as
    identifiers, fields from index 2 on are parsed as floats, and the first
    two parsed values are dropped.  A training row carries its target in the
    last position.

    Args:
        trainFileName: Passed to the loader to obtain the training rows.
        testFileName: Passed to the loader to obtain the rows to predict.

    Returns:
        A list with one entry per test row, over all store keys:
        ``[id0, id1, prediction, column -4, 2 * column -5]`` where the three
        numbers are formatted with ``'%.4f'`` and the prediction is clipped
        at zero.
    """
    train_by_store = ld.LoadData_DATA_ST(trainFileName)
    test_by_store = ld.LoadData_DATA_ST(testFileName)
    res = []
    for store_id in ('1', '2', '3', '4', '5'):
        train_rows = []
        train_y = []
        for raw in train_by_store[store_id]:
            values = [float(x) for x in raw[2:]]
            train_rows.append(values[2:-1])
            train_y.append(values[-1])

        test_rows = []
        items = []
        for raw in test_by_store[store_id]:
            items.append((raw[0], raw[1]))
            values = [float(x) for x in raw[2:]]
            test_rows.append(values[2:])

        # Plain ndarrays instead of np.matrix: scikit-learn 1.2+ rejects
        # np.matrix input with a TypeError and NumPy no longer recommends
        # the class.  Slicing and scalar indexing behave the same here.
        train_X = np.asarray(train_rows)
        test_X = np.asarray(test_rows)

        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        # Training rows lost their last value (the target) above while test
        # rows kept theirs, hence the window shifted by one column.
        # NOTE(review): assumes test rows carry one extra trailing field -
        # confirm against the loader.
        pred_y = svr.fit(train_X[:, -8:-3], train_y).predict(test_X[:, -7:-2])

        # A dedicated index name: the original reused the store variable.
        for row_idx, features in enumerate(test_X):
            res.append([
                items[row_idx][0],
                items[row_idx][1],
                '%.4f' % max(pred_y[row_idx], 0),
                '%.4f' % features[-4],
                '%.4f' % (float(features[-5]) * 2),
            ])
    return res
def SVR_ST_train():
    """Fit one linear SVR per store key on the EVAL set and predict the VALIDATION set.

    Both CSV files are read with ``ld.loadData_ST``; the loader result is
    indexed with the keys ``'1'`` .. ``'5'``.  For every row the first two
    fields are kept as identifiers, fields from index 2 on are parsed as
    floats, the first two parsed values are dropped and the last one is the
    target (used for training rows, ignored for validation rows).

    Returns:
        A list with one entry per validation row, over all store keys:
        ``[id0, id1, prediction, column -4, 2 * column -5]``; each number is
        clipped at zero and formatted with ``'%.2f'``.
    """
    train_by_store = ld.loadData_ST('./data/EVAL_DataSetST1.csv')
    test_by_store = ld.loadData_ST('./data/VALIDATION_DataSetST1.csv')
    res = []
    for store_id in ('1', '2', '3', '4', '5'):
        train_rows = []
        train_y = []
        for raw in train_by_store[store_id]:
            values = [float(x) for x in raw[2:]]
            train_rows.append(values[2:-1])
            train_y.append(values[-1])

        test_rows = []
        items = []
        for raw in test_by_store[store_id]:
            items.append((raw[0], raw[1]))
            values = [float(x) for x in raw[2:]]
            # The validation target (last value) is dropped; the original
            # collected it in a list that was never read.
            test_rows.append(values[2:-1])

        # Plain ndarrays instead of np.matrix: scikit-learn 1.2+ rejects
        # np.matrix input with a TypeError and NumPy no longer recommends
        # the class.  Slicing and scalar indexing behave the same here.
        train_X = np.asarray(train_rows)
        test_X = np.asarray(test_rows)

        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])

        # A dedicated index name: the original reused the store variable.
        for row_idx, features in enumerate(test_X):
            res.append([
                items[row_idx][0],
                items[row_idx][1],
                '%.2f' % max(pred_y[row_idx], 0),
                '%.2f' % max(features[-4], 0),
                '%.2f' % max(2 * features[-5], 0),
            ])
    return res
def test_ml_pipeline():
    """Load a test data set, fit an SVR on it and plot predicted vs. actual values.

    The data comes from ``ReactivityDataLoader().load_mopac_learning()`` and
    is split with ``train_test_split``.  The working directory is changed to
    the user's ``Desktop`` folder before ``main.plotScatterPlot`` is called.
    """
    features, labels = ReactivityDataLoader().load_mopac_learning()
    model = SVR(C=1000)

    split = train_test_split(features, labels)
    train_features, test_features, train_labels, test_labels = split
    model.fit(train_features, train_labels)

    desktop = Path.home() / 'Desktop'
    os.chdir(str(desktop))

    predicted = model.predict(test_features)
    main.plotScatterPlot(test_labels, predicted, 'predictedVsActual')
def SVR_ALL(trainFileName, testFileName):
    """Fit a single linear SVR on the training file and predict the test file.

    Args:
        trainFileName: Passed to ``ld.LoadData_DATA_LABEL_ITEM``; the first
            two returned values are used as features and targets.
        testFileName: Passed to ``ld.LoadData_DATA_ITEM``, which returns the
            test features and one item identifier per row.

    Returns:
        A list with one entry per test row:
        ``[item, 'all', prediction, column -4, 2 * column -5]`` where the
        three numbers are formatted with ``'%.4f'`` and the prediction is
        clipped at zero.
    """
    train_X, train_y, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    test_X, items = ld.LoadData_DATA_ITEM(testFileName)

    # Plain ndarrays instead of np.matrix: scikit-learn 1.2+ rejects
    # np.matrix input with a TypeError and NumPy no longer recommends the
    # class.  Slicing and scalar indexing behave the same here.
    train_X = np.asarray(train_X)
    test_X = np.asarray(test_X)

    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    # NOTE(review): the test window is shifted by one column relative to the
    # training window - presumably test rows carry one extra trailing field;
    # confirm against the loaders.
    pred_y = svr.fit(train_X[:, -8:-3], train_y).predict(test_X[:, -7:-2])

    res = []
    for row_idx, features in enumerate(test_X):
        res.append([
            items[row_idx],
            'all',
            '%.4f' % max(pred_y[row_idx], 0),
            '%.4f' % features[-4],
            '%.4f' % (float(features[-5]) * 2),
        ])
    return res
def SVR_ALL_train():
    """Fit a single linear SVR on the EVAL set and predict the VALIDATION set.

    Both CSV files are read with ``ld.loadData_all``, which returns
    features, targets and item identifiers; the validation targets are not
    used.

    Returns:
        A list with one entry per validation row:
        ``[item, 'all', prediction, column -4, 2 * column -5]`` where the
        three numbers are formatted with ``'%.2f'`` and the prediction is
        clipped at zero.
    """
    train_X, train_y, _ = ld.loadData_all('./data/EVAL_DataSet1.csv')
    # The validation targets are returned by the loader but never read.
    test_X, _, items = ld.loadData_all('./data/VALIDATION_DataSet1.csv')

    # Plain ndarrays instead of np.matrix: scikit-learn 1.2+ rejects
    # np.matrix input with a TypeError and NumPy no longer recommends the
    # class.  Slicing and scalar indexing behave the same here.
    train_X = np.asarray(train_X)
    test_X = np.asarray(test_X)

    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])

    res = []
    for row_idx, features in enumerate(test_X):
        res.append([
            items[row_idx],
            'all',
            '%.2f' % max(pred_y[row_idx], 0),
            '%.2f' % features[-4],
            '%.2f' % (float(features[-5]) * 2),
        ])
    return res
def init_model(self):
    """Build an unfitted SVR with an ``rbf`` kernel from this object's settings.

    Reads ``self.c``, ``self.eps``, ``self.tol``, ``self.max_iter`` and
    ``self.gamma``.
    """
    settings = {
        "kernel": "rbf",
        "C": self.c,
        "epsilon": self.eps,
        "tol": self.tol,
        "max_iter": self.max_iter,
        "gamma": self.gamma,
    }
    return SVR(**settings)
def fit(self, X, y=None):
    """Create a fresh ``SKLModel`` from the stored hyperparameters and fit it.

    Args:
        X: Training data handed to the model's ``fit``.
        y: Optional targets; when ``None`` the model is fitted on ``X`` only.

    Returns:
        ``self``, so calls can be chained.
    """
    model = SKLModel(**self._hyperparams)
    self._sklearn_model = model
    fit_args = (X,) if y is None else (X, y)
    model.fit(*fit_args)
    return self
def run(self):
    """Collect the SVR parameters currently selected in the widgets.

    The kernel combo box text is translated to the kernel identifier through
    a fixed lookup table; a text missing from the table raises ``KeyError``.

    Returns:
        A tuple ``(params, changed)`` where ``params`` is the keyword
        dictionary read from the widgets and ``changed`` is the result of
        ``self.getChangedValues(params, SVR())``.
    """
    display_to_kernel = {
        'Radial Basis Function': 'rbf',
        'Linear': 'linear',
        'Polynomial': 'poly',
        'Sigmoid': 'sigmoid',
        'Precomputed': 'precomputed',
    }
    selected_kernel = display_to_kernel[self.kernelComboBox.currentText()]

    params = {}
    params['C'] = self.cDoubleSpinBox.value()
    params['epsilon'] = self.epsilonDoubleSpinBox.value()
    params['kernel'] = selected_kernel
    params['degree'] = self.degreeSpinBox.value()
    params['gamma'] = self.gammaComboBox.currentText()
    params['coef0'] = self.coeff0DoubleSpinBox.value()
    params['shrinking'] = self.shrinkingCheckBox.isChecked()
    params['tol'] = self.toleranceDoubleSpinBox.value()
    params['cache_size'] = self.cacheSizeSpinBox.value()
    params['verbose'] = self.verboseCheckBox.isChecked()
    params['max_iter'] = int(self.maxIterationsSpinBox.value())

    changed = self.getChangedValues(params, SVR())
    return params, changed
def __init__(self, kernel='rbf', degree=3, gamma='auto_deprecated', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True, cache_size=200, verbose=False, max_iter=(-1)):
    """Record the hyperparameters and build the wrapped ``SKLModel`` from them.

    Every argument is stored under its own name in ``self._hyperparams`` and
    forwarded unchanged as a keyword argument to ``SKLModel``.
    """
    self._hyperparams = dict(
        kernel=kernel,
        degree=degree,
        gamma=gamma,
        coef0=coef0,
        tol=tol,
        C=C,
        epsilon=epsilon,
        shrinking=shrinking,
        cache_size=cache_size,
        verbose=verbose,
        max_iter=max_iter,
    )
    self._wrapped_model = SKLModel(**self._hyperparams)
def SVR_ALL(trainFileName, testFileName):
    """Fit a single linear SVR on the training file and predict the test file.

    Args:
        trainFileName: Passed to ``ld.LoadData_DATA_LABEL_ITEM``; the first
            two returned values are used as features and targets.
        testFileName: Passed to ``ld.LoadData_DATA_ITEM``, which returns the
            test features and one item identifier per row.

    Returns:
        A list with one entry per test row:
        ``[item, 'all', prediction, column -4, 2 * column -5]`` where the
        three numbers are formatted with ``'%.4f'`` and the prediction is
        clipped at zero.
    """
    train_X, train_y, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    test_X, items = ld.LoadData_DATA_ITEM(testFileName)

    # Plain ndarrays instead of np.matrix: scikit-learn 1.2+ rejects
    # np.matrix input with a TypeError and NumPy no longer recommends the
    # class.  Slicing and scalar indexing behave the same here.
    train_X = np.asarray(train_X)
    test_X = np.asarray(test_X)

    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    # NOTE(review): the test window is shifted by one column relative to the
    # training window - presumably test rows carry one extra trailing field;
    # confirm against the loaders.
    pred_y = svr.fit(train_X[:, -8:-3], train_y).predict(test_X[:, -7:-2])

    res = []
    for row_idx, features in enumerate(test_X):
        res.append([
            items[row_idx],
            'all',
            '%.4f' % max(pred_y[row_idx], 0),
            '%.4f' % features[-4],
            '%.4f' % (float(features[-5]) * 2),
        ])
    return res
def SVR_ALL_train():
    """Fit a single linear SVR on the EVAL set and predict the VALIDATION set.

    Both CSV files are read with ``ld.loadData_all``, which returns
    features, targets and item identifiers; the validation targets are not
    used.

    Returns:
        A list with one entry per validation row:
        ``[item, 'all', prediction, column -4, 2 * column -5]`` where the
        three numbers are formatted with ``'%.2f'`` and the prediction is
        clipped at zero.
    """
    train_X, train_y, _ = ld.loadData_all('./data/EVAL_DataSet1.csv')
    # The validation targets are returned by the loader but never read.
    test_X, _, items = ld.loadData_all('./data/VALIDATION_DataSet1.csv')

    # Plain ndarrays instead of np.matrix: scikit-learn 1.2+ rejects
    # np.matrix input with a TypeError and NumPy no longer recommends the
    # class.  Slicing and scalar indexing behave the same here.
    train_X = np.asarray(train_X)
    test_X = np.asarray(test_X)

    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])

    res = []
    for row_idx, features in enumerate(test_X):
        res.append([
            items[row_idx],
            'all',
            '%.2f' % max(pred_y[row_idx], 0),
            '%.2f' % features[-4],
            '%.2f' % (float(features[-5]) * 2),
        ])
    return res
def init_model(self):
    """Build an unfitted SVR with a ``sigmoid`` kernel from this object's settings.

    Reads ``self.c``, ``self.eps``, ``self.tol``, ``self.max_iter``,
    ``self.coef0`` and ``self.gamma``.
    """
    settings = {
        "kernel": "sigmoid",
        "C": self.c,
        "epsilon": self.eps,
        "tol": self.tol,
        "max_iter": self.max_iter,
        "coef0": self.coef0,
        "gamma": self.gamma,
    }
    return SVR(**settings)
def init_model(self):
    """Build an unfitted SVR with a ``poly`` kernel from this object's settings.

    Reads ``self.degree``, ``self.c``, ``self.eps``, ``self.tol``,
    ``self.max_iter``, ``self.coef0`` and ``self.gamma``.
    """
    settings = {
        "kernel": "poly",
        "degree": self.degree,
        "C": self.c,
        "epsilon": self.eps,
        "tol": self.tol,
        "max_iter": self.max_iter,
        "coef0": self.coef0,
        "gamma": self.gamma,
    }
    return SVR(**settings)
class SVRImpl():
    """Wrapper that builds an ``SKLModel`` from keyword hyperparameters and delegates to it."""

    def __init__(self, kernel='rbf', degree=3, gamma='auto_deprecated', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True, cache_size=200, verbose=False, max_iter=(-1)):
        """Record the hyperparameters and construct the wrapped model from them."""
        self._hyperparams = dict(
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            tol=tol,
            C=C,
            epsilon=epsilon,
            shrinking=shrinking,
            cache_size=cache_size,
            verbose=verbose,
            max_iter=max_iter,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model on ``X`` (and ``y`` when given); return ``self``."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        predictions = self._wrapped_model.predict(X)
        return predictions
def SVR_ST_train():
    """Fit one linear SVR per store key on the EVAL set and predict the VALIDATION set.

    Both CSV files are read with ``ld.loadData_ST``; the loader result is
    indexed with the keys ``'1'`` .. ``'5'``.  For every row the first two
    fields are kept as identifiers, fields from index 2 on are parsed as
    floats, the first two parsed values are dropped and the last one is the
    target (used for training rows, ignored for validation rows).

    Returns:
        A list with one entry per validation row, over all store keys:
        ``[id0, id1, prediction, column -4, 2 * column -5]``; each number is
        clipped at zero and formatted with ``'%.2f'``.
    """
    train_by_store = ld.loadData_ST('./data/EVAL_DataSetST1.csv')
    test_by_store = ld.loadData_ST('./data/VALIDATION_DataSetST1.csv')
    res = []
    for store_id in ('1', '2', '3', '4', '5'):
        train_rows = []
        train_y = []
        for raw in train_by_store[store_id]:
            values = [float(x) for x in raw[2:]]
            train_rows.append(values[2:-1])
            train_y.append(values[-1])

        test_rows = []
        items = []
        for raw in test_by_store[store_id]:
            items.append((raw[0], raw[1]))
            values = [float(x) for x in raw[2:]]
            # The validation target (last value) is dropped; the original
            # collected it in a list that was never read.
            test_rows.append(values[2:-1])

        # Plain ndarrays instead of np.matrix: scikit-learn 1.2+ rejects
        # np.matrix input with a TypeError and NumPy no longer recommends
        # the class.  Slicing and scalar indexing behave the same here.
        train_X = np.asarray(train_rows)
        test_X = np.asarray(test_rows)

        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])

        # A dedicated index name: the original reused the store variable.
        for row_idx, features in enumerate(test_X):
            res.append([
                items[row_idx][0],
                items[row_idx][1],
                '%.2f' % max(pred_y[row_idx], 0),
                '%.2f' % max(features[-4], 0),
                '%.2f' % max(2 * features[-5], 0),
            ])
    return res
def run(self):
    """Collect the SVR parameters currently selected in the widgets.

    The kernel and gamma values are the combo box texts, used as-is.

    Returns:
        A tuple ``(params, changed)`` where ``params`` is the keyword
        dictionary read from the widgets and ``changed`` is the result of
        ``self.getChangedValues(params, SVR())``.
    """
    params = {}
    params['C'] = self.cDoubleSpinBox.value()
    params['epsilon'] = self.epsilonDoubleSpinBox.value()
    params['kernel'] = self.kernelComboBox.currentText()
    params['degree'] = self.degreeSpinBox.value()
    params['gamma'] = self.gammaComboBox.currentText()
    params['coef0'] = self.coeff0DoubleSpinBox.value()
    params['shrinking'] = self.shrinkingCheckBox.isChecked()
    params['tol'] = self.toleranceDoubleSpinBox.value()
    params['cache_size'] = self.cacheSizeSpinBox.value()
    params['verbose'] = self.verboseCheckBox.isChecked()
    params['max_iter'] = int(self.maxIterationsSpinBox.value())

    changed = self.getChangedValues(params, SVR())
    return params, changed
def evalOne(parameters):
    """Evaluate one parameter set with a per-location train/test split.

    For every entry of the module-level ``locations`` the data is split with
    ``splitDataForXValidation``, features and targets are standardized, a
    bagged RBF SVR is fitted and its predictions are mapped back to the
    original target scale.  Observations and predictions of all locations
    are pooled before scoring.

    Args:
        parameters: Mapping providing ``"C"``, ``"max_samples"`` and
            ``"n_estimators"``.

    Returns:
        Element ``[1]`` of ``rmseEval(observations, predictions)``.
    """
    observed = []
    predicted = []
    for location in locations:
        trainX, testX, trainY, testY = splitDataForXValidation(
            location, "location", data, all_features, "target")

        x_scaler = StandardScaler()
        trainX = x_scaler.fit_transform(trainX)
        testX = x_scaler.transform(testX)

        y_scaler = StandardScaler()
        trainY = y_scaler.fit_transform(trainY)
        testY = y_scaler.transform(testY)

        base_svr = SVR(kernel='rbf', C=parameters["C"], cache_size=5000)
        model = BaggingRegressor(
            base_estimator=base_svr,
            max_samples=parameters["max_samples"],
            n_estimators=parameters["n_estimators"],
            verbose=0,
            n_jobs=-1)
        model.fit(trainX, trainY)

        # Undo the target scaling so the score is in original units.
        location_pred = y_scaler.inverse_transform(model.predict(testX))
        location_obs = y_scaler.inverse_transform(testY)
        observed.extend(location_obs)
        predicted.extend(location_pred)

    return rmseEval(observed, predicted)[1]
def individual_training_executor(self, dim):
    """Train a scaler + autoencoder pipeline and score an SVR on its output.

    The autoencoder is created with ``hiddenDims=[50, dim]`` and logs to
    ``self.get_path(dim)``.  The transformed data is evaluated with a
    5-fold cross-validated SVR; the result is plotted and written to
    ``actualThenPredicted.txt``.

    Args:
        dim: Second entry of the autoencoder's ``hiddenDims``; also passed
            to ``self.get_path``.
    """
    # Preprocessing and autoencoder chained into one pipeline.
    pipeline = make_pipeline(
        MinMaxScaler(feature_range=(-0.5, 0.5)),
        Autoencoder(logPath=self.get_path(dim), hiddenDims=[50, dim], beta=0.1),
    )

    # Read the data and train the pipeline on it.
    data, targets = self.read_mopac_reactivity_data()
    pipeline.fit(data, targets)

    # Cross-validated SVR predictions on the transformed data.
    latent = pipeline.transform(data)
    folds = KFold(n_splits=5, shuffle=True, random_state=40)
    predictions = cross_val_predict(SVR(C=10000), latent, targets, cv=folds)

    # Predicted-vs-actual scatter plot.
    main.plotScatterPlot(targets, predictions, 'predictedVsActual')

    # First row: actual targets; second row: cross-validated predictions.
    np.savetxt('actualThenPredicted.txt', np.array([targets, predictions]))
ARDRegression(), # HuberRegressor(), # epsilon: greater than 1.0, default 1.35 LinearRegression(n_jobs=5), PassiveAggressiveRegressor( random_state=randomstate), # C: 0.25, 0.5, 1, 5, 10 SGDRegressor(random_state=randomstate), TheilSenRegressor(n_jobs=5, random_state=randomstate), RANSACRegressor(random_state=randomstate), KNeighborsRegressor( weights='distance'), # n_neighbors: 3, 6, 9, 12, 15, 20 RadiusNeighborsRegressor(weights='distance'), # radius: 1, 2, 5, 10, 15 MLPRegressor(max_iter=10000000, random_state=randomstate), DecisionTreeRegressor( random_state=randomstate), # max_depth = 2, 3, 4, 6, 8 ExtraTreeRegressor(random_state=randomstate), # max_depth = 2, 3, 4, 6, 8 SVR() # C: 0.25, 0.5, 1, 5, 10 ] selectors = [ reliefF.reliefF, fisher_score.fisher_score, # chi_square.chi_square, JMI.jmi, CIFE.cife, DISR.disr, MIM.mim, CMIM.cmim, ICAP.icap, MRMR.mrmr, MIFS.mifs ]
l1_ratio=0.25, fit_intercept=True), 'complexity_label': 'non-zero coefficients', 'complexity_computer': lambda clf: np.count_nonzero(clf.coef_) }, { 'name': 'RandomForest', 'instance': RandomForestRegressor(), 'complexity_label': 'estimators', 'complexity_computer': lambda clf: clf.n_estimators }, { 'name': 'SVR', 'instance': SVR(kernel='rbf'), 'complexity_label': 'support vectors', 'complexity_computer': lambda clf: len(clf.support_vectors_) }, ] } benchmark(configuration) # benchmark n_features influence on prediction speed percentile = 90 percentiles = n_feature_influence({'ridge': Ridge()}, configuration['n_train'], configuration['n_test'], [100, 250, 500], percentile) plot_n_features_influence(percentiles, percentile) # benchmark throughput
def connectWidgets(self):
    """Populate the SVR parameter widgets with a fixed set of default values.

    The defaults are first written onto a fresh ``SVR`` instance and then
    copied from that instance into the line edits and list widgets.
    """
    svr = SVR()
    defaults = (
        ('kernel', 'rbf'),
        ('degree', 3),
        ('gamma', 'auto'),
        ('coef0', 0.0),
        ('tol', 1e-3),
        ('C', 1.0),
        ('epsilon', 0.1),
        ('shrinking', True),
        ('cache_size', 200),
        ('verbose', False),
        ('max_iter', -1),
    )
    for attr_name, attr_value in defaults:
        setattr(svr, attr_name, attr_value)

    self.cLineEdit.setText(str(svr.C))
    self.epsilonLineEdit.setText(str(svr.epsilon))

    kernel_matches = self.kernel_list.findItems(
        'Radial Basis Function', QtCore.Qt.MatchExactly)
    self.kernel_list.setCurrentItem(kernel_matches[0])

    self.degreeLineEdit.setText(str(svr.degree))
    self.coeff0LineEdit.setText(str(svr.coef0))

    shrinking_matches = self.shrinking_list.findItems(
        str(svr.shrinking), QtCore.Qt.MatchExactly)
    self.shrinking_list.setCurrentItem(shrinking_matches[0])

    self.toleranceLineEdit.setText(str(svr.tol))
    self.maxIterationsLineEdit.setText(str(svr.max_iter))
task='meg') ds = loader.fetch() # Preprocessing pipeline = PreprocessingPipeline(nodes=[ SampleSlicer({ 'band': ['alpha'], 'condition': ['vipassana'] }), FeatureWiseNormalizer(), TargetTransformer("expertise_hours") ]) ds_ = pipeline.transform(ds) # Estimator estimator_pp = Pipeline(steps=[('svr', SVR(C=1, kernel='linear'))]) cross_validation = GroupShuffleSplit(n_splits=10, test_size=0.25) scores = ['r2', 'explained_variance'] cv_attr = 'subject' sl = SearchLight(estimator=estimator_pp, scoring=scores, cv=cross_validation) sl.fit(ds_, cv_attr=cv_attr) #### Cross Validation ### cross_validation = GroupShuffleSplit(n_splits=150, test_size=0.25) groups = LabelEncoder().fit_transform(ds_.sa.subject) X = ds_.samples y = LabelEncoder().fit_transform(ds_.targets) train_list = [] for train, test in cross_validation.split(X, y, groups=groups):
K_N_N = KNeighborsClassifier()
SUPPORT_VECTOR = svm.SVC(kernel="linear")

# Ensemble classifiers
RANDOM_FOREST = RandomForestClassifier(n_estimators=100)
GRADIENT_BOOST_CL = GradientBoostingClassifier(n_estimators=100)
ADA_BOOST = AdaBoostClassifier(n_estimators=100)
EXTRA_TREE = ExtraTreesClassifier(n_estimators=100)

# Regressors
GRADIENT_BOOST_RG = GradientBoostingRegressor(n_estimators=100,
                                              learning_rate=0.1)
LINEAR_RG = LinearRegression()
RIDGE_RG = Ridge()
LASSO_RG = Lasso()
SVR_RG = SVR()


def getClassifierMap():
    """Return the mapping from classifier name to the module-level estimator instance.

    The values are the shared module-level objects, not fresh copies.
    """
    CLASSIFIER_MAP = {
        "DECISION_TREE": DECISION_TREE,
        "LOGISTIC_REGRESSION": LOGISTIC_REGRESSION,
        "NAIVE_BAYS": NAIVE_BAYS,
        "K_N_N": K_N_N,
        "SUPPORT_VECTOR": SUPPORT_VECTOR,
        "RANDOM_FOREST": RANDOM_FOREST,
        "GRADIENT_BOOST": GRADIENT_BOOST_CL,
        # Was mapped to GRADIENT_BOOST_CL, so requesting "ADA_BOOST"
        # silently returned the gradient boosting classifier.
        "ADA_BOOST": ADA_BOOST,
        "EXTRA_TREE": EXTRA_TREE,
    }
    return CLASSIFIER_MAP
def __sv_regressor__(self, data, target):
    """Fit an RBF-kernel SVR on ``(data, target)`` and store it as ``self.ensemble``.

    Args:
        data: Training samples passed to ``SVR.fit``.
        target: Training targets passed to ``SVR.fit``.
    """
    # Import from the public package: ``sklearn.svm.classes`` was deprecated
    # in scikit-learn 0.22 and removed in 0.24, while ``sklearn.svm`` exports
    # SVR in every release.
    from sklearn.svm import SVR

    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    svr_rbf.fit(data, target)
    self.ensemble = svr_rbf
def connectWidgets(self):
    """Populate the SVR parameter widgets with a fixed set of default values.

    The defaults are first written onto a fresh ``SVR`` instance and then
    copied from that instance into the spin boxes, combo boxes and check
    boxes.
    """
    svr = SVR()
    defaults = (
        ('kernel', 'rbf'),
        ('degree', 3),
        ('gamma', 'auto'),
        ('coef0', 0.0),
        ('tol', 1e-3),
        ('C', 1.0),
        ('epsilon', 0.1),
        ('shrinking', True),
        ('cache_size', 200),
        ('verbose', False),
        ('max_iter', -1),
    )
    for attr_name, attr_value in defaults:
        setattr(svr, attr_name, attr_value)

    # Numeric settings.
    self.cDoubleSpinBox.setValue(svr.C)
    self.epsilonDoubleSpinBox.setValue(svr.epsilon)

    # Kernel-related settings.
    self.defaultComboItem(self.kernelComboBox, svr.kernel)
    self.degreeSpinBox.setValue(svr.degree)
    self.defaultComboItem(self.gammaComboBox, svr.gamma)
    self.coeff0DoubleSpinBox.setValue(svr.coef0)

    # Solver settings.
    self.shrinkingCheckBox.setChecked(svr.shrinking)
    self.toleranceDoubleSpinBox.setValue(svr.tol)
    self.cacheSizeSpinBox.setValue(svr.cache_size)
    self.verboseCheckBox.setChecked(svr.verbose)
    self.maxIterationsSpinBox.setValue(svr.max_iter)
print X_train.shape print y_train.shape print X_test.shape print y_test.shape print X_train[123, :] ''' norm1 = np.linalg.norm(y_train) if norm1 != 0: y_train, y_test = y_train/norm1, y_test/norm1 print norm1 ''' print y_train.shape model = SVR(C=1.0, gamma=1.0) model = LinearRegression() lasso = Lasso(alpha=0.1).fit(X_train, y_train) enet = ElasticNet(alpha=0.1, l1_ratio=0.7).fit(X_train, y_train) y_pred = lasso.predict(X_test) print "MSE", mean_squared_error(y_test, y_pred) m = np.mean(y_test) print "MSE (Mean)", mean_squared_error(y_test, m * np.ones(len(y_test))) print "r^2 on test data", r2_score(y_test, y_pred) plt.plot(enet.coef_, label='Elastic net coefficients') plt.plot(lasso.coef_, label='Lasso coefficients')
'sample_slicer__band': [[c] for c in np.unique(ds.sa.band)], 'target_trans__target':["age"], 'estimator__clf__C': [1], 'cv__n_splits': [50], 'analysis__radius':[9.], } _default_config = { 'prepro':['sample_slicer', 'feature_norm', 'target_trans'], 'sample_slicer__band': ['alpha'], 'sample_slicer__condition' : ['vipassana'], 'target_trans__target':"expertise_hours", 'estimator': [('clf', SVR(C=1, kernel='linear'))], 'estimator__clf__C':1, 'estimator__clf__kernel':'linear', 'cv': ShuffleSplit, 'cv__n_splits': 50, 'cv__test_size': 0.25, 'scores' : ['neg_mean_squared_error','r2'], 'analysis': SearchLight, 'analysis__n_jobs': 15, 'analysis__permutation':100, 'kwargs__cv_attr': 'subject', 'analysis__verbose':0,
def set_learning_method(config, X_train, y_train):
    """
    Instantiates the sklearn's class corresponding to the value set in the
    configuration file for running the learning method.

    For each supported method the estimator is chosen in this order: a
    hyper-parameter search when the "optimize" section is set (SVR, SVC and
    LassoLars only), explicit values from the "parameters" section, or the
    class defaults.

    TODO: use reflection to instantiate the classes

    @param config: configuration object; its "learning" section is read
    @param X_train: training data, used only by the hyper-parameter search
    @param y_train: training targets, used only by the hyper-parameter search
    @return: a tuple (estimator, scorers). The estimator has fit() and
        predict() methods and is None when the method name is not
        recognised; both values are None when the configuration has no
        "learning" section.
    """
    estimator = None
    # Bound up front so the final return cannot hit an unbound local when
    # the configuration has no "learning" section.
    scorers = None

    learning_cfg = config.get("learning", None)
    if learning_cfg:
        p = learning_cfg.get("parameters", None)
        o = learning_cfg.get("optimize", None)
        scorers = \
            set_scorer_functions(learning_cfg.get("scorer", ['mae', 'rmse']))

        method_name = learning_cfg.get("method", None)
        if method_name == "SVR":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(SVR(), X_train, y_train,
                                           tune_params,
                                           scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))
            elif p:
                estimator = SVR(C=p.get("C", 10),
                                epsilon=p.get('epsilon', 0.01),
                                kernel=p.get('kernel', 'rbf'),
                                degree=p.get('degree', 3),
                                gamma=p.get('gamma', 0.0034),
                                tol=p.get('tol', 1e-3),
                                verbose=False)
            else:
                estimator = SVR()

        elif method_name == "SVC":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(SVC(), X_train, y_train,
                                           tune_params,
                                           scorers,
                                           o.get('cv', 5),
                                           o.get('verbose', True),
                                           o.get('n_jobs', 1))
            elif p:
                estimator = SVC(C=p.get('C', 1.0),
                                kernel=p.get('kernel', 'rbf'),
                                degree=p.get('degree', 3),
                                gamma=p.get('gamma', 0.0),
                                coef0=p.get('coef0', 0.0),
                                tol=p.get('tol', 1e-3),
                                verbose=p.get('verbose', False))
            else:
                estimator = SVC()

        elif method_name == "LassoCV":
            if p:
                estimator = LassoCV(eps=p.get('eps', 1e-3),
                                    n_alphas=p.get('n_alphas', 100),
                                    normalize=p.get('normalize', False),
                                    precompute=p.get('precompute', 'auto'),
                                    max_iter=p.get('max_iter', 1000),
                                    tol=p.get('tol', 1e-4),
                                    cv=p.get('cv', 10),
                                    verbose=False)
            else:
                estimator = LassoCV()

        elif method_name == "LassoLars":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(LassoLars(), X_train, y_train,
                                           tune_params,
                                           scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))
            # ``elif`` rather than ``if``: with a separate ``if`` the
            # optimized estimator above was always overwritten, either by
            # the explicit parameters or by the default LassoLars().
            elif p:
                estimator = LassoLars(alpha=p.get('alpha', 1.0),
                                      fit_intercept=p.get(
                                          'fit_intercept', True),
                                      verbose=p.get('verbose', False),
                                      normalize=p.get('normalize', True),
                                      max_iter=p.get('max_iter', 500),
                                      fit_path=p.get('fit_path', True))
            else:
                estimator = LassoLars()

        elif method_name == "LassoLarsCV":
            if p:
                estimator = LassoLarsCV(max_iter=p.get('max_iter', 500),
                                        normalize=p.get('normalize', True),
                                        max_n_alphas=p.get(
                                            'max_n_alphas', 1000),
                                        n_jobs=p.get('n_jobs', 1),
                                        cv=p.get('cv', 10),
                                        verbose=False)
            else:
                estimator = LassoLarsCV()

    return estimator, scorers
from ex30.ex30_lib_graph import plot2
# Public import path: ``sklearn.svm.classes`` was deprecated in scikit-learn
# 0.22 and removed in 0.24, while ``sklearn.svm`` exports SVR in every release.
from sklearn.svm import SVR

OUTPUT_PNG_FILE = '/experiments/ex30/ex30_svr.png'

# One observation per integer x in 0..23.
X = [[float(x)] for x in range(0, 24)]
Y = [
    12.0, 13.0, 13.0, 13.0, 28.0, 31.0, 38.0, 60.0, 85.0, 80.0, 64.0, 60.0,
    59.0, 58.0, 65.0, 70.0, 80.0, 90.0, 110.0, 100.0, 85.0, 65.0, 45.0, 20.0
]

# Prediction grid: the same 0..23 range sampled in steps of 0.1.
X2 = [[float(x) / 10.0] for x in range(0, 231)]

model = SVR(kernel='rbf', C=10)
model.fit(X, Y)
Y_pred = model.predict(X2)

print(str(Y_pred))

plot2(Y, Y_pred, OUTPUT_PNG_FILE,
      "Observed pollution concentration levels",
      "Predicted pollution concentration levels by SVR")
'RadiusNeighborsClassifier':RadiusNeighborsClassifier(), 'RadiusNeighborsRegressor':RadiusNeighborsRegressor(), 'RandomForestClassifier':RandomForestClassifier(), 'RandomForestRegressor':RandomForestRegressor(), 'RandomizedLasso':RandomizedLasso(), 'RandomizedLogisticRegression':RandomizedLogisticRegression(), 'RandomizedPCA':RandomizedPCA(), 'Ridge':Ridge(), 'RidgeCV':RidgeCV(), 'RidgeClassifier':RidgeClassifier(), 'RidgeClassifierCV':RidgeClassifierCV(), 'RobustScaler':RobustScaler(), 'SGDClassifier':SGDClassifier(), 'SGDRegressor':SGDRegressor(), 'SVC':SVC(), 'SVR':SVR(), 'SelectFdr':SelectFdr(), 'SelectFpr':SelectFpr(), 'SelectFwe':SelectFwe(), 'SelectKBest':SelectKBest(), 'SelectPercentile':SelectPercentile(), 'ShrunkCovariance':ShrunkCovariance(), 'SkewedChi2Sampler':SkewedChi2Sampler(), 'SparsePCA':SparsePCA(), 'SparseRandomProjection':SparseRandomProjection(), 'SpectralBiclustering':SpectralBiclustering(), 'SpectralClustering':SpectralClustering(), 'SpectralCoclustering':SpectralCoclustering(), 'SpectralEmbedding':SpectralEmbedding(), 'StandardScaler':StandardScaler(), 'TSNE':TSNE(),
output = open(OUTPUT_DATA_FILE, 'w') output.write("location,observation,prediction\n") for location in locations: print(str(location)) trainX, testX, trainY, testY = splitDataForXValidation( location, "location", data, all_features, "target") normalizer_X = StandardScaler() trainX = normalizer_X.fit_transform(trainX) testX = normalizer_X.transform(testX) normalizer_Y = StandardScaler() trainY = normalizer_Y.fit_transform(trainY) testY = normalizer_Y.transform(testY) model = BaggingRegressor(base_estimator=SVR(kernel='rbf', C=40, cache_size=5000), max_samples=4200, n_estimators=10, verbose=0, n_jobs=-1) model.fit(trainX, trainY) prediction = model.predict(testX) prediction = normalizer_Y.inverse_transform(prediction) testY = normalizer_Y.inverse_transform(testY) for i in range(0, len(testY)): output.write(str(location)) output.write(",") output.write(str(testY[i])) output.write(",")
def train(driverSpeed, sectionSpeed, newData, firstTime, n, minLon, lonLen, minLat, latLen, defaultVel):
    """Return an SVR trained to map [section average speed, driver average
    speed, occupancy flag] to instantaneous speed.

    Every file in ``newData`` is read as a header-less CSV with the columns
    ``taxiId, lat, lon, busy, time, vel, sec``.  A row yields a training
    sample when it belongs to the same taxi and the same hour of day as the
    row before it and its ``vel`` is not NaN.  The features are looked up
    with the previous row's section, taxi and hour.

    Args:
        driverSpeed: Indexed as ``driverSpeed[taxiId - 1][hour_offset]``.
        sectionSpeed: Indexed as ``sectionSpeed[sectionId][hour_offset]``.
        newData: Iterable of CSV paths (or file objects) to read.
        firstTime: Datetime whose hour is the zero point of ``hour_offset``;
            also the initial "previous time" for every file.
        n: Unused by this function.
        minLon: Unused by this function.
        lonLen: Unused by this function.
        minLat: Unused by this function.
        latLen: Unused by this function.
        defaultVel: Substituted for an average speed that is NaN.

    Returns:
        The fitted ``SVR`` instance.
    """
    X = []
    Y = []
    for path in newData:
        df = pandas.read_csv(
            path,
            header=None,
            names=["taxiId", "lat", "lon", "busy", "time", "vel", "sec"],
            dtype={
                "taxiId": numpy.int16,
                "lat": numpy.float32,
                "lon": numpy.float32,
                "busy": numpy.int8,
                # Builtin ``str``: ``numpy.str`` was only an alias for it
                # and was removed in NumPy 1.24.
                "time": str,
                "vel": numpy.float32,
                "sec": numpy.int16
            })

        # State of the previous row; -1 never matches a taxi id, so the
        # first row of a file never produces a sample.
        taxiId1 = -1
        sectionId1 = 0
        busy1 = 0
        time1 = firstTime
        for row in df.itertuples(index=False):
            taxiId2 = row[0]
            busy2 = row[3]
            time2 = datetime.datetime.strptime(row[4], "%Y/%m/%d %H:%M:%S")
            vel = row[5]
            sectionId2 = row[6]

            if (taxiId1 == taxiId2 and time1.hour == time2.hour
                    and not numpy.isnan(vel)):
                # Target: instantaneous speed. The original comment
                # attributes it to the previous point, although the value is
                # read from the current row - NOTE(review): confirm.
                Y.append(vel)

                hour_offset = time1.hour - firstTime.hour
                # Section average speed.
                section_avg = sectionSpeed[sectionId1][hour_offset]
                # Individual (driver) average speed.
                driver_avg = driverSpeed[taxiId1 - 1][hour_offset]

                X.append([
                    defaultVel if numpy.isnan(section_avg) else section_avg,
                    defaultVel if numpy.isnan(driver_avg) else driver_avg,
                    # Whether the taxi was carrying passengers.
                    busy1,
                ])

            taxiId1 = taxiId2
            busy1 = busy2
            time1 = time2
            sectionId1 = sectionId2

    clf = SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.2,
              gamma='auto', kernel='rbf', max_iter=-1, shrinking=True,
              tol=0.001, verbose=False)
    clf.fit(X, Y)
    return clf
classifier = DecisionTreeClassifier(max_depth=tree_depth) if alg == 1: classifier = RandomForestClassifier(n_estimators=random_forest_size, random_state=seed, n_jobs=10) if alg == 2: classifier = create_ensemble(seed) if alg == 3: classifier = AdaBoostClassifier(DecisionTreeClassifier(), n_estimators=boosting_size, random_state=seed) if alg == 4: scaler = StandardScaler() svr = SVR(kernel='rbf', cache_size=4000, C=1e3, gamma=0.0001, max_iter=200000, epsilon=0.0001) classifier = Pipeline([('standardize', scaler), ('svr', svr)]) if alg == 5: classifier = GaussianNB() if classifier == "not_init": print("Classifier not init, exit") exit(-1) if debug: print("TRAINING MODEL...") classifier.fit(training_x_no_missing, training_y)