def test_ml_pipeline():
    """Fit an SVR on the MOPAC learning set and plot predictions vs. targets.

    The data comes from ``ReactivityDataLoader().load_mopac_learning()`` and
    is split with ``train_test_split`` using its default settings.  The
    regressor is trained on the training part and evaluated on the held-out
    part.

    Side effects: the process working directory is changed to the user's
    ``Desktop`` folder before ``main.plotScatterPlot`` is called
    (presumably so the plot is written there -- confirm against
    ``plotScatterPlot``).
    """
    loader = ReactivityDataLoader()
    data, targets = loader.load_mopac_learning()

    regressor = SVR(C=1000)
    split = train_test_split(data, targets)
    trainData, testData, trainTargets, testTargets = split
    regressor.fit(trainData, trainTargets)

    desktop = Path.home() / 'Desktop'
    os.chdir(str(desktop))

    predictions = regressor.predict(testData)
    main.plotScatterPlot(testTargets, predictions, 'predictedVsActual')
def SVR_ST(trainFileName, testFileName):
    """Fit one linear SVR per store key and predict every row of the test file.

    Both files are read with ``ld.LoadData_DATA_ST``; the loaded objects are
    indexed by the keys ``'1'`` .. ``'5'`` and each entry is iterated as a
    sequence of rows.  In every row the first two fields are kept as
    identifiers, the remaining fields are converted to ``float`` and the
    first two converted values are discarded.  Training rows carry the
    target as their last field; test rows are used in full.

    Args:
        trainFileName: file passed to the loader for the training data.
        testFileName: file passed to the loader for the rows to predict.

    Returns:
        A list with one entry per test row (all keys concatenated, in key
        order): ``[id0, id1, prediction, column -4, 2 * column -5]``.  The
        last three are strings with four decimals, the prediction being
        clamped to be non-negative and the columns taken from the test row.
    """
    trainData = ld.LoadData_DATA_ST(trainFileName)
    testData = ld.LoadData_DATA_ST(testFileName)
    res = []
    for store in ('1', '2', '3', '4', '5'):
        train_X = []
        train_y = []
        for raw in trainData[store]:
            values = [float(x) for x in raw[2:]]
            train_X.append(values[2:-1])
            train_y.append(values[-1])

        test_X = []
        items = []
        for raw in testData[store]:
            items.append((raw[0], raw[1]))
            values = [float(x) for x in raw[2:]]
            test_X.append(values[2:])

        # Plain 2-D ndarrays instead of np.matrix: scikit-learn deprecated
        # np.matrix input in 1.0 and raises TypeError for it from 1.2 on.
        train_X = np.asarray(train_X)
        test_X = np.asarray(test_X)

        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        # NOTE(review): training uses columns -8:-3 but prediction uses
        # -7:-2; presumably the two file layouts differ by one trailing
        # column -- confirm against the loader.
        pred_y = svr.fit(train_X[:, -8:-3], train_y).predict(test_X[:, -7:-2])

        # items, pred_y and test_X all have one entry per test row.
        for (first_id, second_id), pred, row in zip(items, pred_y, test_X):
            res.append([
                first_id,
                second_id,
                '%.4f' % max(pred, 0),
                '%.4f' % row[-4],
                '%.4f' % (float(row[-5]) * 2),
            ])
    return res
def SVR_ST(trainFileName, testFileName):
    """Train a linear SVR for each key '1'..'5' and predict the test rows.

    The train and test files are both loaded through
    ``ld.LoadData_DATA_ST``.  For each key, every row contributes its first
    two fields as identifiers; the rest is converted to ``float`` and the
    first two converted values are dropped.  The last value of a training
    row is the regression target.

    Args:
        trainFileName: file given to the loader for fitting.
        testFileName: file given to the loader for prediction.

    Returns:
        List of ``[id0, id1, prediction, column -4, 2 * column -5]`` per
        test row; the three numeric entries are ``'%.4f'`` strings and the
        prediction is floored at zero.
    """
    trainData = ld.LoadData_DATA_ST(trainFileName)
    testData = ld.LoadData_DATA_ST(testFileName)
    res = []
    for key in ('1', '2', '3', '4', '5'):
        train_rows = [[float(x) for x in rec[2:]] for rec in trainData[key]]
        train_y = [vals[-1] for vals in train_rows]

        # Materialise once: the records are traversed twice below.
        test_records = list(testData[key])
        items = [(rec[0], rec[1]) for rec in test_records]
        test_rows = [[float(x) for x in rec[2:]] for rec in test_records]

        # np.asarray rather than np.matrix: scikit-learn >= 1.2 rejects
        # np.matrix input with a TypeError (deprecated since 1.0).
        train_X = np.asarray([vals[2:-1] for vals in train_rows])
        test_X = np.asarray([vals[2:] for vals in test_rows])

        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        svr.fit(train_X[:, -8:-3], train_y)
        # NOTE(review): the prediction window (-7:-2) is shifted by one
        # column relative to the training window (-8:-3); presumably the
        # test layout differs -- confirm against the loader.
        pred_y = svr.predict(test_X[:, -7:-2])

        for idx, row in enumerate(test_X):
            first_id, second_id = items[idx]
            res.append([
                first_id,
                second_id,
                '%.4f' % max(pred_y[idx], 0),
                '%.4f' % row[-4],
                '%.4f' % (float(row[-5]) * 2),
            ])
    return res
def SVR_ST_train():
    """Fit a linear SVR per key '1'..'5' on the EVAL set and predict VALIDATION.

    Both CSV files are read with ``ld.loadData_ST``.  In every row the first
    two fields are kept as identifiers, the rest is converted to ``float``,
    the first two converted values are dropped and the last one is the
    target.  The validation target is not used here.

    Returns:
        A list of ``[id0, id1, prediction, column -4, 2 * column -5]`` for
        each validation row, the three numeric entries being ``'%.2f'``
        strings clamped to be non-negative.
    """
    trainData = ld.loadData_ST('./data/EVAL_DataSetST1.csv')
    testData = ld.loadData_ST('./data/VALIDATION_DataSetST1.csv')
    res = []
    for key in ('1', '2', '3', '4', '5'):
        train_rows = [[float(x) for x in rec[2:]] for rec in trainData[key]]
        train_y = [vals[-1] for vals in train_rows]

        # Materialise once: the records are traversed twice below.
        test_records = list(testData[key])
        items = [(rec[0], rec[1]) for rec in test_records]
        test_rows = [[float(x) for x in rec[2:]] for rec in test_records]

        # np.asarray rather than np.matrix: scikit-learn >= 1.2 rejects
        # np.matrix input with a TypeError (deprecated since 1.0).
        train_X = np.asarray([vals[2:-1] for vals in train_rows])
        test_X = np.asarray([vals[2:-1] for vals in test_rows])

        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        svr.fit(train_X[:, -8:-1], train_y)
        pred_y = svr.predict(test_X[:, -8:-1])

        for idx, row in enumerate(test_X):
            first_id, second_id = items[idx]
            res.append([
                first_id,
                second_id,
                '%.2f' % max(pred_y[idx], 0),
                '%.2f' % max(row[-4], 0),
                '%.2f' % max(2 * row[-5], 0),
            ])
    return res
def SVR_ALL(trainFileName, testFileName):
    """Fit a single linear SVR on the training file and predict the test file.

    Training features and targets come from ``ld.LoadData_DATA_LABEL_ITEM``
    (its third return value is ignored); test features and item identifiers
    come from ``ld.LoadData_DATA_ITEM``.

    Args:
        trainFileName: file passed to the training loader.
        testFileName: file passed to the test loader.

    Returns:
        A list of ``[item, 'all', prediction, column -4, 2 * column -5]``
        per test row.  The three numeric entries are ``'%.4f'`` strings and
        the prediction is clamped to be non-negative.
    """
    train_X, train_y, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    test_X, items = ld.LoadData_DATA_ITEM(testFileName)

    # np.asarray rather than np.matrix: scikit-learn deprecated np.matrix
    # input in 1.0 and raises TypeError for it from 1.2 on.
    train_X = np.asarray(train_X)
    test_X = np.asarray(test_X)

    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    svr.fit(train_X[:, -8:-3], train_y)
    # NOTE(review): prediction uses columns -7:-2 while training used
    # -8:-3; presumably the test layout is offset by one column -- confirm.
    pred_y = svr.predict(test_X[:, -7:-2])

    return [
        [
            items[idx],
            'all',
            '%.4f' % max(pred_y[idx], 0),
            '%.4f' % row[-4],
            '%.4f' % (float(row[-5]) * 2),
        ]
        for idx, row in enumerate(test_X)
    ]
def SVR_ALL_train():
    """Fit a linear SVR on the EVAL data set and predict the VALIDATION set.

    Both CSV files are read with ``ld.loadData_all``, which yields features,
    targets and item identifiers.  The training identifiers and the
    validation targets are not used.

    Returns:
        A list of ``[item, 'all', prediction, column -4, 2 * column -5]``
        per validation row.  The three numeric entries are ``'%.2f'``
        strings and the prediction is clamped to be non-negative.
    """
    train_X, train_y, _ = ld.loadData_all('./data/EVAL_DataSet1.csv')
    test_X, _, items = ld.loadData_all('./data/VALIDATION_DataSet1.csv')

    # np.asarray rather than np.matrix: scikit-learn deprecated np.matrix
    # input in 1.0 and raises TypeError for it from 1.2 on.
    train_X = np.asarray(train_X)
    test_X = np.asarray(test_X)

    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    svr.fit(train_X[:, -8:-1], train_y)
    pred_y = svr.predict(test_X[:, -8:-1])

    return [
        [
            items[idx],
            'all',
            '%.2f' % max(pred_y[idx], 0),
            '%.2f' % row[-4],
            '%.2f' % (float(row[-5]) * 2),
        ]
        for idx, row in enumerate(test_X)
    ]
class SVRImpl:
    """Thin wrapper that forwards ``fit``/``predict`` to an ``SKLModel``.

    The constructor arguments are collected into ``self._hyperparams`` and
    passed unchanged as keyword arguments to ``SKLModel``.

    NOTE(review): the default ``gamma='auto_deprecated'`` is presumably the
    sentinel used by older scikit-learn releases -- confirm that the pinned
    version still accepts it.
    """

    def __init__(self, kernel='rbf', degree=3, gamma='auto_deprecated',
                 coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True,
                 cache_size=200, verbose=False, max_iter=-1):
        self._hyperparams = dict(
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            tol=tol,
            C=C,
            epsilon=epsilon,
            shrinking=shrinking,
            cache_size=cache_size,
            verbose=verbose,
            max_iter=max_iter,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded only when it is not ``None``.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        predictions = self._wrapped_model.predict(X)
        return predictions
def SVR_ALL(trainFileName, testFileName):
    """Train one linear SVR on all training rows and predict the test rows.

    ``ld.LoadData_DATA_LABEL_ITEM`` supplies the training features and
    targets (a third return value is discarded);
    ``ld.LoadData_DATA_ITEM`` supplies the test features and the item
    identifier of each test row.

    Args:
        trainFileName: file handed to the training loader.
        testFileName: file handed to the test loader.

    Returns:
        One ``[item, 'all', prediction, column -4, 2 * column -5]`` list per
        test row; numeric entries are formatted with ``'%.4f'`` and the
        prediction is floored at zero.
    """
    train_X, train_y, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    test_X, items = ld.LoadData_DATA_ITEM(testFileName)

    # Use plain ndarrays: scikit-learn >= 1.2 raises TypeError on np.matrix
    # input (deprecated since 1.0).
    train_X = np.asarray(train_X)
    test_X = np.asarray(test_X)

    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    # NOTE(review): fit uses columns -8:-3, predict uses -7:-2; presumably
    # the two layouts differ by one trailing column -- confirm.
    pred_y = svr.fit(train_X[:, -8:-3], train_y).predict(test_X[:, -7:-2])

    res = []
    for idx, row in enumerate(test_X):
        prediction = '%.4f' % max(pred_y[idx], 0)
        fourth_from_end = '%.4f' % row[-4]
        doubled_fifth = '%.4f' % (float(row[-5]) * 2)
        res.append(
            [items[idx], 'all', prediction, fourth_from_end, doubled_fifth])
    return res
def SVR_ALL_train():
    """Train a linear SVR on EVAL_DataSet1 and predict VALIDATION_DataSet1.

    ``ld.loadData_all`` returns features, targets and item identifiers for
    each file; the training identifiers and validation targets are ignored.

    Returns:
        One ``[item, 'all', prediction, column -4, 2 * column -5]`` list per
        validation row; numeric entries are formatted with ``'%.2f'`` and
        the prediction is floored at zero.
    """
    train_X, train_y, _ = ld.loadData_all('./data/EVAL_DataSet1.csv')
    test_X, _, items = ld.loadData_all('./data/VALIDATION_DataSet1.csv')

    # Use plain ndarrays: scikit-learn >= 1.2 raises TypeError on np.matrix
    # input (deprecated since 1.0).
    train_X = np.asarray(train_X)
    test_X = np.asarray(test_X)

    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])

    res = []
    for idx, row in enumerate(test_X):
        prediction = '%.2f' % max(pred_y[idx], 0)
        fourth_from_end = '%.2f' % row[-4]
        doubled_fifth = '%.2f' % (float(row[-5]) * 2)
        res.append(
            [items[idx], 'all', prediction, fourth_from_end, doubled_fifth])
    return res
def SVR_ST_train():
    """Train a linear SVR per key '1'..'5' on EVAL data, predict VALIDATION.

    ``ld.loadData_ST`` is used for both CSV files.  Each row keeps its first
    two fields as identifiers; the remaining fields are converted to
    ``float``, the first two converted values are discarded and the last
    one is the target.  Validation targets are not used.

    Returns:
        One ``[id0, id1, prediction, column -4, 2 * column -5]`` list per
        validation row, numeric entries formatted with ``'%.2f'`` and
        clamped to be non-negative.
    """
    trainData = ld.loadData_ST('./data/EVAL_DataSetST1.csv')
    testData = ld.loadData_ST('./data/VALIDATION_DataSetST1.csv')
    res = []
    for store in ('1', '2', '3', '4', '5'):
        train_X = []
        train_y = []
        for raw in trainData[store]:
            values = [float(x) for x in raw[2:]]
            train_X.append(values[2:-1])
            train_y.append(values[-1])

        test_X = []
        items = []
        for raw in testData[store]:
            items.append((raw[0], raw[1]))
            values = [float(x) for x in raw[2:]]
            test_X.append(values[2:-1])

        # Plain 2-D ndarrays instead of np.matrix: scikit-learn deprecated
        # np.matrix input in 1.0 and raises TypeError for it from 1.2 on.
        train_X = np.asarray(train_X)
        test_X = np.asarray(test_X)

        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])

        # items, pred_y and test_X all have one entry per validation row.
        for (first_id, second_id), pred, row in zip(items, pred_y, test_X):
            res.append([
                first_id,
                second_id,
                '%.2f' % max(pred, 0),
                '%.2f' % max(row[-4], 0),
                '%.2f' % max(2 * row[-5], 0),
            ])
    return res
def train(driverSpeed, sectionSpeed, newData, firstTime, n, minLon, lonLen,
          minLat, latLen, defaultVel):
    """Return an SVR trained on per-point features -> instantaneous speed.

    Each sample is ``[section average speed, driver average speed,
    occupied flag]`` taken from the previous row's state, and its target is
    the velocity read from the current row.  A sample is produced only when
    the current row has the same taxi id and the same hour as the previous
    row and its velocity is not NaN.

    Args:
        driverSpeed: indexed as ``driverSpeed[taxiId - 1][hour offset]``.
        sectionSpeed: indexed as ``sectionSpeed[sectionId][hour offset]``.
        newData: iterable of header-less CSV files with the columns
            taxiId, lat, lon, busy, time, vel, sec.
        firstTime: datetime whose hour is the origin of the hour offset; it
            also seeds the "previous time" at the start of every file.
        n, minLon, lonLen, minLat, latLen: unused here; kept so the
            signature stays unchanged for callers.
        defaultVel: value substituted for a NaN average speed.

    Returns:
        The fitted ``SVR`` instance.
    """
    X = []
    Y = []
    for csv_path in newData:
        df = pandas.read_csv(
            csv_path,
            header=None,
            names=["taxiId", "lat", "lon", "busy", "time", "vel", "sec"],
            dtype={
                "taxiId": numpy.int16,
                "lat": numpy.float32,
                "lon": numpy.float32,
                "busy": numpy.int8,
                # Builtin str: the ``numpy.str`` alias was deprecated in
                # NumPy 1.20 and removed in 1.24.
                "time": str,
                "vel": numpy.float32,
                "sec": numpy.int16,
            })
        # State carried over from the previous row; reset for every file.
        taxiId1 = -1
        sectionId1 = 0
        busy1 = 0
        time1 = firstTime
        for row in df.itertuples(index=False):
            taxiId2 = row[0]
            busy2 = row[3]
            time2 = datetime.datetime.strptime(row[4], "%Y/%m/%d %H:%M:%S")
            vel = row[5]
            sectionId2 = row[6]
            if (taxiId1 == taxiId2 and time1.hour == time2.hour
                    and not numpy.isnan(vel)):
                # Target: instantaneous speed (the original comment
                # attributes it to the previous point).
                Y.append(vel)
                hour_offset = time1.hour - firstTime.hour
                # Average speed on the previous point's road section.
                section_v = sectionSpeed[sectionId1][hour_offset]
                # Average speed of this driver.
                driver_v = driverSpeed[taxiId1 - 1][hour_offset]
                X.append([
                    defaultVel if numpy.isnan(section_v) else section_v,
                    defaultVel if numpy.isnan(driver_v) else driver_v,
                    # Whether the taxi was carrying a passenger.
                    busy1,
                ])
            taxiId1 = taxiId2
            busy1 = busy2
            time1 = time2
            sectionId1 = sectionId2
    clf = SVR(C=1.0,
              cache_size=200,
              coef0=0.0,
              degree=3,
              epsilon=0.2,
              gamma='auto',
              kernel='rbf',
              max_iter=-1,
              shrinking=True,
              tol=0.001,
              verbose=False)
    clf.fit(X, Y)
    return clf
def __sv_regressor__(self, data, target):
    """Fit an RBF-kernel SVR on ``(data, target)`` and keep it on the instance.

    Args:
        data: training samples passed to ``SVR.fit``.
        target: training targets passed to ``SVR.fit``.

    Side effects:
        Stores the fitted regressor in ``self.ensemble``.
    """
    # Import from the public package: ``sklearn.svm.classes`` was deprecated
    # in scikit-learn 0.22 and removed in 0.24.
    from sklearn.svm import SVR

    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    svr_rbf.fit(data, target)
    self.ensemble = svr_rbf
# Fit an RBF-kernel SVR to 24 observed pollution concentration values and
# plot the observations against predictions on a finer grid.
from ex30.ex30_lib_graph import plot2
# Public import path: ``sklearn.svm.classes`` was deprecated in
# scikit-learn 0.22 and removed in 0.24.
from sklearn.svm import SVR

OUTPUT_PNG_FILE = '/experiments/ex30/ex30_svr.png'

# One single-feature sample per integer 0..23 (presumably the hour of day).
X = [[float(x)] for x in range(0, 24)]
Y = [
    12.0, 13.0, 13.0, 13.0, 28.0, 31.0, 38.0, 60.0, 85.0, 80.0, 64.0, 60.0,
    59.0, 58.0, 65.0, 70.0, 80.0, 90.0, 110.0, 100.0, 85.0, 65.0, 45.0, 20.0
]
# Prediction grid: 0.0 .. 23.0 in steps of 0.1 (231 points).
X2 = [[float(x) / 10.0] for x in range(0, 231)]

model = SVR(kernel='rbf', C=10)
model.fit(X, Y)
Y_pred = model.predict(X2)
print(str(Y_pred))
plot2(Y, Y_pred, OUTPUT_PNG_FILE,
      "Observed pollution concentration levels",
      "Predicted pollution concentration levels by SVR")