def test_ml_pipeline():
    """Fit an SVR on loaded data and scatter-plot predictions against held-out targets.

    The data comes from ``ReactivityDataLoader().load_mopac_learning()`` and is
    split with ``train_test_split`` using its default settings.

    Side effect: the process working directory is changed to ``~/Desktop`` and
    is not restored afterwards.
    NOTE(review): presumably ``main.plotScatterPlot`` writes its output relative
    to the working directory -- confirm.
    """
    features, labels = ReactivityDataLoader().load_mopac_learning()
    model = SVR(C=1000)

    split = train_test_split(features, labels)
    train_features, test_features, train_labels, test_labels = split
    model.fit(train_features, train_labels)

    desktop_dir = Path.home() / 'Desktop'
    os.chdir(str(desktop_dir))

    predicted = model.predict(test_features)
    main.plotScatterPlot(test_labels, predicted, 'predictedVsActual')
Ejemplo n.º 2
0
def SVR_ST(trainFileName, testFileName):
    """Fit one linear SVR per key '1'..'5' and return formatted prediction rows.

    Both files are read with ``ld.LoadData_DATA_ST`` and indexed by the string
    keys '1' to '5'. For each record, fields 0 and 1 are kept as identifiers;
    the remaining fields are converted to ``float`` and the first two converted
    values are discarded. Training records additionally split off their last
    value as the regression target.

    The model is fitted on training columns ``[-8:-3]`` and predicts from test
    columns ``[-7:-2]``.
    NOTE(review): the one-column offset presumably reflects a different column
    layout in the test file -- confirm against the loader.

    Returns:
        A list of ``[id0, id1, prediction, column -4, 2 * column -5]`` rows.
        The three numeric entries are strings with four decimals, and the
        prediction is clamped to be non-negative.
    """
    train_by_key = ld.LoadData_DATA_ST(trainFileName)
    test_by_key = ld.LoadData_DATA_ST(testFileName)

    rows_out = []
    for key in ('1', '2', '3', '4', '5'):
        fit_rows, fit_targets = [], []
        for record in train_by_key[key]:
            numeric = [float(field) for field in record[2:]]
            fit_rows.append(numeric[2:-1])
            fit_targets.append(numeric[-1])

        eval_rows, identifiers = [], []
        for record in test_by_key[key]:
            identifiers.append((record[0], record[1]))
            numeric = [float(field) for field in record[2:]]
            eval_rows.append(numeric[2:])

        fit_matrix = np.matrix(fit_rows)
        eval_matrix = np.matrix(eval_rows)

        model = SVR(kernel='linear', epsilon=0.5, C=1)
        model.fit(fit_matrix[:, -8:-3], fit_targets)
        predictions = model.predict(eval_matrix[:, -7:-2])

        for row in range(len(eval_matrix)):
            first_id, second_id = identifiers[row][0], identifiers[row][1]
            clamped = max(predictions[row], 0)
            doubled = float(eval_matrix[row, -5]) * 2
            rows_out.append([
                first_id,
                second_id,
                '%.4f' % clamped,
                '%.4f' % eval_matrix[row, -4],
                '%.4f' % doubled,
            ])
    return rows_out
Ejemplo n.º 3
0
def SVR_ST(trainFileName,testFileName):
    """Train a linear SVR for each of the keys '1'..'5' and collect predictions.

    ``ld.LoadData_DATA_ST`` is called once per file; its result is indexed by
    the string keys '1' to '5'. Every record keeps fields 0 and 1 as an
    identifier pair. The remaining fields are parsed as floats, and the first
    two parsed values are skipped. For training records the final parsed value
    is the target and is excluded from the features.

    Fitting uses training columns ``[-8:-3]``; prediction uses test columns
    ``[-7:-2]``.
    NOTE(review): presumably the test file is shifted by one column relative to
    the training file -- verify.

    Returns:
        list of lists ``[id0, id1, pred, col_-4, 2*col_-5]``; the last three
        are '%.4f'-formatted strings and ``pred`` is floored at 0.
    """
    loaded_train = ld.LoadData_DATA_ST(trainFileName)
    loaded_test = ld.LoadData_DATA_ST(testFileName)

    collected = []
    for group in ['1', '2', '3', '4', '5']:
        # Training part: features plus the trailing target value.
        features = []
        targets = []
        for rec in loaded_train[group]:
            parsed = [float(token) for token in rec[2:]]
            targets_value = parsed[-1]
            features.append(parsed[2:-1])
            targets.append(targets_value)

        # Test part: features only, plus the identifier pair of each record.
        candidates = []
        id_pairs = []
        for rec in loaded_test[group]:
            id_pairs.append((rec[0], rec[1]))
            parsed = [float(token) for token in rec[2:]]
            candidates.append(parsed[2:])

        features = np.matrix(features)
        candidates = np.matrix(candidates)

        regressor = SVR(kernel='linear', epsilon=0.5, C=1)
        fitted = regressor.fit(features[:, -8:-3], targets)
        estimates = fitted.predict(candidates[:, -7:-2])

        collected.extend(
            [
                id_pairs[k][0],
                id_pairs[k][1],
                '%.4f' % max(estimates[k], 0),
                '%.4f' % candidates[k, -4],
                '%.4f' % (float(candidates[k, -5]) * 2),
            ]
            for k in range(len(candidates))
        )
    return collected
Ejemplo n.º 4
0
def SVR_ST_train(trainFileName='./data/EVAL_DataSetST1.csv',
                 testFileName='./data/VALIDATION_DataSetST1.csv'):
    """Fit one linear SVR per key '1'..'5' and predict on the validation file.

    Args:
        trainFileName: path passed to ``ld.loadData_ST`` for the training data.
            Defaults to the path that was previously hard-coded.
        testFileName: path passed to ``ld.loadData_ST`` for the data to predict
            on. Defaults to the path that was previously hard-coded.

    Both loaded objects are indexed by the string keys '1' to '5'. For each
    record, fields 0 and 1 are kept as identifiers; the remaining fields are
    converted to ``float``, the first two converted values are skipped and the
    last one is treated as the label. Labels of the test records are not used.

    The model is fitted on, and predicts from, columns ``[-8:-1]`` of the
    resulting feature matrices.

    Returns:
        A list of ``[id0, id1, prediction, column -4, 2 * column -5]`` rows.
        The numeric entries are strings with two decimals, each clamped to be
        non-negative.
    """
    trainData = ld.loadData_ST(trainFileName)
    testData = ld.loadData_ST(testFileName)

    res = []
    for store_id in ('1', '2', '3', '4', '5'):
        train_X, train_y = [], []
        for record in trainData[store_id]:
            values = [float(x) for x in record[2:]]
            train_X.append(values[2:-1])
            train_y.append(values[-1])

        test_X, items = [], []
        for record in testData[store_id]:
            items.append((record[0], record[1]))
            values = [float(x) for x in record[2:]]
            # Drop the trailing label so the columns line up with train_X.
            test_X.append(values[2:-1])

        train_X = np.matrix(train_X)
        test_X = np.matrix(test_X)
        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])

        # A separate index name keeps the outer store_id intact.
        for row in range(len(test_X)):
            res.append([
                items[row][0],
                items[row][1],
                '%.2f' % max(pred_y[row], 0),
                '%.2f' % max(test_X[row, -4], 0),
                '%.2f' % max(2 * test_X[row, -5], 0),
            ])
    return res
Ejemplo n.º 5
0
def SVR_ALL(trainFileName,testFileName):
    """Fit a single linear SVR on the training file and predict the test file.

    Training features and labels come from ``ld.LoadData_DATA_LABEL_ITEM``
    (its third return value is ignored); test features and item identifiers
    come from ``ld.LoadData_DATA_ITEM``.

    The model is fitted on training columns ``[-8:-3]`` and predicts from test
    columns ``[-7:-2]``.
    NOTE(review): the one-column offset presumably reflects a different column
    layout in the test data -- confirm against the loaders.

    Returns:
        A list of ``[item, 'all', prediction, column -4, 2 * column -5]`` rows.
        The numeric entries are '%.4f'-formatted strings and the prediction is
        clamped to be non-negative.
    """
    features, labels, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    candidates, item_ids = ld.LoadData_DATA_ITEM(testFileName)

    fit_matrix = np.matrix(features)
    eval_matrix = np.matrix(candidates)

    model = SVR(kernel='linear', epsilon=0.5, C=1)
    model.fit(fit_matrix[:, -8:-3], labels)
    predictions = model.predict(eval_matrix[:, -7:-2])

    return [
        [
            item_ids[row],
            'all',
            '%.4f' % max(predictions[row], 0),
            '%.4f' % eval_matrix[row, -4],
            '%.4f' % (float(eval_matrix[row, -5]) * 2),
        ]
        for row in range(len(eval_matrix))
    ]
Ejemplo n.º 6
0
def SVR_ALL_train(trainFileName='./data/EVAL_DataSet1.csv',
                  testFileName='./data/VALIDATION_DataSet1.csv'):
    """Fit a single linear SVR on one data set and predict another.

    Args:
        trainFileName: path passed to ``ld.loadData_all`` for the training
            data. Defaults to the path that was previously hard-coded.
        testFileName: path passed to ``ld.loadData_all`` for the data to
            predict on. Defaults to the path that was previously hard-coded.

    ``ld.loadData_all`` returns a ``(features, labels, items)`` triple. The
    training items and the test labels are not used.

    The model is fitted on, and predicts from, columns ``[-8:-1]``.

    Returns:
        A list of ``[item, 'all', prediction, column -4, 2 * column -5]`` rows.
        The numeric entries are strings with two decimals and the prediction
        is clamped to be non-negative.
    """
    train_X, train_y, _ = ld.loadData_all(trainFileName)
    # The test labels are returned by the loader but are not needed here.
    test_X, _unused_labels, items = ld.loadData_all(testFileName)

    train_X = np.matrix(train_X)
    test_X = np.matrix(test_X)
    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])

    return [
        [
            items[row],
            'all',
            '%.2f' % max(pred_y[row], 0),
            '%.2f' % test_X[row, -4],
            '%.2f' % (float(test_X[row, -5]) * 2),
        ]
        for row in range(len(test_X))
    ]
Ejemplo n.º 7
0
class SVRImpl():
    """Thin wrapper that forwards ``fit`` and ``predict`` to an ``SKLModel``.

    The constructor arguments are stored in ``self._hyperparams`` and passed
    unchanged, as keyword arguments, to ``SKLModel``.
    """

    def __init__(self,
                 kernel='rbf',
                 degree=3,
                 gamma='auto_deprecated',
                 coef0=0.0,
                 tol=0.001,
                 C=1.0,
                 epsilon=0.1,
                 shrinking=True,
                 cache_size=200,
                 verbose=False,
                 max_iter=-1):
        """Record the hyperparameters and build the wrapped model from them."""
        self._hyperparams = dict(
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            tol=tol,
            C=C,
            epsilon=epsilon,
            shrinking=shrinking,
            cache_size=cache_size,
            verbose=verbose,
            max_iter=max_iter,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded only when it is not ``None``; otherwise the wrapped
        model's ``fit`` is called with ``X`` alone.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return whatever the wrapped model's ``predict`` returns for ``X``."""
        wrapped = self._wrapped_model
        return wrapped.predict(X)
Ejemplo n.º 8
0
def SVR_ALL(trainFileName, testFileName):
    """Train one linear SVR on all training rows and predict every test row.

    ``ld.LoadData_DATA_LABEL_ITEM`` supplies the training features and labels
    (its third return value is discarded). ``ld.LoadData_DATA_ITEM`` supplies
    the test features and their item identifiers.

    Fitting uses training columns ``[-8:-3]``; prediction uses test columns
    ``[-7:-2]``.
    NOTE(review): presumably the test data is shifted by one column relative to
    the training data -- verify against the loaders.

    Returns:
        list of ``[item, 'all', pred, col_-4, 2*col_-5]``; the last three are
        strings with four decimals and ``pred`` is floored at 0.
    """
    loaded_train = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    fit_features, fit_labels = loaded_train[0], loaded_train[1]
    eval_features, eval_items = ld.LoadData_DATA_ITEM(testFileName)

    fit_features = np.matrix(fit_features)
    eval_features = np.matrix(eval_features)

    regressor = SVR(kernel='linear', epsilon=0.5, C=1)
    fitted = regressor.fit(fit_features[:, -8:-3], fit_labels)
    estimates = fitted.predict(eval_features[:, -7:-2])

    def describe(position):
        # One output row: identifier, fixed tag, then three formatted numbers.
        floored = max(estimates[position], 0)
        twice = float(eval_features[position, -5]) * 2
        return [
            eval_items[position],
            'all',
            '%.4f' % floored,
            '%.4f' % eval_features[position, -4],
            '%.4f' % twice,
        ]

    output = []
    for position in range(len(eval_features)):
        output.append(describe(position))
    return output
Ejemplo n.º 9
0
def SVR_ALL_train(trainFileName='./data/EVAL_DataSet1.csv',
                  testFileName='./data/VALIDATION_DataSet1.csv'):
    """Fit a single linear SVR on one data set and predict another.

    Args:
        trainFileName: path handed to ``ld.loadData_all`` for the training
            data; the default is the previously hard-coded path.
        testFileName: path handed to ``ld.loadData_all`` for the data to
            predict on; the default is the previously hard-coded path.

    ``ld.loadData_all`` yields ``(features, labels, items)``. Only the training
    features and labels and the test features and items are used.

    Columns ``[-8:-1]`` are used both for fitting and for prediction.

    Returns:
        list of ``[item, 'all', prediction, column -4, 2 * column -5]``. The
        numeric entries are '%.2f'-formatted strings and the prediction is
        clamped to be non-negative.
    """
    train_X, train_y, _ = ld.loadData_all(trainFileName)
    # The loader also returns test labels; they are not needed here.
    test_X, _unused_labels, items = ld.loadData_all(testFileName)

    train_X = np.matrix(train_X)
    test_X = np.matrix(test_X)

    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    svr.fit(train_X[:, -8:-1], train_y)
    pred_y = svr.predict(test_X[:, -8:-1])

    res = []
    for row in range(len(test_X)):
        res.append([
            items[row],
            'all',
            '%.2f' % max(pred_y[row], 0),
            '%.2f' % test_X[row, -4],
            '%.2f' % (float(test_X[row, -5]) * 2),
        ])
    return res
Ejemplo n.º 10
0
def SVR_ST_train(trainFileName='./data/EVAL_DataSetST1.csv',
                 testFileName='./data/VALIDATION_DataSetST1.csv'):
    """Fit one linear SVR per key '1'..'5' and predict on the validation file.

    Args:
        trainFileName: path handed to ``ld.loadData_ST`` for the training
            data; the default is the previously hard-coded path.
        testFileName: path handed to ``ld.loadData_ST`` for the data to
            predict on; the default is the previously hard-coded path.

    The loaded objects are indexed by the string keys '1' to '5'. Each record
    keeps fields 0 and 1 as identifiers. The remaining fields are parsed as
    floats; the first two parsed values are skipped and the last is the label.
    Labels of the test records are not used.

    Columns ``[-8:-1]`` of the feature matrices are used both for fitting and
    for prediction.

    Returns:
        list of ``[id0, id1, prediction, column -4, 2 * column -5]``. The
        numeric entries are '%.2f'-formatted strings, each clamped to be
        non-negative.
    """
    trainData = ld.loadData_ST(trainFileName)
    testData = ld.loadData_ST(testFileName)

    res = []
    for store_id in ('1', '2', '3', '4', '5'):
        train_X = []
        train_y = []
        for record in trainData[store_id]:
            values = [float(x) for x in record[2:]]
            train_X.append(values[2:-1])
            train_y.append(values[-1])

        test_X = []
        items = []
        for record in testData[store_id]:
            items.append((record[0], record[1]))
            values = [float(x) for x in record[2:]]
            # Strip the trailing label so the columns match train_X.
            test_X.append(values[2:-1])

        train_X = np.matrix(train_X)
        test_X = np.matrix(test_X)
        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])

        # Use a dedicated row index rather than re-binding the outer loop name.
        for row in range(len(test_X)):
            res.append([
                items[row][0],
                items[row][1],
                '%.2f' % max(pred_y[row], 0),
                '%.2f' % max(test_X[row, -4], 0),
                '%.2f' % max(2 * test_X[row, -5], 0),
            ])
    return res
Ejemplo n.º 11
0
def train(driverSpeed, sectionSpeed, newData, firstTime, n, minLon, lonLen,
          minLat, latLen, defaultVel):
    '''Return an SVR trained on [section mean speed, driver mean speed,
    occupancy flag] -> instantaneous speed.

    Args:
        driverSpeed: indexed as ``driverSpeed[taxiId - 1][hourOffset]``.
        sectionSpeed: indexed as ``sectionSpeed[sectionId][hourOffset]``.
        newData: iterable of CSV paths/buffers accepted by ``pandas.read_csv``.
            The files have no header; their columns are read as taxiId, lat,
            lon, busy, time, vel, sec.
        firstTime: ``datetime`` whose ``hour`` is subtracted from each row's
            hour to form ``hourOffset``.
        n, minLon, lonLen, minLat, latLen: accepted for interface
            compatibility; not used by this function.
        defaultVel: value substituted when a looked-up mean speed is NaN.

    Returns:
        The fitted ``SVR`` instance.

    A sample is produced for each row that has the same taxi id and the same
    hour as the row before it and whose ``vel`` is not NaN. The target is that
    row's ``vel``; the features are looked up with the previous row's section,
    taxi id, hour and busy flag.
    '''
    X = []
    Y = []
    for path in newData:
        df = pandas.read_csv(
            path,
            header=None,
            names=["taxiId", "lat", "lon", "busy", "time", "vel", "sec"],
            dtype={
                "taxiId": numpy.int16,
                "lat": numpy.float32,
                "lon": numpy.float32,
                "busy": numpy.int8,
                # Built-in ``str``: the ``numpy.str`` alias was deprecated in
                # NumPy 1.20 and removed in 1.24, where it raises
                # AttributeError.
                "time": str,
                "vel": numpy.float32,
                "sec": numpy.int16,
            })

        # State carried over from the previous row. The initial taxi id of -1
        # is assumed never to equal a real id, so the first row of each file
        # yields no sample.
        taxiId1 = -1
        sectionId1 = 0
        busy1 = 0
        time1 = firstTime
        for row in df.itertuples(index=False):
            taxiId2 = row[0]
            busy2 = row[3]
            time2 = datetime.datetime.strptime(row[4], "%Y/%m/%d %H:%M:%S")
            vel = row[5]
            sectionId2 = row[6]

            sameTaxi = taxiId1 == taxiId2
            sameHour = time1.hour == time2.hour
            if sameTaxi and sameHour and not numpy.isnan(vel):
                # Target: instantaneous speed (original note: "instantaneous
                # speed of the previous point").
                Y.append(vel)

                hourOffset = time1.hour - firstTime.hour
                # Mean speed of the road section.
                sectionMean = sectionSpeed[sectionId1][hourOffset]
                # Mean speed of the individual driver.
                driverMean = driverSpeed[taxiId1 - 1][hourOffset]
                X.append([
                    defaultVel if numpy.isnan(sectionMean) else sectionMean,
                    defaultVel if numpy.isnan(driverMean) else driverMean,
                    # Whether the taxi was carrying passengers.
                    busy1,
                ])

            taxiId1 = taxiId2
            busy1 = busy2
            time1 = time2
            sectionId1 = sectionId2

    clf = SVR(C=1.0,
              cache_size=200,
              coef0=0.0,
              degree=3,
              epsilon=0.2,
              gamma='auto',
              kernel='rbf',
              max_iter=-1,
              shrinking=True,
              tol=0.001,
              verbose=False)
    clf.fit(X, Y)

    return clf
 def __sv_regressor__(self, data, target):
     """Fit an RBF-kernel SVR on ``data``/``target`` and store it.

     The fitted estimator is assigned to ``self.ensemble``; nothing is
     returned.
     """
     # Import from the public package path. ``sklearn.svm.classes`` was
     # deprecated in scikit-learn 0.22 and removed in 0.24; ``sklearn.svm``
     # exports SVR in both old and new releases.
     from sklearn.svm import SVR
     svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
     svr_rbf.fit(data, target)
     self.ensemble = svr_rbf
Ejemplo n.º 13
0
# Fit an RBF-kernel SVR to 24 observed values and plot the observations
# together with the model's predictions on a finer grid.

# ``sklearn.svm.classes`` was deprecated in scikit-learn 0.22 and removed in
# 0.24; SVR is exported from the public ``sklearn.svm`` package.
from sklearn.svm import SVR

from ex30.ex30_lib_graph import plot2

OUTPUT_PNG_FILE = '/experiments/ex30/ex30_svr.png'

# 24 single-feature training samples: 0.0, 1.0, ..., 23.0.
# NOTE(review): presumably hour of day -- confirm.
X = [[float(x)] for x in range(24)]
# One observed value per training sample.
Y = [
    12.0, 13.0, 13.0, 13.0, 28.0, 31.0, 38.0, 60.0, 85.0, 80.0, 64.0, 60.0,
    59.0, 58.0, 65.0, 70.0, 80.0, 90.0, 110.0, 100.0, 85.0, 65.0, 45.0, 20.0,
]

# Prediction grid: 231 points from 0.0 to 23.0 in steps of 0.1.
X2 = [[float(x) / 10.0] for x in range(231)]

model = SVR(kernel='rbf', C=10)
model.fit(X, Y)
Y_pred = model.predict(X2)

print(Y_pred)

plot2(Y, Y_pred, OUTPUT_PNG_FILE, "Observed pollution concentration levels",
      "Predicted pollution concentration levels by SVR")