Example #1
def SVR_ST(trainFileName, testFileName):
    trainData = ld.LoadData_DATA_ST(trainFileName)
    testData = ld.LoadData_DATA_ST(testFileName)

    store = ['1', '2', '3', '4', '5']
    res = []
    for i in store:
        train_X = []
        train_y = []
        context = trainData[i]
        for array in context:
            array = [float(x) for x in array[2:]]
            train_X.append((array[2:-1]))
            train_y.append(array[-1])

        test_X = []
        items = []
        context = testData[i]
        for array in context:
            items.append((array[0], array[1]))
            array = [float(x) for x in array[2:]]
            test_X.append((array[2:]))

        train_X = np.matrix(train_X)
        test_X = np.matrix(test_X)
        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        pred_y = svr.fit(train_X[:, -8:-3], train_y).predict(test_X[:, -7:-2])
        for i in range(len(test_X)):
            res.append([
                items[i][0], items[i][1],
                '%.4f' % max(pred_y[i], 0),
                '%.4f' % test_X[i, -4],
                '%.4f' % (float(test_X[i, -5]) * 2)
            ])
    return res
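These SVR_ST / SVR_ALL snippets (Examples #1, #3, #5, #6) assume NumPy, scikit-learn's SVR, and a project-local loader aliased as ld; the loader's real module name never appears in the snippets, so it is a placeholder in the sketch below. Note also that np.matrix is legacy NumPy: np.asarray would behave the same for the 2-D slicing and scalar indexing used here.

import numpy as np
from sklearn.svm import SVR
import LoadData as ld  # placeholder module name; only the alias `ld` is visible in the snippets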
Example #3
def SVR_ST_train():
    trainData = ld.loadData_ST('./data/EVAL_DataSetST1.csv')
    testData = ld.loadData_ST('./data/VALIDATION_DataSetST1.csv')

    store = ['1', '2', '3', '4', '5']
    res = []
    for i in store:
        train_X = []
        train_y = []
        context = trainData[i]
        for array in context:
            array = [float(x) for x in array[2:]]
            train_X.append(array[2:-1])
            train_y.append(array[-1])

        test_X = []
        test_y = []
        items = []
        context = testData[i]
        for array in context:
            items.append((array[0], array[1]))
            array = [float(x) for x in array[2:]]
            test_X.append(array[2:-1])
            test_y.append(array[-1])

        train_X = np.matrix(train_X)
        test_X = np.matrix(test_X)
        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])
        for i in range(len(test_X)):
            res.append([
                items[i][0], items[i][1],
                '%.2f' % max(pred_y[i], 0),
                '%.2f' % max(test_X[i, -4], 0),
                '%.2f' % max(2 * test_X[i, -5], 0)
            ])
    return res
Example #4
def test_ml_pipeline():
    'Load a test data set, run an SVM on it, and plot predictions vs. actual values.'
    data, targets = ReactivityDataLoader().load_mopac_learning()
    regressor = SVR(C=1000)
    trainData, testData, trainTargets, testTargets = train_test_split(data, targets)
    regressor.fit(trainData, trainTargets)
    os.chdir(str(Path.home() / 'Desktop'))
    main.plotScatterPlot(testTargets, regressor.predict(testData), 'predictedVsActual')
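For reference, the same split/fit/evaluate pattern in a self-contained form, with synthetic data standing in for ReactivityDataLoader (everything below is illustrative):

import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

rng = np.random.default_rng(0)
data = rng.random((200, 5))
targets = data @ np.array([1.0, -2.0, 0.5, 0.0, 3.0]) + 0.1 * rng.standard_normal(200)
trainData, testData, trainTargets, testTargets = train_test_split(data, targets, random_state=0)
regressor = SVR(C=1000).fit(trainData, trainTargets)
print('r2:', r2_score(testTargets, regressor.predict(testData)))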
Example #5
def SVR_ALL(trainFileName, testFileName):
    train_X, train_y, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    test_X, items = ld.LoadData_DATA_ITEM(testFileName)
    train_X = np.matrix(train_X)
    test_X = np.matrix(test_X)
    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    pred_y = svr.fit(train_X[:, -8:-3], train_y).predict(test_X[:, -7:-2])
    res = []
    for i in range(len(test_X)):
        res.append([
            items[i], 'all',
            '%.4f' % max(pred_y[i], 0),
            '%.4f' % test_X[i, -4],
            '%.4f' % (float(test_X[i, -5]) * 2)
        ])
    return res
Example #6
def SVR_ALL_train():
    train_X, train_y, _ = ld.loadData_all('./data/EVAL_DataSet1.csv')
    test_X, test_y, items = ld.loadData_all('./data/VALIDATION_DataSet1.csv')
    train_X = np.matrix(train_X)
    test_X = np.matrix(test_X)
    svr = SVR(kernel='linear', epsilon=0.5, C=1)
    pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])
    res = []
    for i in range(len(test_X)):
        res.append([
            items[i], 'all',
            '%.2f' % max(pred_y[i], 0),
            '%.2f' % test_X[i, -4],
            '%.2f' % (float(test_X[i, -5]) * 2)
        ])
    return res
Example #7
 def init_model(self):
     return SVR(kernel="rbf",
                C=self.c,
                epsilon=self.eps,
                tol=self.tol,
                max_iter=self.max_iter,
                gamma=self.gamma)
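Factories like init_model (also Examples #13 and #14) normally live on a wrapper object that stores the hyperparameters; a minimal sketch, with the wrapper class and attribute defaults assumed rather than taken from the source:

from sklearn.svm import SVR

class RBFSVRConfig:
    # hypothetical container mirroring the attributes init_model reads
    def __init__(self, c=1.0, eps=0.1, tol=1e-3, max_iter=-1, gamma='scale'):
        self.c = c
        self.eps = eps
        self.tol = tol
        self.max_iter = max_iter
        self.gamma = gamma

    def init_model(self):
        return SVR(kernel="rbf", C=self.c, epsilon=self.eps,
                   tol=self.tol, max_iter=self.max_iter, gamma=self.gamma)

model = RBFSVRConfig(c=10.0).init_model()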
Example #8
 def fit(self, X, y=None):
     self._sklearn_model = SKLModel(**self._hyperparams)
     if (y is not None):
         self._sklearn_model.fit(X, y)
     else:
         self._sklearn_model.fit(X)
     return self
Example #9
    def run(self):
        kernel_lookup = {
            'Radial Basis Function': 'rbf',
            'Linear': 'linear',
            'Polynomial': 'poly',
            'Sigmoid': 'sigmoid',
            'Precomputed': 'precomputed'
        }
        kernel = kernel_lookup[self.kernelComboBox.currentText()]

        params = {
            'C': self.cDoubleSpinBox.value(),
            'epsilon': self.epsilonDoubleSpinBox.value(),
            'kernel': kernel,
            'degree': self.degreeSpinBox.value(),
            'gamma': self.gammaComboBox.currentText(),
            'coef0': self.coeff0DoubleSpinBox.value(),
            'shrinking': self.shrinkingCheckBox.isChecked(),
            'tol': self.toleranceDoubleSpinBox.value(),
            'cache_size': self.cacheSizeSpinBox.value(),
            'verbose': self.verboseCheckBox.isChecked(),
            'max_iter': int(self.maxIterationsSpinBox.value())
        }

        return params, self.getChangedValues(params, SVR())
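Every key this dialog collects maps one-to-one onto SVR's constructor, so the returned dict can instantiate the model directly; for example, with illustrative values:

from sklearn.svm import SVR

params = {'C': 1.0, 'epsilon': 0.1, 'kernel': 'rbf', 'degree': 3,
          'gamma': 'scale', 'coef0': 0.0, 'shrinking': True, 'tol': 1e-3,
          'cache_size': 200, 'verbose': False, 'max_iter': -1}
model = SVR(**params)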
Example #10
 def __init__(self,
              kernel='rbf',
              degree=3,
              gamma='auto_deprecated',
              coef0=0.0,
              tol=0.001,
              C=1.0,
              epsilon=0.1,
              shrinking=True,
              cache_size=200,
              verbose=False,
              max_iter=(-1)):
     self._hyperparams = {
         'kernel': kernel,
         'degree': degree,
         'gamma': gamma,
         'coef0': coef0,
         'tol': tol,
         'C': C,
         'epsilon': epsilon,
         'shrinking': shrinking,
         'cache_size': cache_size,
         'verbose': verbose,
         'max_iter': max_iter
     }
     self._wrapped_model = SKLModel(**self._hyperparams)
Example #13
 def init_model(self):
     return SVR(kernel="sigmoid",
                C=self.c,
                epsilon=self.eps,
                tol=self.tol,
                max_iter=self.max_iter,
                coef0=self.coef0,
                gamma=self.gamma)
Example #14
 def init_model(self):
     return SVR(kernel="poly",
                degree=self.degree,
                C=self.c,
                epsilon=self.eps,
                tol=self.tol,
                max_iter=self.max_iter,
                coef0=self.coef0,
                gamma=self.gamma)
Example #15
class SVRImpl():
    def __init__(self,
                 kernel='rbf',
                 degree=3,
                 gamma='auto_deprecated',
                 coef0=0.0,
                 tol=0.001,
                 C=1.0,
                 epsilon=0.1,
                 shrinking=True,
                 cache_size=200,
                 verbose=False,
                 max_iter=(-1)):
        self._hyperparams = {
            'kernel': kernel,
            'degree': degree,
            'gamma': gamma,
            'coef0': coef0,
            'tol': tol,
            'C': C,
            'epsilon': epsilon,
            'shrinking': shrinking,
            'cache_size': cache_size,
            'verbose': verbose,
            'max_iter': max_iter
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
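A usage sketch for SVRImpl on toy data, assuming SKLModel is bound to sklearn.svm.SVR (which the hyperparameter names strongly suggest); gamma is overridden because the legacy 'auto_deprecated' default is rejected by current scikit-learn:

import numpy as np
from sklearn.svm import SVR as SKLModel  # assumed binding

X = np.random.rand(50, 3)
y = np.random.rand(50)
impl = SVRImpl(kernel='rbf', C=10.0, gamma='scale')
print(impl.fit(X, y).predict(X[:5]))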
Example #17
    def run(self):
        params = {
            'C': self.cDoubleSpinBox.value(),
            'epsilon': self.epsilonDoubleSpinBox.value(),
            'kernel': self.kernelComboBox.currentText(),
            'degree': self.degreeSpinBox.value(),
            'gamma': self.gammaComboBox.currentText(),
            'coef0': self.coeff0DoubleSpinBox.value(),
            'shrinking': self.shrinkingCheckBox.isChecked(),
            'tol': self.toleranceDoubleSpinBox.value(),
            'cache_size': self.cacheSizeSpinBox.value(),
            'verbose': self.verboseCheckBox.isChecked(),
            'max_iter': int(self.maxIterationsSpinBox.value())
        }

        return params, self.getChangedValues(params, SVR())
Example #18
def evalOne(parameters):
    all_obs = []
    all_pred = []
    for location in locations:
        trainX, testX, trainY, testY = splitDataForXValidation(location, "location", data, all_features, "target")
        normalizer_X = StandardScaler()
        trainX = normalizer_X.fit_transform(trainX)
        testX = normalizer_X.transform(testX)
        normalizer_Y = StandardScaler()
        trainY = normalizer_Y.fit_transform(trainY)
        testY = normalizer_Y.transform(testY)
        model = BaggingRegressor(base_estimator=SVR(kernel='rbf',
                                                    C=parameters["C"],
                                                    cache_size=5000),
                                 max_samples=parameters["max_samples"],
                                 n_estimators=parameters["n_estimators"],
                                 verbose=0,
                                 n_jobs=-1)
        model.fit(trainX, trainY)
        prediction = model.predict(testX)
        prediction = normalizer_Y.inverse_transform(prediction)
        testY = normalizer_Y.inverse_transform(testY)
        all_obs.extend(testY)
        all_pred.extend(prediction)
        
    return rmseEval(all_obs, all_pred)[1]
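One caveat when reusing evalOne with a current scikit-learn: StandardScaler only accepts 2-D input, so scaling the 1-D target arrays needs an explicit reshape. A sketch of the pattern:

import numpy as np
from sklearn.preprocessing import StandardScaler

y = np.array([3.0, 1.5, 2.2, 4.8])
scaler = StandardScaler()
y_scaled = scaler.fit_transform(y.reshape(-1, 1)).ravel()        # to a column vector and back
y_restored = scaler.inverse_transform(y_scaled.reshape(-1, 1)).ravel()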
Example #19
 def individual_training_executor(self, dim):
     # make a pipeline with preprocessing, autoencoder, regression
     scaler = MinMaxScaler(feature_range=(-0.5,0.5))
     autoencoder = Autoencoder(logPath=self.get_path(dim), hiddenDims=[50,dim],beta=0.1)
     mlPipeline = make_pipeline(scaler, autoencoder)
     
     # read in the data and train the autoencoder
     data, targets = self.read_mopac_reactivity_data()
     mlPipeline.fit(data, targets)
     
     # test the accuracy of an SVM on the transformed data using cross validation
     latent = mlPipeline.transform(data)
     regressor = SVR(C=10000)
     cross_validator = KFold(n_splits=5, shuffle=True, random_state=40)
     predictions = cross_val_predict(regressor, latent, targets, cv=cross_validator)
     
     # make a cross_val_predict-ed vs actual graph
     main.plotScatterPlot(targets, predictions, 'predictedVsActual')
     
     # print the cross validation actual and predicted targets to file
     actualThenPredicted = np.array([targets, predictions])
     np.savetxt('actualThenPredicted.txt', actualThenPredicted)
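The same pipeline-then-cross-validate pattern without the custom Autoencoder stage, as a runnable sketch on synthetic data:

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
from sklearn.model_selection import KFold, cross_val_predict

data = np.random.rand(100, 8)
targets = np.random.rand(100)
pipe = make_pipeline(MinMaxScaler(feature_range=(-0.5, 0.5)), SVR(C=100))
cross_validator = KFold(n_splits=5, shuffle=True, random_state=40)
predictions = cross_val_predict(pipe, data, targets, cv=cross_validator)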
Example #20
    ARDRegression(),
    # HuberRegressor(),   # epsilon:  greater than 1.0, default 1.35
    LinearRegression(n_jobs=5),
    PassiveAggressiveRegressor(
        random_state=randomstate),  # C: 0.25, 0.5, 1, 5, 10
    SGDRegressor(random_state=randomstate),
    TheilSenRegressor(n_jobs=5, random_state=randomstate),
    RANSACRegressor(random_state=randomstate),
    KNeighborsRegressor(
        weights='distance'),  # n_neighbors: 3, 6, 9, 12, 15, 20
    RadiusNeighborsRegressor(weights='distance'),  # radius: 1, 2, 5, 10, 15
    MLPRegressor(max_iter=10000000, random_state=randomstate),
    DecisionTreeRegressor(
        random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8
    ExtraTreeRegressor(random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8
    SVR()  # C: 0.25, 0.5, 1, 5, 10
]

selectors = [
    reliefF.reliefF,
    fisher_score.fisher_score,
    # chi_square.chi_square,
    JMI.jmi,
    CIFE.cife,
    DISR.disr,
    MIM.mim,
    CMIM.cmim,
    ICAP.icap,
    MRMR.mrmr,
    MIFS.mifs
]
Example #21
                         l1_ratio=0.25,
                         fit_intercept=True),
            'complexity_label':
            'non-zero coefficients',
            'complexity_computer':
            lambda clf: np.count_nonzero(clf.coef_)
        },
        {
            'name': 'RandomForest',
            'instance': RandomForestRegressor(),
            'complexity_label': 'estimators',
            'complexity_computer': lambda clf: clf.n_estimators
        },
        {
            'name': 'SVR',
            'instance': SVR(kernel='rbf'),
            'complexity_label': 'support vectors',
            'complexity_computer': lambda clf: len(clf.support_vectors_)
        },
    ]
}
benchmark(configuration)

# benchmark n_features influence on prediction speed
percentile = 90
percentiles = n_feature_influence({'ridge': Ridge()}, configuration['n_train'],
                                  configuration['n_test'], [100, 250, 500],
                                  percentile)
plot_n_features_influence(percentiles, percentile)

# benchmark throughput
Example #22
    def connectWidgets(self):
        svr = SVR()
        svr.kernel = 'rbf'
        svr.degree = 3
        svr.gamma = 'auto'
        svr.coef0 = 0.0
        svr.tol = 1e-3
        svr.C = 1.0
        svr.epsilon = 0.1
        svr.shrinking = True
        svr.cache_size = 200
        svr.verbose = False
        svr.max_iter = -1

        self.cLineEdit.setText(str(svr.C))
        self.epsilonLineEdit.setText(str(svr.epsilon))
        self.kernel_list.setCurrentItem(
            self.kernel_list.findItems('Radial Basis Function',
                                       QtCore.Qt.MatchExactly)[0])
        self.degreeLineEdit.setText(str(svr.degree))
        self.coeff0LineEdit.setText(str(svr.coef0))
        self.shrinking_list.setCurrentItem(
            self.shrinking_list.findItems(str(svr.shrinking),
                                          QtCore.Qt.MatchExactly)[0])
        self.toleranceLineEdit.setText(str(svr.tol))
        self.maxIterationsLineEdit.setText(str(svr.max_iter))
Example #23
    task='meg')
ds = loader.fetch()

# Preprocessing
pipeline = PreprocessingPipeline(nodes=[
    SampleSlicer({
        'band': ['alpha'],
        'condition': ['vipassana']
    }),
    FeatureWiseNormalizer(),
    TargetTransformer("expertise_hours")
])
ds_ = pipeline.transform(ds)

# Estimator
estimator_pp = Pipeline(steps=[('svr', SVR(C=1, kernel='linear'))])

cross_validation = GroupShuffleSplit(n_splits=10, test_size=0.25)
scores = ['r2', 'explained_variance']
cv_attr = 'subject'

sl = SearchLight(estimator=estimator_pp, scoring=scores, cv=cross_validation)
sl.fit(ds_, cv_attr=cv_attr)

### Cross-validation ###
cross_validation = GroupShuffleSplit(n_splits=150, test_size=0.25)
groups = LabelEncoder().fit_transform(ds_.sa.subject)
X = ds_.samples
y = LabelEncoder().fit_transform(ds_.targets)
train_list = []
for train, test in cross_validation.split(X, y, groups=groups):
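The loop above is cut off; it presumably appends the split indices to train_list. A self-contained GroupShuffleSplit sketch of that pattern (synthetic data, loop body assumed):

import numpy as np
from sklearn.model_selection import GroupShuffleSplit

X = np.random.rand(60, 4)
y = np.random.randint(0, 2, size=60)
groups = np.repeat(np.arange(12), 5)  # 12 subjects, 5 samples each
cross_validation = GroupShuffleSplit(n_splits=10, test_size=0.25)
train_list = [train for train, test in cross_validation.split(X, y, groups=groups)]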
Example #24
K_N_N = KNeighborsClassifier()
SUPPORT_VECTOR = svm.SVC(kernel="linear")

# Ensemble classifiers
RANDOM_FOREST = RandomForestClassifier(n_estimators=100)
GRADIENT_BOOST_CL = GradientBoostingClassifier(n_estimators=100)
ADA_BOOST = AdaBoostClassifier(n_estimators=100)
EXTRA_TREE = ExtraTreesClassifier(n_estimators=100)


# Regressors
GRADIENT_BOOST_RG = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1)
LINEAR_RG = LinearRegression()
RIDGE_RG = Ridge()
LASSO_RG = Lasso()
SVR_RG = SVR()

def getClassifierMap():
    CLASSIFIER_MAP = {
    "DECISION_TREE": DECISION_TREE,
    "LOGISTIC_REGRESSION": LOGISTIC_REGRESSION,
    "NAIVE_BAYS": NAIVE_BAYS,
    "K_N_N": K_N_N,
    "SUPPORT_VECTOR": SUPPORT_VECTOR,
    "RANDOM_FOREST": RANDOM_FOREST,
    "GRADIENT_BOOST": GRADIENT_BOOST_CL,
    "ADA_BOOST": GRADIENT_BOOST_CL,
    "EXTRA_TREE": EXTRA_TREE
    }
    return CLASSIFIER_MAP
Example #25
 def __sv_regressor__(self, data, target):
     from sklearn.svm import SVR  # sklearn.svm.classes was removed; use the public path
     svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
     svr_rbf.fit(data, target)
     self.ensemble = svr_rbf
Example #26
    def connectWidgets(self):
        svr = SVR()
        svr.kernel = 'rbf'
        svr.degree = 3
        svr.gamma = 'auto'
        svr.coef0 = 0.0
        svr.tol = 1e-3
        svr.C = 1.0
        svr.epsilon = 0.1
        svr.shrinking = True
        svr.cache_size = 200
        svr.verbose = False
        svr.max_iter = -1

        self.cDoubleSpinBox.setValue(svr.C)
        self.epsilonDoubleSpinBox.setValue(svr.epsilon)
        self.defaultComboItem(self.kernelComboBox, svr.kernel)
        self.degreeSpinBox.setValue(svr.degree)
        self.defaultComboItem(self.gammaComboBox, svr.gamma)
        self.coeff0DoubleSpinBox.setValue(svr.coef0)
        self.shrinkingCheckBox.setChecked(svr.shrinking)
        self.toleranceDoubleSpinBox.setValue(svr.tol)
        self.cacheSizeSpinBox.setValue(svr.cache_size)
        self.verboseCheckBox.setChecked(svr.verbose)
        self.maxIterationsSpinBox.setValue(svr.max_iter)
Example #27
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

print(X_train[123, :])
'''
norm1 = np.linalg.norm(y_train)
if norm1 != 0:
    y_train, y_test = y_train / norm1, y_test / norm1
print(norm1)
'''

print(y_train.shape)

# note: both models below are assigned but never used; lasso and enet are fitted instead
model = SVR(C=1.0, gamma=1.0)
model = LinearRegression()

lasso = Lasso(alpha=0.1).fit(X_train, y_train)
enet = ElasticNet(alpha=0.1, l1_ratio=0.7).fit(X_train, y_train)

y_pred = lasso.predict(X_test)

print("MSE", mean_squared_error(y_test, y_pred))
m = np.mean(y_test)
print("MSE (Mean)", mean_squared_error(y_test, m * np.ones(len(y_test))))

print("r^2 on test data", r2_score(y_test, y_pred))

plt.plot(enet.coef_, label='Elastic net coefficients')
plt.plot(lasso.coef_, label='Lasso coefficients')
Example #28
                       'sample_slicer__band': [[c] for c in np.unique(ds.sa.band)],
                       'target_trans__target':["age"],
                       'estimator__clf__C': [1],                          
                       'cv__n_splits': [50],
                       'analysis__radius':[9.],
                        }


_default_config = {
               
                        'prepro':['sample_slicer', 'feature_norm', 'target_trans'],
                        'sample_slicer__band': ['alpha'], 
                        'sample_slicer__condition' : ['vipassana'],
                        'target_trans__target':"expertise_hours",
                        
                        'estimator': [('clf', SVR(C=1, kernel='linear'))],
                        'estimator__clf__C':1,
                        'estimator__clf__kernel':'linear',
                        
                        'cv': ShuffleSplit,
                        'cv__n_splits': 50,
                        'cv__test_size': 0.25,
                        
                        'scores' : ['neg_mean_squared_error','r2'],
                        
                        'analysis': SearchLight,
                        'analysis__n_jobs': 15,
                        'analysis__permutation':100,
                        'kwargs__cv_attr': 'subject',
                        'analysis__verbose':0,
Example #29
def set_learning_method(config, X_train, y_train):
    """
    Instantiates the sklearn's class corresponding to the value set in the 
    configuration file for running the learning method.
    
    TODO: use reflection to instantiate the classes
    
    @param config: configuration object
    @return: an estimator with fit() and predict() methods
    """
    estimator = None
    scorers = None  # referenced in the return below even when no learning config is present

    learning_cfg = config.get("learning", None)
    if learning_cfg:
        p = learning_cfg.get("parameters", None)
        o = learning_cfg.get("optimize", None)
        scorers = set_scorer_functions(learning_cfg.get("scorer", ['mae', 'rmse']))

        method_name = learning_cfg.get("method", None)
        if method_name == "SVR":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(SVR(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))

            elif p:
                estimator = SVR(C=p.get("C", 10),
                                epsilon=p.get('epsilon', 0.01),
                                kernel=p.get('kernel', 'rbf'),
                                degree=p.get('degree', 3),
                                gamma=p.get('gamma', 0.0034),
                                tol=p.get('tol', 1e-3),
                                verbose=False)
            else:
                estimator = SVR()

        elif method_name == "SVC":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(SVC(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get('cv', 5),
                                           o.get('verbose', True),
                                           o.get('n_jobs', 1))

            elif p:
                estimator = SVC(C=p.get('C', 1.0),
                                kernel=p.get('kernel', 'rbf'),
                                degree=p.get('degree', 3),
                                gamma=p.get('gamma', 0.0),
                                coef0=p.get('coef0', 0.0),
                                tol=p.get('tol', 1e-3),
                                verbose=p.get('verbose', False))
            else:
                estimator = SVC()

        elif method_name == "LassoCV":
            if p:
                estimator = LassoCV(eps=p.get('eps', 1e-3),
                                    n_alphas=p.get('n_alphas', 100),
                                    normalize=p.get('normalize', False),
                                    precompute=p.get('precompute', 'auto'),
                                    max_iter=p.get('max_iter', 1000),
                                    tol=p.get('tol', 1e-4),
                                    cv=p.get('cv', 10),
                                    verbose=False)
            else:
                estimator = LassoCV()

        elif method_name == "LassoLars":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(LassoLars(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))

            elif p:  # mirror the SVR/SVC branches so a tuned estimator is not overwritten
                estimator = LassoLars(alpha=p.get('alpha', 1.0),
                                      fit_intercept=p.get(
                                          'fit_intercept', True),
                                      verbose=p.get('verbose', False),
                                      normalize=p.get('normalize', True),
                                      max_iter=p.get('max_iter', 500),
                                      fit_path=p.get('fit_path', True))
            else:
                estimator = LassoLars()

        elif method_name == "LassoLarsCV":
            if p:
                estimator = LassoLarsCV(max_iter=p.get('max_iter', 500),
                                        normalize=p.get('normalize', True),
                                        max_n_alphas=p.get(
                                            'max_n_alphas', 1000),
                                        n_jobs=p.get('n_jobs', 1),
                                        cv=p.get('cv', 10),
                                        verbose=False)
            else:
                estimator = LassoLarsCV()

    return estimator, scorers
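The configuration shape this function expects can be read off its get() calls; a minimal example that takes the non-optimizing SVR path (given training arrays X_train and y_train):

config = {
    'learning': {
        'method': 'SVR',
        'scorer': ['mae', 'rmse'],
        'parameters': {'C': 10, 'epsilon': 0.01, 'kernel': 'rbf'},
    }
}
estimator, scorers = set_learning_method(config, X_train, y_train)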
Example #30
from ex30.ex30_lib_graph import plot2
from sklearn.svm import SVR  # sklearn.svm.classes was removed; use the public path

OUTPUT_PNG_FILE = '/experiments/ex30/ex30_svr.png'

X = [[float(x)] for x in range(0, 24)]
Y = [
    12.0, 13.0, 13.0, 13.0, 28.0, 31.0, 38.0, 60.0, 85.0, 80.0, 64.0, 60.0,
    59.0, 58.0, 65.0, 70.0, 80.0, 90.0, 110.0, 100.0, 85.0, 65.0, 45.0, 20.0
]

X2 = [[float(x) / 10.0] for x in range(0, 231)]

model = SVR(kernel='rbf', C=10)
model.fit(X, Y)
Y_pred = model.predict(X2)

print(str(Y_pred))

plot2(Y, Y_pred, OUTPUT_PNG_FILE, "Observed pollution concentration levels",
      "Predicted pollution concentration levels by SVR")
Example #31
			'RadiusNeighborsClassifier':RadiusNeighborsClassifier(),
			'RadiusNeighborsRegressor':RadiusNeighborsRegressor(),
			'RandomForestClassifier':RandomForestClassifier(),
			'RandomForestRegressor':RandomForestRegressor(),
			'RandomizedLasso':RandomizedLasso(),
			'RandomizedLogisticRegression':RandomizedLogisticRegression(),
			'RandomizedPCA':RandomizedPCA(),
			'Ridge':Ridge(),
			'RidgeCV':RidgeCV(),
			'RidgeClassifier':RidgeClassifier(),
			'RidgeClassifierCV':RidgeClassifierCV(),
			'RobustScaler':RobustScaler(),
			'SGDClassifier':SGDClassifier(),
			'SGDRegressor':SGDRegressor(),
			'SVC':SVC(),
			'SVR':SVR(),
			'SelectFdr':SelectFdr(),
			'SelectFpr':SelectFpr(),
			'SelectFwe':SelectFwe(),
			'SelectKBest':SelectKBest(),
			'SelectPercentile':SelectPercentile(),
			'ShrunkCovariance':ShrunkCovariance(),
			'SkewedChi2Sampler':SkewedChi2Sampler(),
			'SparsePCA':SparsePCA(),
			'SparseRandomProjection':SparseRandomProjection(),
			'SpectralBiclustering':SpectralBiclustering(),
			'SpectralClustering':SpectralClustering(),
			'SpectralCoclustering':SpectralCoclustering(),
			'SpectralEmbedding':SpectralEmbedding(),
			'StandardScaler':StandardScaler(),
			'TSNE':TSNE(),
Example #32
output = open(OUTPUT_DATA_FILE, 'w')
output.write("location,observation,prediction\n")

for location in locations:
    print(str(location))
    trainX, testX, trainY, testY = splitDataForXValidation(
        location, "location", data, all_features, "target")
    normalizer_X = StandardScaler()
    trainX = normalizer_X.fit_transform(trainX)
    testX = normalizer_X.transform(testX)
    normalizer_Y = StandardScaler()
    trainY = normalizer_Y.fit_transform(trainY)
    testY = normalizer_Y.transform(testY)
    model = BaggingRegressor(base_estimator=SVR(kernel='rbf',
                                                C=40,
                                                cache_size=5000),
                             max_samples=4200,
                             n_estimators=10,
                             verbose=0,
                             n_jobs=-1)
    model.fit(trainX, trainY)
    prediction = model.predict(testX)
    prediction = normalizer_Y.inverse_transform(prediction)
    testY = normalizer_Y.inverse_transform(testY)

    for i in range(0, len(testY)):
        output.write(str(location))
        output.write(",")
        output.write(str(testY[i]))
        output.write(",")
Example #33
def train(driverSpeed, sectionSpeed, newData, firstTime, n, minLon, lonLen,
          minLat, latLen, defaultVel):
    '''Return an SVR trained to map [section average speed, driver average speed, occupancy] -> instantaneous speed.'''
    X = []
    Y = []
    for file in newData:
        df = pandas.read_csv(
            file,
            header=None,
            names=["taxiId", "lat", "lon", "busy", "time", "vel", "sec"],
            dtype={
                "taxiId": numpy.int16,
                "lat": numpy.float32,
                "lon": numpy.float32,
                "busy": numpy.int8,
                "time": numpy.str,
                "vel": numpy.float32,
                "sec": numpy.int16
            })

        taxiId1 = -1
        sectionId1 = 0
        busy1 = 0
        time1 = firstTime
        for row in df.itertuples(index=False):
            taxiId2 = row[0]
            busy2 = row[3]
            time2 = datetime.datetime.strptime(row[4], "%Y/%m/%d %H:%M:%S")
            v = row[5]
            sectionId2 = row[6]
            if taxiId1 == taxiId2 and time1.hour == time2.hour and not numpy.isnan(v):
                # instantaneous speed of the previous point
                Y.append(v)
                x = []
                # section average speed
                v = sectionSpeed[sectionId1][time1.hour - firstTime.hour]
                if numpy.isnan(v):
                    x.append(defaultVel)
                else:
                    x.append(v)
                # driver's average speed
                v = driverSpeed[taxiId1 - 1][time1.hour - firstTime.hour]
                if numpy.isnan(v):
                    x.append(defaultVel)
                else:
                    x.append(v)
                # whether the taxi was carrying a passenger
                x.append(busy1)
                X.append(x)
            taxiId1 = taxiId2
            busy1 = busy2
            time1 = time2
            sectionId1 = sectionId2

    clf = SVR(C=1.0,
              cache_size=200,
              coef0=0.0,
              degree=3,
              epsilon=0.2,
              gamma='auto',
              kernel='rbf',
              max_iter=-1,
              shrinking=True,
              tol=0.001,
              verbose=False)
    clf.fit(X, Y)

    return clf
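The returned regressor then maps the same three features to an instantaneous speed; an illustrative call (argument values made up):

clf = train(driverSpeed, sectionSpeed, newData, firstTime, n,
            minLon, lonLen, minLat, latLen, defaultVel)
print(clf.predict([[35.0, 30.0, 1]]))  # [section avg speed, driver avg speed, occupied flag]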
Example #34
        classifier = DecisionTreeClassifier(max_depth=tree_depth)
    if alg == 1:
        classifier = RandomForestClassifier(n_estimators=random_forest_size,
                                            random_state=seed,
                                            n_jobs=10)
    if alg == 2:
        classifier = create_ensemble(seed)
    if alg == 3:
        classifier = AdaBoostClassifier(DecisionTreeClassifier(),
                                        n_estimators=boosting_size,
                                        random_state=seed)
    if alg == 4:
        scaler = StandardScaler()
        svr = SVR(kernel='rbf',
                  cache_size=4000,
                  C=1e3,
                  gamma=0.0001,
                  max_iter=200000,
                  epsilon=0.0001)
        classifier = Pipeline([('standardize', scaler), ('svr', svr)])
    if alg == 5:
        classifier = GaussianNB()

    if classifier == "not_init":
        print("Classifier not init, exit")
        exit(-1)

    if debug:
        print("TRAINING MODEL...")

    classifier.fit(training_x_no_missing, training_y)
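Note that the alg == 4 branch wires a regressor (SVR) into a variable named classifier. Standalone, the same scaled-SVR pipeline looks like this on synthetic data:

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

X = np.random.rand(80, 6)
y = np.random.rand(80)
model = Pipeline([('standardize', StandardScaler()),
                  ('svr', SVR(kernel='rbf', C=1e3, gamma=1e-4,
                              epsilon=1e-4, max_iter=200000, cache_size=4000))])
model.fit(X, y)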