Ejemplo n.º 1
0
    def run(self):
        """Read the SVR settings currently shown in the widgets.

        Returns:
            A 2-tuple ``(params, changed)``: ``params`` is the dict of every
            value read from the widgets, ``changed`` is whatever
            ``self.getChangedValues(params, SVR())`` returns.

        Raises:
            KeyError: if the kernel combo box text has no entry in the
                label-to-kernel table below.
        """
        # Display label shown in the combo box -> value used for 'kernel'.
        kernel_by_label = dict((
            ('Radial Basis Function', 'rbf'),
            ('Linear', 'linear'),
            ('Polynomial', 'poly'),
            ('Sigmoid', 'sigmoid'),
            ('Precomputed', 'precomputed'),
        ))
        chosen_kernel = kernel_by_label[self.kernelComboBox.currentText()]

        params = dict(
            C=self.cDoubleSpinBox.value(),
            epsilon=self.epsilonDoubleSpinBox.value(),
            kernel=chosen_kernel,
            degree=self.degreeSpinBox.value(),
            gamma=self.gammaComboBox.currentText(),
            coef0=self.coeff0DoubleSpinBox.value(),
            shrinking=self.shrinkingCheckBox.isChecked(),
            tol=self.toleranceDoubleSpinBox.value(),
            cache_size=self.cacheSizeSpinBox.value(),
            verbose=self.verboseCheckBox.isChecked(),
            max_iter=int(self.maxIterationsSpinBox.value()),
        )

        changed = self.getChangedValues(params, SVR())
        return params, changed
Ejemplo n.º 2
0
 def init_model(self):
     """Return an RBF-kernel SVR configured from this object's attributes.

     Reads ``self.c``, ``self.eps``, ``self.tol``, ``self.max_iter`` and
     ``self.gamma``.
     """
     settings = dict(
         kernel="rbf",
         C=self.c,
         epsilon=self.eps,
         tol=self.tol,
         max_iter=self.max_iter,
         gamma=self.gamma,
     )
     return SVR(**settings)
Ejemplo n.º 3
0
def SVR_ST(trainFileName, testFileName):
    """Fit one linear SVR per store and predict that store's test rows.

    Both files are read with ``ld.LoadData_DATA_ST``; the results are indexed
    by the store keys '1'..'5'. For every record the first two fields are
    kept as the item key (test records only), the remaining fields are
    converted to float, and the first two converted values are dropped.
    Training records additionally use their last value as the target.

    Args:
        trainFileName: path handed to ``ld.LoadData_DATA_ST`` for training.
        testFileName: path handed to ``ld.LoadData_DATA_ST`` for testing.

    Returns:
        A list with one entry per test row:
        ``[key0, key1, prediction, test_X[row, -4], 2 * test_X[row, -5]]``
        where the three numbers are strings formatted with ``'%.4f'`` and the
        prediction is clamped at 0 from below.
    """
    trainData = ld.LoadData_DATA_ST(trainFileName)
    testData = ld.LoadData_DATA_ST(testFileName)

    res = []
    for store_id in ('1', '2', '3', '4', '5'):
        train_X = []
        train_y = []
        for record in trainData[store_id]:
            values = [float(x) for x in record[2:]]
            train_X.append(values[2:-1])
            train_y.append(values[-1])

        test_X = []
        items = []
        for record in testData[store_id]:
            items.append((record[0], record[1]))
            values = [float(x) for x in record[2:]]
            test_X.append(values[2:])

        # Plain ndarrays instead of np.matrix: scikit-learn estimators no
        # longer accept np.matrix input, and 2-D indexing is unchanged.
        train_X = np.asarray(train_X)
        test_X = np.asarray(test_X)

        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        # NOTE(review): the training window is [-8:-3] while the test window
        # is [-7:-2]; presumably this compensates for a different column
        # layout of the test file -- confirm against the loader.
        pred_y = svr.fit(train_X[:, -8:-3], train_y).predict(test_X[:, -7:-2])

        # A dedicated row index so the store loop variable is not shadowed.
        for row in range(len(test_X)):
            res.append([
                items[row][0], items[row][1],
                '%.4f' % max(pred_y[row], 0),
                '%.4f' % test_X[row, -4],
                '%.4f' % (float(test_X[row, -5]) * 2)
            ])
    return res
    def connectWidgets(self):
        """Fill the line edits and list widgets from a reference SVR.

        A fresh ``SVR`` gets the attribute values listed below assigned
        explicitly; the widgets are then initialised from those attributes.
        """
        svr = SVR()
        reference_values = (
            ('kernel', 'rbf'),
            ('degree', 3),
            ('gamma', 'auto'),
            ('coef0', 0.0),
            ('tol', 1e-3),
            ('C', 1.0),
            ('epsilon', 0.1),
            ('shrinking', True),
            ('cache_size', 200),
            ('verbose', False),
            ('max_iter', -1),
        )
        for attribute, value in reference_values:
            setattr(svr, attribute, value)

        self.cLineEdit.setText(str(svr.C))
        self.epsilonLineEdit.setText(str(svr.epsilon))

        # Select the kernel entry by its display label.
        kernel_matches = self.kernel_list.findItems(
            'Radial Basis Function', QtCore.Qt.MatchExactly)
        self.kernel_list.setCurrentItem(kernel_matches[0])

        self.degreeLineEdit.setText(str(svr.degree))
        self.coeff0LineEdit.setText(str(svr.coef0))

        # The shrinking list is matched against the text of the boolean.
        shrinking_matches = self.shrinking_list.findItems(
            str(svr.shrinking), QtCore.Qt.MatchExactly)
        self.shrinking_list.setCurrentItem(shrinking_matches[0])

        self.toleranceLineEdit.setText(str(svr.tol))
        self.maxIterationsLineEdit.setText(str(svr.max_iter))
Ejemplo n.º 5
0
    def connectWidgets(self):
        """Initialise every input widget from a reference SVR.

        A fresh ``SVR`` gets the attribute values listed below assigned
        explicitly; each widget is then set from the matching attribute.
        """
        svr = SVR()
        reference_values = {
            'kernel': 'rbf',
            'degree': 3,
            'gamma': 'auto',
            'coef0': 0.0,
            'tol': 1e-3,
            'C': 1.0,
            'epsilon': 0.1,
            'shrinking': True,
            'cache_size': 200,
            'verbose': False,
            'max_iter': -1,
        }
        for attribute, value in reference_values.items():
            setattr(svr, attribute, value)

        # Widgets are updated in the same order as before.
        self.cDoubleSpinBox.setValue(svr.C)
        self.epsilonDoubleSpinBox.setValue(svr.epsilon)
        self.defaultComboItem(self.kernelComboBox, svr.kernel)
        self.degreeSpinBox.setValue(svr.degree)
        self.defaultComboItem(self.gammaComboBox, svr.gamma)
        self.coeff0DoubleSpinBox.setValue(svr.coef0)
        self.shrinkingCheckBox.setChecked(svr.shrinking)
        self.toleranceDoubleSpinBox.setValue(svr.tol)
        self.cacheSizeSpinBox.setValue(svr.cache_size)
        self.verboseCheckBox.setChecked(svr.verbose)
        self.maxIterationsSpinBox.setValue(svr.max_iter)
Ejemplo n.º 6
0
 def init_model(self):
     """Return a sigmoid-kernel SVR configured from this object's attributes.

     Reads ``self.c``, ``self.eps``, ``self.tol``, ``self.max_iter``,
     ``self.coef0`` and ``self.gamma``.
     """
     settings = {
         "kernel": "sigmoid",
         "C": self.c,
         "epsilon": self.eps,
         "tol": self.tol,
         "max_iter": self.max_iter,
         "coef0": self.coef0,
         "gamma": self.gamma,
     }
     return SVR(**settings)
def test_ml_pipeline():
    """Fit an SVR on a train split and plot predicted vs. actual test targets.

    The data comes from ``ReactivityDataLoader().load_mopac_learning()``.
    The working directory is changed to ``~/Desktop`` before
    ``main.plotScatterPlot`` is called.
    """
    features, labels = ReactivityDataLoader().load_mopac_learning()
    model = SVR(C=1000)

    split = train_test_split(features, labels)
    train_features, test_features, train_labels, test_labels = split
    model.fit(train_features, train_labels)

    desktop = Path.home() / 'Desktop'
    os.chdir(str(desktop))

    predicted = model.predict(test_features)
    main.plotScatterPlot(test_labels, predicted, 'predictedVsActual')
Ejemplo n.º 8
0
 def init_model(self):
     """Return a polynomial-kernel SVR configured from this object's attributes.

     Reads ``self.degree``, ``self.c``, ``self.eps``, ``self.tol``,
     ``self.max_iter``, ``self.coef0`` and ``self.gamma``.
     """
     settings = [
         ("kernel", "poly"),
         ("degree", self.degree),
         ("C", self.c),
         ("epsilon", self.eps),
         ("tol", self.tol),
         ("max_iter", self.max_iter),
         ("coef0", self.coef0),
         ("gamma", self.gamma),
     ]
     return SVR(**dict(settings))
Ejemplo n.º 9
0
    def run(self):
        """Read the SVR settings currently shown in the widgets.

        The kernel is taken verbatim from the combo box text.

        Returns:
            A 2-tuple ``(params, changed)``: ``params`` is the dict of every
            value read from the widgets, ``changed`` is whatever
            ``self.getChangedValues(params, SVR())`` returns.
        """
        params = dict(
            C=self.cDoubleSpinBox.value(),
            epsilon=self.epsilonDoubleSpinBox.value(),
            kernel=self.kernelComboBox.currentText(),
            degree=self.degreeSpinBox.value(),
            gamma=self.gammaComboBox.currentText(),
            coef0=self.coeff0DoubleSpinBox.value(),
            shrinking=self.shrinkingCheckBox.isChecked(),
            tol=self.toleranceDoubleSpinBox.value(),
            cache_size=self.cacheSizeSpinBox.value(),
            verbose=self.verboseCheckBox.isChecked(),
            max_iter=int(self.maxIterationsSpinBox.value()),
        )

        changed = self.getChangedValues(params, SVR())
        return params, changed
Ejemplo n.º 10
0
def SVR_ALL(trainFileName, testFileName):
    """Fit a single linear SVR on the training file and predict the test file.

    Args:
        trainFileName: path handed to ``ld.LoadData_DATA_LABEL_ITEM``.
        testFileName: path handed to ``ld.LoadData_DATA_ITEM``.

    Returns:
        A list with one entry per test row:
        ``[item, 'all', prediction, test_X[row, -4], 2 * test_X[row, -5]]``
        where the three numbers are strings formatted with ``'%.4f'`` and the
        prediction is clamped at 0 from below.
    """
    train_X, train_y, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    test_X, items = ld.LoadData_DATA_ITEM(testFileName)

    # NOTE(review): recent scikit-learn releases reject np.matrix input;
    # kept as-is because the loader's return type is not visible here.
    train_X = np.matrix(train_X)
    test_X = np.matrix(test_X)

    model = SVR(kernel='linear', epsilon=0.5, C=1)
    fitted = model.fit(train_X[:, -8:-3], train_y)
    pred_y = fitted.predict(test_X[:, -7:-2])

    return [
        [
            items[row],
            'all',
            '%.4f' % max(pred_y[row], 0),
            '%.4f' % test_X[row, -4],
            '%.4f' % (float(test_X[row, -5]) * 2),
        ]
        for row in range(len(test_X))
    ]
Ejemplo n.º 11
0
def SVR_ALL_train():
    """Fit a linear SVR on EVAL_DataSet1 and predict VALIDATION_DataSet1.

    Both CSV files are read with ``ld.loadData_all``; the labels returned for
    the validation file are not used.

    Returns:
        A list with one entry per validation row:
        ``[item, 'all', prediction, test_X[row, -4], 2 * test_X[row, -5]]``
        where the three numbers are strings formatted with ``'%.2f'`` and the
        prediction is clamped at 0 from below.
    """
    train_X, train_y, _ = ld.loadData_all('./data/EVAL_DataSet1.csv')
    test_X, _unused_labels, items = ld.loadData_all(
        './data/VALIDATION_DataSet1.csv')

    # NOTE(review): recent scikit-learn releases reject np.matrix input;
    # kept as-is because the loader's return type is not visible here.
    train_X = np.matrix(train_X)
    test_X = np.matrix(test_X)

    model = SVR(kernel='linear', epsilon=0.5, C=1)
    fitted = model.fit(train_X[:, -8:-1], train_y)
    pred_y = fitted.predict(test_X[:, -8:-1])

    return [
        [
            items[row],
            'all',
            '%.2f' % max(pred_y[row], 0),
            '%.2f' % test_X[row, -4],
            '%.2f' % (float(test_X[row, -5]) * 2),
        ]
        for row in range(len(test_X))
    ]
Ejemplo n.º 12
0
def evalOne(parameters):
    """Evaluate one bagged-SVR parameter set over all locations.

    For every entry of the module-level ``locations`` the data is split with
    ``splitDataForXValidation``, inputs and targets are standardised, a
    ``BaggingRegressor`` of RBF ``SVR`` models is fitted, and the predictions
    are mapped back to the original target scale.

    Args:
        parameters: mapping providing ``"C"``, ``"max_samples"`` and
            ``"n_estimators"``.

    Returns:
        Element ``[1]`` of ``rmseEval(observations, predictions)`` computed
        over the test data of all locations together.
    """
    observed = []
    predicted = []
    for location in locations:
        trainX, testX, trainY, testY = splitDataForXValidation(
            location, "location", data, all_features, "target")

        x_scaler = StandardScaler()
        trainX = x_scaler.fit_transform(trainX)
        testX = x_scaler.transform(testX)

        y_scaler = StandardScaler()
        trainY = y_scaler.fit_transform(trainY)
        testY = y_scaler.transform(testY)

        svr = SVR(kernel='rbf', C=parameters["C"], cache_size=5000)
        model = BaggingRegressor(
            base_estimator=svr,
            max_samples=parameters["max_samples"],
            n_estimators=parameters["n_estimators"],
            verbose=0,
            n_jobs=-1)
        model.fit(trainX, trainY)

        # Undo the target scaling before collecting the results.
        prediction = y_scaler.inverse_transform(model.predict(testX))
        testY = y_scaler.inverse_transform(testY)
        observed.extend(testY)
        predicted.extend(prediction)

    scores = rmseEval(observed, predicted)
    return scores[1]
Ejemplo n.º 13
0
 def individual_training_executor(self, dim):
     """Train a scaler + autoencoder pipeline and cross-validate an SVR on it.

     The pipeline is fitted on the data from
     ``self.read_mopac_reactivity_data()``. The transformed data is then
     scored with 5-fold ``cross_val_predict``, a predicted-vs-actual scatter
     plot is drawn, and both arrays are written to
     ``actualThenPredicted.txt``.

     Args:
         dim: size of the last hidden layer passed to ``Autoencoder``; also
             passed to ``self.get_path`` to obtain the log path.
     """
     # Preprocessing followed by the autoencoder.
     mlPipeline = make_pipeline(
         MinMaxScaler(feature_range=(-0.5, 0.5)),
         Autoencoder(logPath=self.get_path(dim),
                     hiddenDims=[50, dim],
                     beta=0.1),
     )

     # Fit the pipeline on the full data set.
     data, targets = self.read_mopac_reactivity_data()
     mlPipeline.fit(data, targets)

     # Cross-validated SVR predictions on the transformed representation.
     latent = mlPipeline.transform(data)
     predictions = cross_val_predict(
         SVR(C=10000),
         latent,
         targets,
         cv=KFold(n_splits=5, shuffle=True, random_state=40))

     # Plot, then dump actual targets (row 0) and predictions (row 1).
     main.plotScatterPlot(targets, predictions, 'predictedVsActual')
     np.savetxt('actualThenPredicted.txt', np.array([targets, predictions]))
Ejemplo n.º 14
0
def SVR_ST_train():
    """Fit one linear SVR per store on EVAL_DataSetST1 and predict
    VALIDATION_DataSetST1.

    Both CSV files are read with ``ld.loadData_ST``; the results are indexed
    by the store keys '1'..'5'. For every record the first two fields are
    kept as the item key (validation records only), the remaining fields are
    converted to float, and the first two and the last converted values are
    excluded from the feature row. Training records use the last value as
    the target.

    Returns:
        A list with one entry per validation row:
        ``[key0, key1, prediction, test_X[row, -4], 2 * test_X[row, -5]]``
        where the three numbers are strings formatted with ``'%.2f'`` and
        each is clamped at 0 from below.
    """
    trainData = ld.loadData_ST('./data/EVAL_DataSetST1.csv')
    testData = ld.loadData_ST('./data/VALIDATION_DataSetST1.csv')

    res = []
    for store_id in ('1', '2', '3', '4', '5'):
        train_X = []
        train_y = []
        for record in trainData[store_id]:
            values = [float(x) for x in record[2:]]
            train_X.append(values[2:-1])
            train_y.append(values[-1])

        # The validation labels were collected but never used, so only the
        # keys and feature rows are gathered here.
        test_X = []
        items = []
        for record in testData[store_id]:
            items.append((record[0], record[1]))
            values = [float(x) for x in record[2:]]
            test_X.append(values[2:-1])

        # Plain ndarrays instead of np.matrix: scikit-learn estimators no
        # longer accept np.matrix input, and 2-D indexing is unchanged.
        train_X = np.asarray(train_X)
        test_X = np.asarray(test_X)

        svr = SVR(kernel='linear', epsilon=0.5, C=1)
        pred_y = svr.fit(train_X[:, -8:-1], train_y).predict(test_X[:, -8:-1])

        # A dedicated row index so the store loop variable is not shadowed.
        for row in range(len(test_X)):
            res.append([
                items[row][0], items[row][1],
                '%.2f' % max(pred_y[row], 0),
                '%.2f' % max(test_X[row, -4], 0),
                '%.2f' % max(2 * test_X[row, -5], 0)
            ])
    return res
Ejemplo n.º 15
0
    ARDRegression(),
    # HuberRegressor(),   # epsilon:  greater than 1.0, default 1.35
    LinearRegression(n_jobs=5),
    PassiveAggressiveRegressor(
        random_state=randomstate),  # C: 0.25, 0.5, 1, 5, 10
    SGDRegressor(random_state=randomstate),
    TheilSenRegressor(n_jobs=5, random_state=randomstate),
    RANSACRegressor(random_state=randomstate),
    KNeighborsRegressor(
        weights='distance'),  # n_neighbors: 3, 6, 9, 12, 15, 20
    RadiusNeighborsRegressor(weights='distance'),  # radius: 1, 2, 5, 10, 15
    MLPRegressor(max_iter=10000000, random_state=randomstate),
    DecisionTreeRegressor(
        random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8
    ExtraTreeRegressor(random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8
    SVR()  # C: 0.25, 0.5, 1, 5, 10
]

# Feature-selection callables, one per selector module referenced below.
# NOTE(review): the module names look like scikit-feature selectors --
# confirm against the file's imports. chi_square is deliberately disabled.
selectors = [
    reliefF.reliefF,
    fisher_score.fisher_score,
    # chi_square.chi_square,
    JMI.jmi,
    CIFE.cife,
    DISR.disr,
    MIM.mim,
    CMIM.cmim,
    ICAP.icap,
    MRMR.mrmr,
    MIFS.mifs
]
                         l1_ratio=0.25,
                         fit_intercept=True),
            'complexity_label':
            'non-zero coefficients',
            'complexity_computer':
            lambda clf: np.count_nonzero(clf.coef_)
        },
        {
            'name': 'RandomForest',
            'instance': RandomForestRegressor(),
            'complexity_label': 'estimators',
            'complexity_computer': lambda clf: clf.n_estimators
        },
        {
            'name': 'SVR',
            'instance': SVR(kernel='rbf'),
            'complexity_label': 'support vectors',
            'complexity_computer': lambda clf: len(clf.support_vectors_)
        },
    ]
}
# Run benchmark() with the `configuration` mapping built above.
benchmark(configuration)

# benchmark n_features influence on prediction speed
# Only a Ridge estimator is measured, for 100, 250 and 500 features, using
# the train/test sizes stored in `configuration`; the 90th percentile is
# passed to both the measurement and the plotting helper.
percentile = 90
percentiles = n_feature_influence({'ridge': Ridge()}, configuration['n_train'],
                                  configuration['n_test'], [100, 250, 500],
                                  percentile)
plot_n_features_influence(percentiles, percentile)

# benchmark throughput
 def __sv_regressor__(self, data, target):
     """Fit an RBF-kernel SVR and store it in ``self.ensemble``.

     Args:
         data: training inputs passed to ``SVR.fit``.
         target: training targets passed to ``SVR.fit``.
     """
     # Import from the public package path: ``sklearn.svm.classes`` was a
     # private module that has been deprecated and removed from
     # scikit-learn, while ``sklearn.svm.SVR`` works on every release.
     from sklearn.svm import SVR
     svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
     svr_rbf.fit(data, target)
     self.ensemble = svr_rbf
Ejemplo n.º 18
0
        classifier = DecisionTreeClassifier(max_depth=tree_depth)
    if alg == 1:
        classifier = RandomForestClassifier(n_estimators=random_forest_size,
                                            random_state=seed,
                                            n_jobs=10)
    if alg == 2:
        classifier = create_ensemble(seed)
    if alg == 3:
        classifier = AdaBoostClassifier(DecisionTreeClassifier(),
                                        n_estimators=boosting_size,
                                        random_state=seed)
    if alg == 4:
        scaler = StandardScaler()
        svr = SVR(kernel='rbf',
                  cache_size=4000,
                  C=1e3,
                  gamma=0.0001,
                  max_iter=200000,
                  epsilon=0.0001)
        classifier = Pipeline([('standardize', scaler), ('svr', svr)])
    if alg == 5:
        classifier = GaussianNB()

    if classifier == "not_init":
        print("Classifier not init, exit")
        exit(-1)

    if debug:
        print("TRAINING MODEL...")

    classifier.fit(training_x_no_missing, training_y)
                       'sample_slicer__band': [[c] for c in np.unique(ds.sa.band)],
                       'target_trans__target':["age"],
                       'estimator__clf__C': [1],                          
                       'cv__n_splits': [50],
                       'analysis__radius':[9.],
                        }


_default_config = {
               
                        'prepro':['sample_slicer', 'feature_norm', 'target_trans'],
                        'sample_slicer__band': ['alpha'], 
                        'sample_slicer__condition' : ['vipassana'],
                        'target_trans__target':"expertise_hours",
                        
                        'estimator': [('clf', SVR(C=1, kernel='linear'))],
                        'estimator__clf__C':1,
                        'estimator__clf__kernel':'linear',
                        
                        'cv': ShuffleSplit,
                        'cv__n_splits': 50,
                        'cv__test_size': 0.25,
                        
                        'scores' : ['neg_mean_squared_error','r2'],
                        
                        'analysis': SearchLight,
                        'analysis__n_jobs': 15,
                        'analysis__permutation':100,
                        'kwargs__cv_attr': 'subject',
                        'analysis__verbose':0,
Ejemplo n.º 20
0
def _tuned_estimator(base_estimator, o, X_train, y_train, scorers):
    """Run optimize_model on ``base_estimator`` with the "optimize" settings ``o``."""
    tune_params = set_optimization_params(o)
    return optimize_model(base_estimator, X_train,
                          y_train, tune_params, scorers,
                          o.get("cv", 5),
                          o.get("verbose", True),
                          o.get("n_jobs", 1))


def set_learning_method(config, X_train, y_train):
    """
    Instantiates the sklearn's class corresponding to the value set in the
    configuration file for running the learning method.

    For SVR, SVC and LassoLars an "optimize" section takes precedence over a
    "parameters" section; when neither is present the estimator is built
    with its default arguments. LassoCV and LassoLarsCV only look at
    "parameters".

    TODO: use reflection to instantiate the classes

    @param config: configuration object; its "learning" entry may provide
        "method", "parameters", "optimize" and "scorer"
    @param X_train: training inputs, only used when optimizing
    @param y_train: training targets, only used when optimizing
    @return: a tuple (estimator, scorers). estimator is None when the method
        name is missing or unknown; both are None when the configuration has
        no "learning" section
    """
    estimator = None
    # Bound up front: without a "learning" section the final return used to
    # fail with UnboundLocalError.
    scorers = None

    learning_cfg = config.get("learning", None)
    if not learning_cfg:
        return estimator, scorers

    p = learning_cfg.get("parameters", None)
    o = learning_cfg.get("optimize", None)
    scorers = set_scorer_functions(learning_cfg.get("scorer", ['mae', 'rmse']))

    method_name = learning_cfg.get("method", None)
    if method_name == "SVR":
        if o:
            estimator = _tuned_estimator(SVR(), o, X_train, y_train, scorers)
        elif p:
            estimator = SVR(C=p.get("C", 10),
                            epsilon=p.get('epsilon', 0.01),
                            kernel=p.get('kernel', 'rbf'),
                            degree=p.get('degree', 3),
                            gamma=p.get('gamma', 0.0034),
                            tol=p.get('tol', 1e-3),
                            verbose=False)
        else:
            estimator = SVR()

    elif method_name == "SVC":
        if o:
            estimator = _tuned_estimator(SVC(), o, X_train, y_train, scorers)
        elif p:
            estimator = SVC(C=p.get('C', 1.0),
                            kernel=p.get('kernel', 'rbf'),
                            degree=p.get('degree', 3),
                            gamma=p.get('gamma', 0.0),
                            coef0=p.get('coef0', 0.0),
                            tol=p.get('tol', 1e-3),
                            verbose=p.get('verbose', False))
        else:
            estimator = SVC()

    elif method_name == "LassoCV":
        if p:
            estimator = LassoCV(eps=p.get('eps', 1e-3),
                                n_alphas=p.get('n_alphas', 100),
                                normalize=p.get('normalize', False),
                                precompute=p.get('precompute', 'auto'),
                                max_iter=p.get('max_iter', 1000),
                                tol=p.get('tol', 1e-4),
                                cv=p.get('cv', 10),
                                verbose=False)
        else:
            estimator = LassoCV()

    elif method_name == "LassoLars":
        if o:
            estimator = _tuned_estimator(LassoLars(), o, X_train, y_train,
                                         scorers)
        # ``elif`` (was ``if``): otherwise the tuned estimator above was
        # always overwritten by one of the two branches below.
        elif p:
            estimator = LassoLars(alpha=p.get('alpha', 1.0),
                                  fit_intercept=p.get('fit_intercept', True),
                                  verbose=p.get('verbose', False),
                                  normalize=p.get('normalize', True),
                                  max_iter=p.get('max_iter', 500),
                                  fit_path=p.get('fit_path', True))
        else:
            estimator = LassoLars()

    elif method_name == "LassoLarsCV":
        if p:
            estimator = LassoLarsCV(max_iter=p.get('max_iter', 500),
                                    normalize=p.get('normalize', True),
                                    max_n_alphas=p.get('max_n_alphas', 1000),
                                    n_jobs=p.get('n_jobs', 1),
                                    cv=p.get('cv', 10),
                                    verbose=False)
        else:
            estimator = LassoLarsCV()

    return estimator, scorers
Ejemplo n.º 21
0
from ex30.ex30_lib_graph import plot2
# Public import path: the private ``sklearn.svm.classes`` module has been
# deprecated and removed from scikit-learn.
from sklearn.svm import SVR

OUTPUT_PNG_FILE = '/experiments/ex30/ex30_svr.png'

# Training data: 24 observed values at x = 0..23 (one feature per sample).
X = [[float(x)] for x in range(0, 24)]
Y = [
    12.0, 13.0, 13.0, 13.0, 28.0, 31.0, 38.0, 60.0, 85.0, 80.0, 64.0, 60.0,
    59.0, 58.0, 65.0, 70.0, 80.0, 90.0, 110.0, 100.0, 85.0, 65.0, 45.0, 20.0
]

# Prediction grid: x = 0.0, 0.1, ..., 23.0 (231 points).
X2 = [[float(x) / 10.0] for x in range(0, 231)]

model = SVR(kernel='rbf', C=10)
model.fit(X, Y)
Y_pred = model.predict(X2)

print(str(Y_pred))

plot2(Y, Y_pred, OUTPUT_PNG_FILE, "Observed pollution concentration levels",
      "Predicted pollution concentration levels by SVR")
# Remove the segment identifier column from the training features.
x_train = x_train.drop(['segment_id'], axis=1)

# Split the test set into the target column, the segment ids, and the
# remaining feature columns.
y_test = test_set['time_to_failure']
x_test_seg = test_set['segment_id']
x_test = test_set.drop(['time_to_failure'], axis=1)
x_test = x_test.drop(['segment_id'], axis=1)

# prepare models
models = []
# models.append(('LR', LogisticRegression()))
# models.append(('LDA', LinearDiscriminantAnalysis()))
# models.append(('KNN', KNeighborsClassifier()))
# models.append(('CART', DecisionTreeClassifier()))
# models.append(('NB', GaussianNB()))
# RBF-kernel SVR with every argument spelled out; verbose output is on.
# NOTE(review): the C and gamma values look like the output of a parameter
# search -- confirm where they come from.
svReg = SVR(C=20.299419990722537, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma=0.06841395086207253, kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=True);

# Random forest of 800 trees, no bootstrapping, every argument spelled out.
randForReg = RandomForestRegressor(bootstrap=False, criterion='mse', max_depth=100,
           max_features='sqrt', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=5,
           min_weight_fraction_leaf=0.0, n_estimators=800, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

# (name, estimator) pairs in the order they are registered.
models.append(('LassoReg', Lasso(alpha=0.1)))
models.append(('SVM', svReg))
models.append(('LinearReg', LinearRegression()))
models.append(('randForest', randForReg))

# Scorer built from mean absolute error with greater_is_better=False.
mas = make_scorer(mean_absolute_error, greater_is_better=False);
Ejemplo n.º 23
0
    task='meg')
ds = loader.fetch()

# Preprocessing
pipeline = PreprocessingPipeline(nodes=[
    SampleSlicer({
        'band': ['alpha'],
        'condition': ['vipassana']
    }),
    FeatureWiseNormalizer(),
    TargetTransformer("expertise_hours")
])
ds_ = pipeline.transform(ds)

# Estimator
estimator_pp = Pipeline(steps=[('svr', SVR(C=1, kernel='linear'))])

cross_validation = GroupShuffleSplit(n_splits=10, test_size=0.25)
scores = ['r2', 'explained_variance']
cv_attr = 'subject'

sl = SearchLight(estimator=estimator_pp, scoring=scores, cv=cross_validation)
sl.fit(ds_, cv_attr=cv_attr)

#### Cross Validation ###
cross_validation = GroupShuffleSplit(n_splits=150, test_size=0.25)
groups = LabelEncoder().fit_transform(ds_.sa.subject)
X = ds_.samples
y = LabelEncoder().fit_transform(ds_.targets)
train_list = []
for train, test in cross_validation.split(X, y, groups=groups):
Ejemplo n.º 24
0
# Module-level estimator instances; the classifier ones are exposed by name
# through getClassifierMap() below.
K_N_N = KNeighborsClassifier()
SUPPORT_VECTOR = svm.SVC(kernel="linear")

# Ensemble classifiers
RANDOM_FOREST = RandomForestClassifier(n_estimators=100)
GRADIENT_BOOST_CL = GradientBoostingClassifier(n_estimators=100)
ADA_BOOST = AdaBoostClassifier(n_estimators=100)
EXTRA_TREE = ExtraTreesClassifier(n_estimators=100)


# Regressors
GRADIENT_BOOST_RG = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1)
LINEAR_RG = LinearRegression()
RIDGE_RG = Ridge()
LASSO_RG = Lasso()
SVR_RG = SVR()

def getClassifierMap():
    """Return a new dict mapping classifier names to the module-level
    classifier instances.

    The dict is rebuilt on every call, but its values are the shared
    module-level estimator objects.
    """
    return {
        "DECISION_TREE": DECISION_TREE,
        "LOGISTIC_REGRESSION": LOGISTIC_REGRESSION,
        "NAIVE_BAYS": NAIVE_BAYS,
        "K_N_N": K_N_N,
        "SUPPORT_VECTOR": SUPPORT_VECTOR,
        "RANDOM_FOREST": RANDOM_FOREST,
        "GRADIENT_BOOST": GRADIENT_BOOST_CL,
        # Was mapped to GRADIENT_BOOST_CL, which made the AdaBoost
        # classifier unreachable through this map.
        "ADA_BOOST": ADA_BOOST,
        "EXTRA_TREE": EXTRA_TREE,
    }
Ejemplo n.º 25
0
# NOTE: this fragment uses Python 2 print statements.
# Report the shapes of the train/test splits.
print X_train.shape
print y_train.shape
print X_test.shape
print y_test.shape

# Show one training row as a sanity check.
print X_train[123, :]
'''
norm1 =  np.linalg.norm(y_train)    
if norm1 != 0:   
    y_train, y_test =  y_train/norm1, y_test/norm1
print norm1
'''

print y_train.shape

# `model` is assigned twice and not used by any of the visible lines below;
# only the Lasso and ElasticNet estimators are actually fitted.
model = SVR(C=1.0, gamma=1.0)
model = LinearRegression()

lasso = Lasso(alpha=0.1).fit(X_train, y_train)
enet = ElasticNet(alpha=0.1, l1_ratio=0.7).fit(X_train, y_train)

# Only the Lasso predictions are evaluated.
y_pred = lasso.predict(X_test)

print "MSE", mean_squared_error(y_test, y_pred)
# Baseline: the error obtained by always predicting the mean of y_test.
m = np.mean(y_test)
print "MSE (Mean)", mean_squared_error(y_test, m * np.ones(len(y_test)))

print "r^2 on test data", r2_score(y_test, y_pred)

# Plot the fitted coefficients of both linear models.
plt.plot(enet.coef_, label='Elastic net coefficients')
plt.plot(lasso.coef_, label='Lasso coefficients')
Ejemplo n.º 26
0
output = open(OUTPUT_DATA_FILE, 'w')
output.write("location,observation,prediction\n")

for location in locations:
    print(str(location))
    trainX, testX, trainY, testY = splitDataForXValidation(
        location, "location", data, all_features, "target")
    normalizer_X = StandardScaler()
    trainX = normalizer_X.fit_transform(trainX)
    testX = normalizer_X.transform(testX)
    normalizer_Y = StandardScaler()
    trainY = normalizer_Y.fit_transform(trainY)
    testY = normalizer_Y.transform(testY)
    model = BaggingRegressor(base_estimator=SVR(kernel='rbf',
                                                C=40,
                                                cache_size=5000),
                             max_samples=4200,
                             n_estimators=10,
                             verbose=0,
                             n_jobs=-1)
    model.fit(trainX, trainY)
    prediction = model.predict(testX)
    prediction = normalizer_Y.inverse_transform(prediction)
    testY = normalizer_Y.inverse_transform(testY)

    for i in range(0, len(testY)):
        output.write(str(location))
        output.write(",")
        output.write(str(testY[i]))
        output.write(",")
Ejemplo n.º 27
0
			'RadiusNeighborsClassifier':RadiusNeighborsClassifier(),
			'RadiusNeighborsRegressor':RadiusNeighborsRegressor(),
			'RandomForestClassifier':RandomForestClassifier(),
			'RandomForestRegressor':RandomForestRegressor(),
			'RandomizedLasso':RandomizedLasso(),
			'RandomizedLogisticRegression':RandomizedLogisticRegression(),
			'RandomizedPCA':RandomizedPCA(),
			'Ridge':Ridge(),
			'RidgeCV':RidgeCV(),
			'RidgeClassifier':RidgeClassifier(),
			'RidgeClassifierCV':RidgeClassifierCV(),
			'RobustScaler':RobustScaler(),
			'SGDClassifier':SGDClassifier(),
			'SGDRegressor':SGDRegressor(),
			'SVC':SVC(),
			'SVR':SVR(),
			'SelectFdr':SelectFdr(),
			'SelectFpr':SelectFpr(),
			'SelectFwe':SelectFwe(),
			'SelectKBest':SelectKBest(),
			'SelectPercentile':SelectPercentile(),
			'ShrunkCovariance':ShrunkCovariance(),
			'SkewedChi2Sampler':SkewedChi2Sampler(),
			'SparsePCA':SparsePCA(),
			'SparseRandomProjection':SparseRandomProjection(),
			'SpectralBiclustering':SpectralBiclustering(),
			'SpectralClustering':SpectralClustering(),
			'SpectralCoclustering':SpectralCoclustering(),
			'SpectralEmbedding':SpectralEmbedding(),
			'StandardScaler':StandardScaler(),
			'TSNE':TSNE(),
Ejemplo n.º 28
0
def train(driverSpeed, sectionSpeed, newData, firstTime, n, minLon, lonLen,
          minLat, latLen, defaultVel):
    """Return an SVR trained on [section average speed, driver average
    speed, passenger flag] -> instantaneous speed.

    Each file in ``newData`` is a header-less CSV with the columns
    taxiId, lat, lon, busy, time, vel, sec. A sample is produced for every
    row that has the same taxi id and the same hour as the row before it and
    whose ``vel`` is not NaN. The features are taken from the previous row
    (its section, taxi id and busy flag); the target is the current row's
    ``vel``.

    Args:
        driverSpeed: indexed as ``driverSpeed[taxiId - 1][hour_offset]``.
        sectionSpeed: indexed as ``sectionSpeed[sectionId][hour_offset]``.
        newData: iterable of CSV paths (or file objects) for pandas.
        firstTime: datetime whose hour is the origin of ``hour_offset``; it
            also seeds the "previous row" time for each file.
        n, minLon, lonLen, minLat, latLen: unused by this function.
        defaultVel: value substituted when an average speed is NaN.

    Returns:
        The fitted ``SVR`` instance.
    """
    X = []
    Y = []
    for path in newData:
        df = pandas.read_csv(
            path,
            header=None,
            names=["taxiId", "lat", "lon", "busy", "time", "vel", "sec"],
            dtype={
                "taxiId": numpy.int16,
                "lat": numpy.float32,
                "lon": numpy.float32,
                "busy": numpy.int8,
                # Builtin ``str``: the ``numpy.str`` alias it replaces was
                # only another name for it and has been removed from NumPy.
                "time": str,
                "vel": numpy.float32,
                "sec": numpy.int16
            })

        # State of the previous row; taxi id -1 guarantees that the first
        # row of a file never produces a sample.
        taxiId1 = -1
        sectionId1 = 0
        busy1 = 0
        time1 = firstTime
        for row in df.itertuples(index=False):
            taxiId2 = row[0]
            busy2 = row[3]
            time2 = datetime.datetime.strptime(row[4], "%Y/%m/%d %H:%M:%S")
            v = row[5]
            sectionId2 = row[6]
            if (taxiId1 == taxiId2 and time1.hour == time2.hour
                    and not numpy.isnan(v)):
                hour_offset = time1.hour - firstTime.hour
                # Target: the ``vel`` of the current row (the original
                # comment calls it the instantaneous speed of the previous
                # point).
                Y.append(v)
                # Average speed of the road section.
                section_v = sectionSpeed[sectionId1][hour_offset]
                # Average speed of the individual driver.
                driver_v = driverSpeed[taxiId1 - 1][hour_offset]
                X.append([
                    defaultVel if numpy.isnan(section_v) else section_v,
                    defaultVel if numpy.isnan(driver_v) else driver_v,
                    # Whether the taxi is carrying a passenger.
                    busy1,
                ])
            taxiId1 = taxiId2
            busy1 = busy2
            time1 = time2
            sectionId1 = sectionId2

    clf = SVR(C=1.0,
              cache_size=200,
              coef0=0.0,
              degree=3,
              epsilon=0.2,
              gamma='auto',
              kernel='rbf',
              max_iter=-1,
              shrinking=True,
              tol=0.001,
              verbose=False)
    clf.fit(X, Y)

    return clf
def moudle_select(X, test_A, y, moudelselect, threshold=False, Rate=False,
                  mse_cutoff=0.03,
                  output_dir="H:/java/python/src/machinelearning/test"):
    '''
    Fit one regressor on five random train/test splits and export a blend
    of the per-split predictions for ``test_A``.

    For every split a fresh model is built, fitted and scored with the mean
    squared error (MSE) on the held-out part.  The predictions of the
    accepted splits are then combined: the MSE values are normalised to sum
    to one, and the largest share is given to the split with the smallest
    MSE, the second largest to the second best split, and so on.

    Parameters
    ----------
    X : training features, forwarded to ``train_test_split``.
    test_A : features to predict; its row count sizes the prediction table.
    y : training targets, forwarded to ``train_test_split``.
    moudelselect : integer selecting the regressor
        1  xgb.XGBRegressor
        2  ensemble.RandomForestRegressor (25 trees)
        3  linear_model.Lasso
        4  LinearRegression
        5  linear_model.BayesianRidge
        6  DecisionTreeRegressor
        7  ensemble.RandomForestRegressor (1000 trees)
        8  ensemble.GradientBoostingRegressor
        9  ensemble.AdaBoostRegressor
        10 BaggingRegressor
        11 ExtraTreeRegressor
        12 SVR
        13 MLPRegressor (adam)
        any other value: MLPRegressor (lbfgs, three hidden layers of 11)
    threshold : when truthy, only splits whose MSE is ``<= mse_cutoff``
        contribute to the blend; otherwise every split contributes.
    Rate : unused; kept so existing callers keep working.
    mse_cutoff : acceptance limit applied when ``threshold`` is truthy.
    output_dir : directory that receives ``predict.xlsx`` (the blend) and
        ``predict_testA.xlsx`` (the per-split predictions).

    Returns
    -------
    None.  Results are printed and written to the two Excel files.

    Raises
    ------
    ValueError
        If ``threshold`` is truthy and no split meets ``mse_cutoff``.
    '''
    n_folds = 5
    fold_mse = []      # MSE of each accepted split, in acceptance order
    fold_columns = []  # predict_A column that holds that split's prediction
    sum_mse = 0.0
    # The row count follows test_A instead of a fixed 100.  Ten columns are
    # kept (only the first five are ever filled) so the exported sheet keeps
    # the layout it always had.
    predict_A = pd.DataFrame(np.zeros((np.shape(test_A)[0], 10)))

    for index in range(n_folds):
        X_train, X_test, y_train, y_test = train_test_split(X, y)

        model = _build_regressor(moudelselect)
        model.fit(X_train, y_train)
        fold_error = mean_squared_error(y_test, model.predict(X_test))
        print("index: ", index, fold_error)
        sum_mse += fold_error

        # Written as "not <=" so that a NaN error is rejected as well.
        if threshold and not fold_error <= mse_cutoff:
            continue

        predict_A.iloc[:, index] = model.predict(test_A)
        fold_mse.append(fold_error)
        # Remember the real column: once a split has been rejected the
        # position inside fold_mse no longer equals the split number.
        fold_columns.append(index)

    if not fold_mse:
        raise ValueError(
            "no split reached an MSE <= {}; nothing to blend".format(
                mse_cutoff))

    errors = np.asarray(fold_mse, dtype=float)
    total_error = errors.sum()
    if total_error == 0:
        # Every accepted split is perfect: share the weight equally instead
        # of dividing by zero.
        shares = np.full(len(errors), 1.0 / len(errors))
    else:
        shares = errors / total_error

    best_first = np.argsort(errors)          # most accurate split first
    largest_first = np.sort(shares)[::-1]    # biggest share first
    blended = 0.0
    for share, position in zip(largest_first, best_first):
        blended = blended + share * predict_A.iloc[:, fold_columns[position]]

    print("y_predict_mean: ", blended.mean())
    print("y_predict_var: ", blended.var())
    pd.DataFrame(blended).to_excel("{}/predict.xlsx".format(output_dir),
                                   index=False)
    predict_A.to_excel("{}/predict_testA.xlsx".format(output_dir),
                       index=False)
    # sum_mse covers every split, so average over all of them rather than
    # over the accepted ones only.
    print("Averge mse:", sum_mse / n_folds)


def _build_regressor(moudelselect):
    '''Return a new, unfitted regressor for the given ``moudelselect`` code.'''
    # NOTE(review): several keyword arguments below (criterion='mse',
    # min_impurity_split, normalize, presort, n_iter, loss='ls',
    # base_estimator, max_features='auto', silent, objective='reg:linear')
    # appear to have been renamed or removed in later scikit-learn / xgboost
    # releases - confirm against the versions this project pins.
    if moudelselect == 1:
        # The estimator used to be wrapped in a second XGBRegressor through
        # a "model=" keyword, which left these settings unused; they are now
        # passed directly.  "min_child_weigh" was a typo, and "eta" (0.025)
        # duplicated learning_rate, so only learning_rate is kept.
        return xgb.XGBRegressor(max_depth=17,
                                min_child_weight=5,
                                gamma=0.06,
                                subsample=1,
                                learning_rate=0.1,
                                n_estimators=100,
                                silent=0,
                                n_jobs=-1,
                                objective='reg:linear')

    if moudelselect == 2:
        return ensemble.RandomForestRegressor(
            n_estimators=25,
            criterion='mse',
            max_depth=14,
            min_samples_split=0.1,
            min_samples_leaf=2,
            min_weight_fraction_leaf=0.0,
            max_features=0.95,
            max_leaf_nodes=None,
            min_impurity_split=1e-07,
            bootstrap=True,
            oob_score=False,
            n_jobs=-1,
            random_state=None,
            verbose=0,
            warm_start=False)

    if moudelselect == 3:
        return linear_model.Lasso(alpha=0.1,
                                  max_iter=1000,
                                  normalize=False)

    if moudelselect == 4:
        return LinearRegression(fit_intercept=False,
                                n_jobs=1,
                                normalize=False)

    if moudelselect == 5:
        return linear_model.BayesianRidge(alpha_1=1e-06,
                                          alpha_2=1e-06,
                                          compute_score=False,
                                          copy_X=True,
                                          fit_intercept=True,
                                          lambda_1=1e-06,
                                          lambda_2=1e-06,
                                          n_iter=500,
                                          normalize=False,
                                          tol=10,
                                          verbose=False)

    if moudelselect == 6:
        return DecisionTreeRegressor(criterion='mse',
                                     splitter='best',
                                     max_depth=3,
                                     min_samples_split=0.1,
                                     min_samples_leaf=0.1,
                                     min_weight_fraction_leaf=0.1,
                                     max_features=None,
                                     random_state=None,
                                     max_leaf_nodes=None,
                                     presort=False)

    if moudelselect == 7:
        return ensemble.RandomForestRegressor(
            n_estimators=1000,
            criterion='mse',
            max_depth=14,
            min_samples_split=0.1,
            min_samples_leaf=2,
            min_weight_fraction_leaf=0.0,
            max_features='auto',
            max_leaf_nodes=None,
            min_impurity_split=1e-07,
            bootstrap=True,
            oob_score=False,
            n_jobs=-1,
            random_state=None,
            verbose=0,
            warm_start=False)

    if moudelselect == 8:
        return ensemble.GradientBoostingRegressor(n_estimators=800,
                                                  learning_rate=0.1,
                                                  max_depth=4,
                                                  random_state=0,
                                                  loss='ls')

    if moudelselect == 9:
        return ensemble.AdaBoostRegressor(base_estimator=None,
                                          n_estimators=120,
                                          learning_rate=1,
                                          loss='linear',
                                          random_state=None)

    if moudelselect == 10:
        return BaggingRegressor(base_estimator=None,
                                n_estimators=500,
                                max_samples=1.0,
                                max_features=1.0,
                                bootstrap=True)

    if moudelselect == 11:
        return ExtraTreeRegressor(criterion='mse',
                                  splitter='random',
                                  max_depth=3,
                                  min_samples_split=0.1,
                                  min_samples_leaf=1,
                                  min_weight_fraction_leaf=0.01,
                                  max_features='auto',
                                  random_state=None,
                                  max_leaf_nodes=None,
                                  min_impurity_split=1e-07)

    if moudelselect == 12:
        return SVR(kernel='rbf',
                   degree=3,
                   gamma='auto',
                   coef0=0.1,
                   tol=0.001,
                   C=1,
                   epsilon=0.1,
                   shrinking=True,
                   cache_size=200,
                   verbose=False,
                   max_iter=-1)

    if moudelselect == 13:
        return MLPRegressor(hidden_layer_sizes=(100, ),
                            activation='relu',
                            solver='adam',
                            alpha=0.0001,
                            batch_size='auto',
                            learning_rate='constant',
                            learning_rate_init=0.001,
                            power_t=0.5,
                            max_iter=200,
                            shuffle=True,
                            random_state=None,
                            tol=0.0001,
                            verbose=False,
                            warm_start=False,
                            momentum=0.9,
                            nesterovs_momentum=True,
                            early_stopping=False,
                            validation_fraction=0.1,
                            beta_1=0.9,
                            beta_2=0.999,
                            epsilon=1e-08)

    # Fallback for every other selector value.
    return MLPRegressor(activation='relu',
                        alpha=0.001,
                        solver='lbfgs',
                        max_iter=90,
                        hidden_layer_sizes=(11, 11, 11),
                        random_state=1)