Example #1
    def load_default(self, machine_list=['lasso', 'tree', 'ridge', 'random_forest']):
        """
        Loads 4 different scikit-learn regressors by default.

        Parameters
        ----------
        machine_list: optional, list of strings
            List of default machine names to be loaded.

        """
        for machine in machine_list:
            if machine == 'lasso':
                self.machines_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.machines_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.machines_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.machines_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
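For context, here is a self-contained sketch of what this loader produces, with synthetic data standing in for the class attributes X_k_, y_k_ and machines_ (the dataset and printed scores are illustrative, not part of the original class):

from sklearn import linear_model
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

# Synthetic stand-in for self.X_k_, self.y_k_
X, y = make_regression(n_samples=200, n_features=10, random_state=0)
machines = {
    'lasso': linear_model.LassoCV(random_state=0),
    'tree': DecisionTreeRegressor(random_state=0),
    'ridge': linear_model.RidgeCV(),
    'random_forest': RandomForestRegressor(random_state=0),
}
for name, machine in machines.items():
    machine.fit(X, y)
    print(name, round(machine.score(X, y), 3))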
Example #2
def main():
    train_x, train_y, test_x, test_y = (sio.loadmat(TRAIN_DIR)['trainx'],
                                        sio.loadmat(TRAIN_DIR)['trainy'],
                                        sio.loadmat(TEST_DIR)['testx'],
                                        sio.loadmat(TEST_DIR)['testy'])
    clf = linear_model.RidgeCV(alphas=[0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
                               cv=10)  # the `normalize` option was removed in scikit-learn 1.2
    print("Training......")
    clf.fit(train_x, train_y)
    print("Predicting.......")
    years = clf.predict(test_x)

    diff = 0.0
    for i, j in zip(test_y, years):
        diff += abs(i - j)
    diff /= TEST_SIZE
    print("MSE is: " + str(diff))
Example #3
 def fit(self, train, y):
     internal_model = linear_model.RidgeCV(alphas=(0.1, 0.5, 1.0, 5.0, 10.0), fit_intercept=False)
     bestscore = 1e15
     better = True
     indextrain = train.dropna().index
     limitlen = len(train) * self.limit_size_train
     while better:
         internal_model.fit(train.loc[indextrain], y.loc[indextrain])  # .loc replaces the removed pandas .ix indexer
         score = metrics.mean_squared_error(internal_model.predict(train.loc[indextrain]), y.loc[indextrain])
         if score < bestscore:
             bestscore = score
             self.bestmodel = internal_model
             residual = y.loc[indextrain] - internal_model.predict(train.loc[indextrain])
             indextrain = residual[abs(residual) <= abs(residual).quantile(self.quant)].index
             if len(indextrain) < limitlen:
                 better = False
         else:
             better = False
             self.bestmodel = internal_model
Example #4
    def Models(self, model_names):
        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
            self.poly_features, self.y, test_size=0.3)

        #model_lasso = linear_model.LassoCV(alphas=[0.0001,0.001,0.01,0.05,0.1,0.2,0.5,1,10],cv=10)
        model_linear = linear_model.LinearRegression()
        model_ridge = linear_model.RidgeCV(
            alphas=[0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.5, 1, 10], cv=10)
        model_en = linear_model.ElasticNetCV(
            alphas=[0.0001, 0.0005, 0.001, 0.01, 0.1, 1, 10],
            l1_ratio=[.01, .1, .5, .9, .99],
            max_iter=5000)
        model_br = linear_model.BayesianRidge(alpha_1=1e-06,
                                              alpha_2=1e-06,
                                              compute_score=False,
                                              copy_X=True,
                                              fit_intercept=True,
                                              lambda_1=1e-06,
                                              lambda_2=1e-06,
                                              max_iter=300,  # formerly n_iter; `normalize` was removed in scikit-learn 1.2
                                              tol=0.001,
                                              verbose=False)
        #model_svr = model_selection.GridSearchCV(LinearSVR(random_state=0, tol=1e-5),
        #                                         param_grid={"epsilon":[0,0.2],"C": [0,1]},cv = 5)
        model_svr = make_pipeline(StandardScaler(),
                                  svm.LinearSVR(random_state=0, tol=1e-5))
        model_sgdr = make_pipeline(StandardScaler(),
                                   SGDRegressor(max_iter=1000, tol=1e-3))
        model_list = [
            model_linear, model_ridge, model_en, model_br, model_svr,
            model_sgdr
        ]
        #print('Optimal alpha for ridge regression:', model_ridge.alpha_, '\n', '-'*50)

        for i in range(6):
            model_predict = model_list[i].fit(self.X_train, self.y_train)
            self.pre_y_list = model_predict.predict(self.X_val)
            joblib.dump(
                model_predict,
                f'C:\\Users\\Administrator\\Desktop\\7.21\\models-saved\\phase-1 device6 model {model_names[i]}.pkl'
            )
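For reference, a minimal sketch of the joblib dump/load round trip used in the loop above, on synthetic data (the file name is illustrative):

import joblib
from sklearn.datasets import make_regression
from sklearn.linear_model import RidgeCV

X, y = make_regression(n_samples=100, n_features=5, random_state=0)
model = RidgeCV(alphas=[0.01, 0.1, 1, 10]).fit(X, y)
joblib.dump(model, 'ridge_model.pkl')       # persist, as in the loop above
restored = joblib.load('ridge_model.pkl')   # reload later
assert (restored.predict(X) == model.predict(X)).all()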
Example #5
def single_ridge_regression(X_in, y_in):
    '''
    Performs a single-variable ridge regression given an X feature and a y
    outcome variable.

    Input: Series of the X feature and Series of the y outcome variable

    Output: the fitted ridge regressor and a dataframe of actual vs.
    predicted values
    '''
    # Ridge regression using only cumulative number of COVID cases

    # standardizes X values for the ridge regression
    scaler = StandardScaler()
    X = X_in.values.reshape(-1, 1)
    y = y_in.values.reshape(-1, 1)
    X_std = scaler.fit_transform(X)

    # formulates the training and tests set for each variable
    X_train, X_test, y_train, y_test = train_test_split(X_std,
                                                        y,
                                                        test_size=.2,
                                                        random_state=0)

    # ridge regression algorithm, fit on the training split only
    regressor = linear_model.RidgeCV(alphas=[.1, 1, 10])
    regressor = regressor.fit(X_train, y_train)
    print("Chosen alpha: " + str(regressor.alpha_))

    # coefficient dataframe and y prediction results of the ridge regression
    y_pred = regressor.predict(X_test)
    df = pd.DataFrame({
        "Actual": y_test.flatten(),
        "Predicted": y_pred.flatten()
    })
    # shows model performance
    print("Ridge regression score: " + str(regressor.score(X_std, y)))
    print("Ridge regression explained_variance: " +
          str(metrics.explained_variance_score(y_test, y_pred)))
    print("Ridge regression MSE: " +
          str(metrics.mean_squared_error(y_test, y_pred)))

    return regressor, df
Example #6
def multi_ridge_regression(X_in, y_in):
    '''
    Performs a multivariate ridge regression given X features and a y outcome
    variable.

    Input: dataframe of X features and Series of the y outcome variable

    Output: dataframe of beta coefficients, a dataframe of actual vs.
    predicted values, and the fitted ridge regressor
    '''
    # Ridge regression

    # standardizes X values for the ridge regression
    scaler = StandardScaler()
    X = X_in.values
    y = y_in.values
    X_std = scaler.fit_transform(X)

    # formulates the training and tests set for each variable
    X_train, X_test, y_train, y_test = train_test_split(X_std,
                                                        y,
                                                        test_size=.2,
                                                        random_state=0)

    # ridge regression algorithm, fit on the training split only
    regressor = linear_model.RidgeCV(alphas=[.1, 1, 10])
    regressor = regressor.fit(X_train, y_train)
    print("Chosen alpha: " + str(regressor.alpha_))

    # coefficient dataframe and y prediction results of the ridge regression
    coeff_df = pd.DataFrame(regressor.coef_,
                            X_in.columns,
                            columns=["Coefficients"])
    y_pred = regressor.predict(X_test)
    df = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})

    print("Ridge regression score: " + str(regressor.score(X_std, y)))
    print("Ridge regression: " +
          str(metrics.explained_variance_score(y_test, y_pred)))
    print("Ridge regression: " +
          str(metrics.mean_squared_error(y_test, y_pred)))

    return coeff_df, df, regressor
Example #7
def ridgeCV_model(X_train, X_valid, y_train, y_test, y_name, y_train_mean, y_train_std):

	print('head items to fit are: ', y_name)
	for head_item in range(len(y_name)):

		y_train_item = y_train[:,head_item]
		y_train_item = np.reshape(y_train_item,[y_train.shape[0],1])

		y_test_item = y_test[:,head_item]
		y_test_item = np.reshape(y_test_item,[y_test_item.shape[0],1])
		print('********************************** Fitting RidgeCV on %s Data **********************************' % y_name[head_item])
		#Declare model
		model = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0], fit_intercept=True)  # `normalize` was removed in scikit-learn 1.2; standardize X_train beforehand if needed

		

		#Fit model
		model.fit(X_train,y_train_item)

		#Get predictions
		y_valid_predicted=model.predict(X_valid)


		training_prediction=model.predict(X_train)

		R2s_training=get_R2(y_train_item,training_prediction)
		print('R2 on training set = ', R2s_training)

		#Get metric of fit
		R2s=get_R2(y_test_item,y_valid_predicted)
		print('R2s:', R2s)
		print('saving prediction ...')
		np.savez(y_name[head_item] + '_RidgeCV_ypredicted.npz',y_test=y_test_item,y_prediction=y_valid_predicted,
			y_train_=y_train_item,training_prediction=training_prediction,
			y_train_mean=y_train_mean[head_item],y_train_std=y_train_std[head_item])
		# print('saving model ...')
		joblib.dump(model, y_name[head_item] + '_Ridge.pkl') 
		print('plotting results...')
		plot_results(y_test_item,y_valid_predicted,y_name[head_item],R2s,model_name='RidgeCV')

	return model
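A short sketch of reading back one of the .npz files written above (the file name assumes a head item called 'head0'; adjust to the actual y_name entries):

import numpy as np

data = np.load('head0_RidgeCV_ypredicted.npz')
print(data.files)            # e.g. ['y_test', 'y_prediction', 'y_train_', ...]
y_prediction = data['y_prediction']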
Example #8
 def __init__(self, X, y, kind):
     self.X = X
     self.y = y
     kind = kind.upper()
     if kind == 'SVM':
         from sklearn import svm
         self.regressor = svm.SVR()
     elif kind == 'RIDGECV':
         from sklearn import linear_model
         self.regressor = linear_model.RidgeCV(
             alphas=[
                 x * y for x in [0.01, 0.1, 1, 10] for y in [1, 5]
                 ]
             )
     elif kind == 'SVM_GRID':
         from sklearn import svm
         from sklearn.model_selection import GridSearchCV
         self.regressor = GridSearchCV(
             svm.SVR(),
             {
                 'C': [1e0, 5e0, 1e1, 5e1, 1e2, 5e2, 1e3],
                 'epsilon': [1e-3, 1e-2],
                 'kernel': ['linear', 'rbf', 'poly'],
                 'degree': [2, 3, 4]
             },
             scoring='neg_mean_squared_log_error'
         )
     elif kind == 'SVM_GRID_SIMPLE':
         from sklearn import svm
         from sklearn.model_selection import GridSearchCV
         self.regressor = GridSearchCV(
             svm.SVR(),
             {
                 'C': [1e0, 5e0, 1e1, 5e1, 1e2],
                 'gamma': [1e-3, 1e-2, 1e-1],
                 'kernel': ['linear'],
                 'degree': [2]
             },
             scoring='neg_mean_squared_log_error',
             n_jobs=-1,
             cv=7
         )
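One point worth noting against the GridSearchCV branches: with its default cv=None, RidgeCV selects alpha through an efficient leave-one-out scheme instead of refitting the model per fold. A small sketch on synthetic data:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import RidgeCV

X, y = make_regression(n_samples=100, n_features=8, noise=5.0, random_state=0)
reg = RidgeCV(alphas=np.logspace(-2, 2, 9))  # cv=None -> efficient leave-one-out CV
reg.fit(X, y)
print('chosen alpha:', reg.alpha_)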
Example #9
def evaluate_lcc_model(en_train_matrix, en_test_matrix, fr_train_matrix,
                       fr_test_matrix, dimensions, evaluation_function):

    scores = []

    for dimension in tqdm(dimensions):
        en = en_train_matrix[:, :dimension] - np.mean(
            en_train_matrix[:, :dimension], axis=0)
        fr = fr_train_matrix[:, :dimension] - np.mean(
            fr_train_matrix[:, :dimension], axis=0)
        sample_size = en.shape[0]
        zero_matrix = np.zeros((sample_size, dimension))
        X1 = np.concatenate((en, zero_matrix), axis=1)
        X2 = np.concatenate((zero_matrix, fr), axis=1)
        X = np.concatenate((X1, X2), axis=0)
        Y1 = np.concatenate((en, fr), axis=1)
        Y2 = np.concatenate((en, fr), axis=1)
        Y = np.concatenate((Y1, Y2), axis=0)

        reg = linear_model.RidgeCV(alphas=[1e-10, 1e-3, 1e-2, 1e-1, 1, 10])
        reg.fit(X, Y)
        pca = PCA(n_components=int(dimension))
        pca.fit(reg.predict(X))
        def rrr(X):
            # reduced-rank mapping: project ridge predictions onto the PCA components
            return np.matmul(pca.transform(reg.predict(X)), pca.components_)

        #sample_size = len(en_docs_test)
        en = en_test_matrix[:, :dimension] - np.mean(
            en_train_matrix[:, :dimension], axis=0)
        fr = fr_test_matrix[:, :dimension] - np.mean(
            fr_train_matrix[:, :dimension], axis=0)
        zero_matrix = np.zeros((en_test_matrix.shape[0], dimension))
        X1 = np.concatenate((en, zero_matrix), axis=1)
        X2 = np.concatenate((zero_matrix, fr), axis=1)
        X = np.concatenate((X1, X2), axis=0)
        english_encodings_lcc = rrr(X1)
        french_encodings_lcc = rrr(X2)
        score = evaluation_function(english_encodings_lcc,
                                    french_encodings_lcc)
        scores.append(score)

    return scores
Example #10
 def fit_and_predict(self, TEST_YEAR, regularization=True):
     X, Y, xTrain, yTrain, xTest, yTest, names = self.build_data_arrays(TEST_YEAR)
     # `normalize` was removed in scikit-learn 1.2; scale features in build_data_arrays if self.shouldNormalize is intended
     if regularization:
         predictor = linear_model.RidgeCV(alphas=[0.1, 1.0, 10], fit_intercept=True)
     else:
         predictor = linear_model.LinearRegression(fit_intercept=True, copy_X=True, n_jobs=1)
     scores = {}
     relativeError = {}
     coefficients = {}
     output = {}
     for p in self.positions:
         if len(xTrain[p]) > 1 and len(xTest[p]) > 1:
             predictor.fit(np.array(xTrain[p]), np.array(yTrain[p]))
             coefficients[p] = pd.DataFrame(zip(self.features, predictor.coef_), columns = ['feature', 'coefficient']).sort_values(by=['coefficient'], ascending=False)
             prediction = predictor.predict(np.array(xTest[p]))
             output[p] = pd.DataFrame(zip(names[p], prediction), columns = ['name', 'value']).sort_values(by=['value'], ascending=False)
             scores[p] = (mean_squared_error(np.array(yTest[p]), np.array(prediction)),
                          r2_score(np.array(yTest[p]), np.array(prediction)))
             relativeError[p] = self.get_relative_error(output[p], TEST_YEAR)
     return output
Example #11
	def ridge_regress(self, cv = 20, alphas = None ):
		"""perform k-folds cross-validated ridge regression on the design_matrix. To be used when the design matrix contains very collinear regressors. For cross-validation and ridge fitting, we use sklearn's RidgeCV functionality. Note: intercept is not fit, and data are not prenormalized. 

			:param cv: cross-validated folds, inherits RidgeCV cv argument's functionality.
			:type cv: int, standard = 20
			:param alphas: values of penalization parameter to be traversed by the procedure, inherits RidgeCV cv argument's functionality. Standard value, when parameter is None, is np.logspace(7, 0, 20)
			:type alphas: numpy array, from >0 to 1. 
			:returns: instance variables 'betas' (nr_betas x nr_signals) and 'residuals' (nr_signals x nr_samples) are created.
		"""
		if alphas is None:  # an == comparison fails elementwise once a numpy array is passed
			alphas = np.logspace(7, 0, 20)
		self.rcv = linear_model.RidgeCV(alphas=alphas, 
				fit_intercept=False, 
				cv=cv) 
		self.rcv.fit(self.design_matrix.T, self.resampled_signal.T)

		self.betas = self.rcv.coef_.T
		self.residuals = self.resampled_signal - self.rcv.predict(self.design_matrix.T).T  # transpose prediction back to (nr_signals x nr_samples)

		self.logger.debug('performed ridge regression on %s design_matrix and %s signal, resulting alpha value is %f' % (str(self.design_matrix.shape), str(self.resampled_signal.shape), self.rcv.alpha_))
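A self-contained sketch of the same pattern (no intercept, alphas swept over np.logspace(7, 0, 20)) on a synthetic design matrix; shapes follow the docstring above:

import numpy as np
from sklearn import linear_model

rng = np.random.default_rng(0)
design_matrix = rng.normal(size=(5, 200))   # nr_regressors x nr_samples
signal = rng.normal(size=(3, 200))          # nr_signals x nr_samples

rcv = linear_model.RidgeCV(alphas=np.logspace(7, 0, 20), fit_intercept=False, cv=20)
rcv.fit(design_matrix.T, signal.T)
betas = rcv.coef_.T                         # nr_regressors x nr_signals
residuals = signal - rcv.predict(design_matrix.T).T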
Example #12
def fit_ridge(oe, mw):  # oe = x; mw = y variable
    # fit line:
    oe = np.array(oe)
    mw = np.array(mw)

    clf = linear_model.RidgeCV(alphas=[0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0])
    num_codes = min(len(oe),len(mw))
    print('oe.shape = ', oe.shape)
    oe = oe.reshape(-1, 1)
    print('oe.shape = ', oe.shape)
    mw = mw.reshape(-1, 1)
    clf.fit(oe[0:num_codes], mw[0:num_codes])
    print('****************************************************************************')
    print('RIDGE REGRESSION SCORE = ', clf.score(oe[0:num_codes], mw[0:num_codes]))
    m = clf.coef_[0][0]
    c = clf.intercept_[0]

    print('m,c = ', m, c)

    return m, c
Example #13
	def poly_ridge(self,deg=2):
		''' Polynomial Ridge Regression
		'''
		from sklearn import linear_model
		# Training
		t0 = time.time()
		phi = cm.naivePolyFeature(self.X,deg=deg,norm=True)
		lm = linear_model.RidgeCV(alphas=np.logspace(-10,-1,10))
		lm.fit(phi,self.y)
		print(lm.alpha_)
		t_tr = time.time() - t0

		# Predicting
		t0 = time.time()
		phi_pred = cm.naivePolyFeature(self.X_pred,deg=deg,norm=True)
		y_lr = lm.predict(phi_pred)
		t_pr = time.time() - t0

		eel = np.mean(np.maximum(self.Value0-self.c-np.sum(y_lr,axis=1),0))
		return (eel, t_tr, t_pr)
Example #14
def RidgeRegression(x, y, z, degree=5, alpha=10**(-6), verbose=False):
    # Generate random training points; the x and y arguments only feed the prediction grid below
    x_train = np.random.rand(100, 1)
    y_train = np.random.rand(100, 1)
    z = FrankeFunction(x_train, y_train)

    # training and finding design matrix X_
    X = np.c_[x_train, y_train]
    poly = PolynomialFeatures(degree)
    X_ = poly.fit_transform(X)
    ridge = linear_model.RidgeCV(alphas=np.array([alpha]))
    ridge.fit(X_, z)
    beta = ridge.coef_
    #intercept = ridge.intercept_

    # predict on the (x, y) grid and prepare for plotting
    x_, y_ = np.meshgrid(x, y)
    x = x_.reshape(-1, 1)
    y = y_.reshape(-1, 1)
    M = np.c_[x, y]
    M_ = poly.fit_transform(M)
    predict = ridge.predict(M_)  # includes the fitted intercept, unlike a bare M_.dot(beta.T)

    if verbose:
        print("X_: ", np.shape(X_))
        print("M: ", np.shape(M))
        print("M_: ", np.shape(M_))
        print("predict: ", np.shape(predict))

    # show figure
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # fig.gca(projection=...) was removed in matplotlib 3.6
    ax.plot_surface(x_,
                    y_,
                    predict.reshape(20, 20),
                    cmap=cm.coolwarm,
                    linewidth=0,
                    antialiased=False)
    plt.show()

    return beta
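The same polynomial-plus-ridge idea can be expressed as a scikit-learn pipeline, which keeps the feature expansion and intercept handling together; a sketch on synthetic data (the target function here is a stand-in for FrankeFunction):

import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

rng = np.random.default_rng(0)
X = rng.random((100, 2))                       # (x, y) sample points
z = np.sin(X[:, 0]) * np.cos(X[:, 1])          # illustrative target

model = make_pipeline(PolynomialFeatures(degree=5),
                      RidgeCV(alphas=np.logspace(-8, 0, 9)))
model.fit(X, z)
z_hat = model.predict(X)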
Example #15
def __initialize_model(model_name, lamda=0, hyper_parameters={}):
    """
    Initialize a machine learning model.

    Args:
        model_name: learning algorithm name
        lamda: coefficient of the regularization term
        hyper_parameters: other parameters for the algorithms;
            see the RandomForestRegressor parameters in scikit-learn

    Returns:
        an initialized regressor
    """
    if model_name == constants.MODEL_NAME_LASSO:
        # note: alpha in scikit-learn represents lamda, the constant that
        # multiplies the regularization term
        clf_lasso = linear_model.Lasso(alpha=lamda)
        return clf_lasso
    elif model_name == constants.MODEL_NAME_ELASTICNET:
        clf_elasticnet = ElasticNet(alpha=lamda)
        return clf_elasticnet
    elif model_name == constants.MODEL_NAME_RIDGE:
        clf_ridge = linear_model.Ridge(alpha=lamda)
        return clf_ridge
    elif model_name == constants.MODEL_NAME_RIDGECV:
        clf_ridgecv = linear_model.RidgeCV(alphas=constants.lamdaArray)
        return clf_ridgecv
    elif model_name == constants.MODEL_NAME_LARS:
        clf_lars = linear_model.Lars(n_nonzero_coefs=1)
        return clf_lars
    elif model_name == constants.MODEL_NAME_BAYESIAN:
        clf_bayesian = linear_model.BayesianRidge()
        return clf_bayesian
    elif model_name == constants.MODEL_NAME_SGD:
        clf_sgd = linear_model.SGDRegressor(alpha=lamda)
        return clf_sgd
    elif model_name == constants.MODEL_NAME_RANDOM_FOREST:
        clf_random_forest = RandomForestRegressor(**hyper_parameters,
                                                  random_state=0,
                                                  n_jobs=-1)
        return clf_random_forest
Example #16
def cross_validate_model(X_train, Y_train):
    """
    Here we perform cross-validation of models to choose the best one.
    """
    # Split the data into training and validation halves
    train, test, y_train_cv, y_test_cv = train_test_split(X_train,
                                                          Y_train,
                                                          test_size=0.5,
                                                          random_state=42)

    # List the regression methods to use.
    clf_random_forest = ensemble.RandomForestRegressor(n_estimators=50)
    clf_adaboost_reg = ensemble.AdaBoostRegressor(n_estimators=50)
    clf_lasso_larscv = sklinear.LassoLarsCV(cv=9)
    clf_ridge = sklinear.RidgeCV()
    clf_elastic_net = sklinear.ElasticNet()
    clf_extra_tree = ensemble.ExtraTreesRegressor(n_estimators=50)
    clf_mlpr = neural_network.MLPRegressor(solver='adam')

    # Collect the methods in a list, which is more amenable to looping
    methods = [
        clf_random_forest, clf_adaboost_reg, clf_lasso_larscv, clf_ridge,
        clf_elastic_net, clf_extra_tree, clf_mlpr
    ]
    methods_label = [
        'clf_random_forest', 'clf_adaboost_reg', 'clf_lasso_larscv',
        'clf_ridge', 'clf_elastic_net', 'clf_extra_tree', 'clf_mlpr'
    ]

    method_mse = np.zeros((len(methods), 1))
    # Fit and predict for each method
    for i in range(len(methods)):
        methods[i].fit(train, y_train_cv)
        method_predict = methods[i].predict(test)
        method_mse[i] = metrics.mean_squared_error(y_test_cv, method_predict)
        print('MSE for %s while cross validation : %f' %
              (methods_label[i], method_mse[i]))

    # We return the method which has the minimum mse
    return np.argmin(method_mse)
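For comparison, a sketch of the same bake-off using scikit-learn's built-in cross_val_score instead of a single manual split (the data and fold count are illustrative):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=300, n_features=10, noise=10.0, random_state=42)
candidates = {'clf_ridge': RidgeCV(), 'clf_random_forest': RandomForestRegressor(n_estimators=50)}
for label, model in candidates.items():
    mse = -cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error').mean()
    print('MSE for %s while cross validation : %f' % (label, mse))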
Example #17
    def __init__(self, train=True, train_set=[], clf='ridge'):
        self.identity = None
        self.nb_victories = 0
        self.nb_games = 0
        self.next_action = {}
        self.gamma = 1
        self.initial_alpha = 0.05
        self.min_alpha = 0.005
        self.alpha_update_rate = 1  # pow(self.initial_alpha/self.min_alpha, 1/500)
        self.alpha = self.initial_alpha
        self.epsilon = 0.1
        self.state_space_x = [
            'low_prestige', 'high_prestige', 'can_buy_prestige',
            'can_buy_card', 'can_reserve', 'can_take_2', 'can_take_3'
        ]
        self.training_phase = train
        self.is_trained = not train
        self.classifier = {
            'ridge': linear_model.RidgeCV(),
            'mlp': neural_network.MLPRegressor()
        }[clf]
        self.state_space_y = [0, 1]  # 0: few, 1: some, 2: a lot
        self.state_space, self.state_space_inverted_index = self.build_state_space(
        )
        #        self.state_space_inverted_index = self.build_inverted_index(self.state_space)

        self.action_space = [
            'buy_prestige', 'buy_card', 'reserve', 'take_3', 'take_2',
            'do_nothing'
        ]
        self.action_space_inverted_index = self.build_inverted_index(
            self.action_space)
        self.nb_actions = len(self.action_space)
        self.nb_states = len(self.state_space)

        self.state = {}
        self.actions = []

        self.init_features()
        if len(train_set) > 0:
            self.load_training(train_set)
Example #18
def poly_regression(x_tr,
                    x_ts,
                    y_tr,
                    y_ts,
                    degree,
                    filename='Poly_regression'):
    print("Polynomial Regression")
    x_train = x_tr
    x_test = x_ts
    y_train = y_tr
    y_test = y_ts

    poly_features = PolynomialFeatures(degree=degree)
    x_train_poly = poly_features.fit_transform(x_train)
    # `normalize` was removed in scikit-learn 1.2; scale features beforehand if needed
    poly_model = linear_model.RidgeCV(alphas=np.logspace(-9, 9, 19))
    #poly_model = linear_model.LinearRegression()

    print("Fitting...")
    poly_model.fit(x_train_poly, y_train)

    # Save the model to a file
    filename_model = f'Trained Models/{filename}.sav'
    pickle.dump(poly_model, open(filename_model, 'wb'))  # pickle.dump, not the private pickle._dump

    #print("Best Alpha : ", poly_model.alpha_)

    print("Predicting...")
    prediction = poly_model.predict(poly_features.fit_transform(x_test))

    print(
        "Poly Accuracy: ",
        round(
            poly_model.score(poly_features.fit_transform(x_test), y_test) *
            100), "%")
    print('Poly Mean Square Error',
          metrics.mean_squared_error(y_test, prediction), "\n\n")
    print("First Value of Test Samples' Actual Output: ",
          np.asarray(y_test)[0])
    print("First Value of Test Samples' Predicted Output: ", prediction[0])
Example #19
def run_ridge_cv(x_df, y_df, analyspar, alphas=None):
    """
    run_ridge_cv(x_df, y_df, analyspar)
    """

    if alphas is None:
        alphas = (0.1, 0.1, 2.0)

    steps = [("scaler", preprocessing.StandardScaler()),
             ("model",
              linear_model.RidgeCV(normalize=True,
                                   alphas=alphas,
                                   store_cv_values=True))]

    pipl = pipeline.Pipeline(steps)
    pipl.fit(x_df, y_df)

    log_sklearn_results(pipl,
                        analyspar,
                        name="ridge_cv",
                        var_names=x_df.columns)
Example #20
def build_models(predictors, responses, modelNo):
    if modelNo == 0:
        # Linear Regression
        model = linear_model.LinearRegression()
        modelName = "Linear Regression"
    elif modelNo == 1:
        # Ridge Regression
        model = linear_model.RidgeCV(alphas=(0.1, 0.1, 10))
        modelName = "Ridge Regression"
    elif modelNo == 2:
        # Lasso Regression
        model = linear_model.MultiTaskLassoCV(eps=0.001, n_alphas=100, alphas=(0.1, 0.1, 10))
        modelName = "Lasso Regression"
    model.fit(predictors, responses)
    predictions = model.predict(predictors)
    Result = {}
    Result['modelName'] = modelName
    Result['predictions'] = predictions
    Result['model'] = model
    Result['Corr'] = pearsonr(predictions, responses)[0][0]
    return Result
Example #21
def _regression_DKL(n_, fo_, fx_, stims_all_LM_, stims_all_S_, stims_all_LUM_):
    """ regress all coordinates but only hue matters """
    reg_fo_LM = linear_model.RidgeCV()
    reg_fo_S = linear_model.RidgeCV()
    reg_fo_LUM = linear_model.RidgeCV()
    reg_fx_LM = linear_model.RidgeCV()
    reg_fx_S = linear_model.RidgeCV()
    reg_fx_LUM = linear_model.RidgeCV()
    reg_fo_LM.fit(fo_[:n_], stims_all_LM_[:n_])
    reg_fo_S.fit(fo_[:n_], stims_all_S_[:n_])
    reg_fo_LUM.fit(fo_[:n_], stims_all_LUM_[:n_])
    reg_fx_LM.fit(fx_[:n_], stims_all_LM_[:n_])
    reg_fx_S.fit(fx_[:n_], stims_all_S_[:n_])
    reg_fx_LUM.fit(fx_[:n_], stims_all_LUM_[:n_])

    return reg_fo_LM, reg_fo_S, reg_fo_LUM, reg_fx_LM, reg_fx_S, reg_fx_LUM
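The six parallel fits above could equally be built in a loop; a behaviorally equivalent sketch (the dictionary keys are invented for illustration):

from sklearn import linear_model

def _regression_DKL_loop(n_, fo_, fx_, stims_all_LM_, stims_all_S_, stims_all_LUM_):
    # One RidgeCV per (features, target) pair, keyed by name.
    models = {}
    for feat_name, feats in [('fo', fo_), ('fx', fx_)]:
        for stim_name, stims in [('LM', stims_all_LM_), ('S', stims_all_S_), ('LUM', stims_all_LUM_)]:
            models[f'{feat_name}_{stim_name}'] = linear_model.RidgeCV().fit(feats[:n_], stims[:n_])
    return models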
Example #22
    def load_default(self,
                     machine_list=[
                         'lasso', 'tree', 'ridge', 'random_forest', 'svm'
                     ]):
        """
        Loads 5 different scikit-learn regressors by default.

        Parameters
        ----------
        machine_list: optional, list of strings
            List of default machine names to be loaded.
        Returns
        -------
        self : returns an instance of self.
        """
        self.estimators_ = {}
        for machine in machine_list:
            try:
                if machine == 'lasso':
                    self.estimators_['lasso'] = linear_model.LassoCV(
                        random_state=self.random_state).fit(
                            self.X_k_, self.y_k_)
                if machine == 'tree':
                    self.estimators_['tree'] = DecisionTreeRegressor(
                        random_state=self.random_state).fit(
                            self.X_k_, self.y_k_)
                if machine == 'ridge':
                    self.estimators_['ridge'] = linear_model.RidgeCV().fit(
                        self.X_k_, self.y_k_)
                if machine == 'random_forest':
                    self.estimators_['random_forest'] = RandomForestRegressor(
                        random_state=self.random_state).fit(
                            self.X_k_, self.y_k_)
                if machine == 'svm':
                    self.estimators_['svm'] = LinearSVR(
                        random_state=self.random_state).fit(
                            self.X_k_, self.y_k_)
            except ValueError:
                continue
        return self
Example #23
def save_standard_results(model_type, year):
    p_train, r_train, p_test, r_test = pipeline(year, "pixel_level",
                                                "daily_total")
    scale = preprocessing.StandardScaler()
    p_train = scale.fit_transform(p_train)
    p_test = scale.transform(p_test)

    if model_type == "Lasso":
        lr = linear_model.LassoCV()
    elif model_type == "Ridge":
        lr = linear_model.RidgeCV()
    elif model_type == "OLS":
        lr = linear_model.LinearRegression()
    elif model_type == "Elastic_Net":
        lr = linear_model.ElasticNetCV()
    else:
        raise TypeError("Invalid model_type")

    model = lr.fit(p_train, r_train)
    predicted_train = model.predict(p_train)
    predicted_test = model.predict(p_test)
    train_correlation = fast_evaluate.anti_correlation(predicted_train,
                                                       r_train)
    test_correlation = fast_evaluate.anti_correlation(predicted_test, r_test)
    train_rmse = fast_evaluate.root_mean_square_error(predicted_train, r_train)
    test_rmse = fast_evaluate.root_mean_square_error(predicted_test, r_test)
    train_mae = fast_evaluate.mean_absolute_error(predicted_train, r_train)
    test_mae = fast_evaluate.mean_absolute_error(predicted_test, r_test)
    coefficients = model.coef_.tolist()

    filename = "Standard_{0}_year_{1}.csv".format(model_type, year)
    with open(filename, 'w', newline='') as f:  # text mode for the csv module in Python 3
        writer = csv.writer(f)
        writer.writerow([
            "train_correlation", "test_correlation", "train_rmse", "test_rmse",
            "train_mae", "test_mae", "coefficients"
        ])
        writer.writerow((train_correlation, test_correlation, train_rmse,
                         test_rmse, train_mae, test_mae, coefficients))
Example #24
def calculateRAPM(units, points, weights):
    u = DictVectorizer(sparse=False)
    u_mat = u.fit_transform(units)

    # config.debug
    # print(u_mat)
    # print(points[:25])
    # print(weights[:100])

    playerIDs = list(u.get_feature_names_out())  # get_feature_names() was removed in scikit-learn 1.2
    # print(json.dumps(playerIDs[:25], indent=4 * ' '))
    # print(json.dumps(u.inverse_transform(u_mat)[:1], indent=4 * ' '))

    clf = linear_model.RidgeCV(alphas=(np.array([0.01, 0.1, 1.0, 10, 100, 500, 1000, 2000, 5000])), cv=5)
    clf.fit(u_mat, points, sample_weight=weights)
    # print(clf.alpha_)
    ratings = []
    for i, playerID in enumerate(playerIDs):
        ratings.append((playerID, clf.coef_[i]))
    ratings.sort(key=lambda tup: tup[1], reverse=True)

    return ratings
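For context, a sketch of the input format DictVectorizer expects here: one dict per stint mapping player IDs to on-court indicators. The IDs, sign convention (+1 home, -1 away, a common RAPM encoding), points, and weights are invented for illustration:

import numpy as np

units = [
    {'home_p1': 1, 'home_p2': 1, 'away_p1': -1, 'away_p2': -1},
    {'home_p1': 1, 'home_p3': 1, 'away_p1': -1, 'away_p3': -1},
]
points = np.array([2.5, -1.0])    # point margin per stint
weights = np.array([10.0, 6.0])   # e.g. possessions per stint
ratings = calculateRAPM(units, points, weights)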
Example #25
def train_models(mod,
                 save=True,
                 cutoff=0.999,
                 percent=50,
                 plot=True,
                 scale=False):

    if mod == 'linear':
        clf = linear_model.LinearRegression(n_jobs=-1)
    elif mod == 'lasso':
        clf = linear_model.Lasso(alpha=1000,
                                 max_iter=10000,
                                 tol=0.001,
                                 positive=True)  # the removed `normalize` option is dropped; scale inputs beforehand if needed
    elif mod == 'lassolars':
        clf = linear_model.LassoLars(alpha=0.001)
    elif mod == 'multilasso':
        clf = linear_model.MultiTaskLasso(alpha=0.1)
    elif mod == 'ridgeCV':
        clf = linear_model.RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0])
    elif mod == 'ridge':
        clf = linear_model.Ridge(alpha=1000)  # scalar alpha; a list is only valid with one entry per target
    elif mod == 'bayes':
        clf = linear_model.BayesianRidge()
    elif mod == 'huber':
        clf = linear_model.HuberRegressor()
    elif mod == 'poly':
        #clf = poly_clf()
        clf = PolynomialFeatures(degree=2)

    clf, continuum = train(clf,
                           mod,
                           save=save,
                           cutoff=cutoff,
                           percent=percent,
                           plot=plot,
                           scale=scale)
    return clf, continuum
Example #26
    def _estimate_model(self):
        """Estimates ridge regression model.

        Returns
        -------
        model : sklearn ridge regression or ridge cv object
            Fitted ridge model.
        """
        self.underlying = linear_model.Ridge(fit_intercept=self.intercept)
        if (self.cv_folds is not None) or (self.solver in ['svd', 'eigen']):
            #Ridge CV by default tests a very limited set of alphas, we expand on this
            alphas = np.logspace(-10, 5, 100)
            model = linear_model.RidgeCV(alphas=alphas,
                                         cv=self.cv_folds,
                                         fit_intercept=self.intercept,
                                         gcv_mode=self.solver,
                                         **self.kwargs)
        else:
            model = linear_model.Ridge(fit_intercept=self.intercept,
                                       **self.kwargs)
        model.fit(self.x_train, self.y_train)
        return model
Example #27
 def _get_generalizer(cls, gnrl):
   generalizers = dict(
   MEAN = MeanClassifier(),
   RFC = RandomForestClassifier(n_estimators=500, max_depth=32, n_jobs=-1, random_state=random_state),
   RCV = lm.RidgeCV(alphas=np.linspace(0.1, 200), cv=100),  # alphas must be strictly positive in current scikit-learn
   RCVp = RidgeCV_proba(alphas=np.linspace(0.1, 200), cv=100),
   LCV = lm.LassoCV(),
   LCVp = LassoCV_proba(),
   LSVC = svm.LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.0001, C=1.0, multi_class='ovr', fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=random_state),
   SVC = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=True, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, random_state=random_state),  # gamma='auto' replaces the old 0.0 sentinel
   LR = lm.LogisticRegression(penalty='l2', solver='liblinear', dual=True, tol=0.00001, C=1, fit_intercept=True, intercept_scaling=1.0, class_weight=None, random_state=random_state),
   KNCuniform = KNeighborsClassifier(n_neighbors=1024, weights='uniform'),
   KNC = KNeighborsClassifier(n_neighbors=1024, weights='distance'),
   AUCR = AUCRegressor(),
   ABC_DTC = AdaBoostClassifier(
               estimator=DecisionTreeClassifier(criterion='gini', max_depth=1, max_features=1.0, min_samples_leaf=1, min_samples_split=2, random_state=random_state, splitter='best'),
               learning_rate=0.1,
               n_estimators=200,
               random_state=random_state),
   )
   return generalizers[gnrl]
Example #28
    def __init__(self,
                 data,
                 classifier='linear',
                 save=True,
                 load=False,
                 fname='FASMA_ML.pkl'):
        self.classifier = classifier
        self.data = data
        self.save = save
        self.load = load
        self.fname = fname
        self.X_train, self.y_train = data.X, data.y

        if self.classifier == 'linear':
            self.clf = linear_model.LinearRegression(n_jobs=-1)
        elif self.classifier == 'lasso':
            self.clf = linear_model.Lasso(alpha=0.00001)
        elif self.classifier == 'lassolars':
            self.clf = linear_model.LassoLars(alpha=1000)
        elif self.classifier == 'multilasso':
            self.clf = linear_model.MultiTaskLasso(alpha=1000)
        elif self.classifier == 'ridgeCV':
            self.clf = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0, 100])
        elif self.classifier == 'ridge':
            self.clf = linear_model.Ridge(alpha=10)
        elif self.classifier == 'bayes':
            self.clf = linear_model.BayesianRidge()
        elif self.classifier == 'huber':
            self.clf = linear_model.HuberRegressor()

        # Train the classifier
        if not self.load:
            t = time()
            self.train_classifier()
            print('Trained classifier in {}s'.format(round(time() - t, 2)))
        else:
            with open(self.fname, 'rb') as f:
                self.clf = pickle.load(f)  # cPickle is Python 2 only
Example #29
 def fit(self, train, y):
     internal_model = linear_model.RidgeCV(
         fit_intercept=True, cv=model_selection.TimeSeriesSplit(n_splits=2))
     bestscore = 1e15
     better = True
     indextrain = train.dropna().index
     limitlen = len(train) * self.limit_size_train
     while better:
         internal_model.fit(train.loc[indextrain], y.loc[indextrain])  # .loc replaces the removed pandas .ix indexer
         score = metrics.mean_squared_error(
             internal_model.predict(train.loc[indextrain]), y.loc[indextrain])
         if score < bestscore:
             bestscore = score
             self.bestmodel = internal_model
             residual = y.loc[indextrain] - internal_model.predict(
                 train.loc[indextrain])
             indextrain = residual[
                 abs(residual) <= abs(residual).quantile(self.quant)].index
             if len(indextrain) < limitlen:
                 better = False
         else:
             better = False
             self.bestmodel = internal_model
Example #30
    def __remodel__(self, model_type, regr, __X_train, __Y_train):
        """
        Function to retrain certain models based on optimal alphas and/or ratios
        """
        if model_type == "ridge":
            alpha = regr.alpha_
            regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha), cv=10)
        elif model_type == "lasso":
            alpha = regr.alpha_
            regr = linear_model.LassoCV(alphas=self.__realpha__(alpha),
                                        max_iter=5000,
                                        cv=10)
        elif model_type == "elasticnet":
            alpha = regr.alpha_
            ratio = regr.l1_ratio_
            regr = linear_model.ElasticNetCV(
                l1_ratio=self.__reratio__(ratio),
                alphas=self.__elasticnet_init["alpha"],
                max_iter=1000,
                cv=3)

        regr.fit(__X_train, __Y_train)
        return regr
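The __realpha__ helper is not shown above; purely as an illustration of the idea, a hypothetical version would build a finer log-spaced grid around the previously chosen alpha:

import numpy as np
from sklearn import linear_model
from sklearn.datasets import make_regression

def realpha(alpha, width=10.0, num=21):
    # Hypothetical helper: finer log-spaced grid centered on the previous best alpha.
    return np.logspace(np.log10(alpha / width), np.log10(alpha * width), num)

X, y = make_regression(n_samples=200, n_features=10, noise=5.0, random_state=0)
coarse = linear_model.RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0, 100.0]).fit(X, y)
fine = linear_model.RidgeCV(alphas=realpha(coarse.alpha_), cv=10).fit(X, y)
print(coarse.alpha_, fine.alpha_)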