def test_dropversion(self):
    """Store two tagged versions of the same model under one name."""
    models = self.om.models
    models.register_mixin(ModelVersionMixin)
    regressor = LinearRegression()
    # first version: y = 2x + 10
    regressor.coef_ = np.array([2])
    regressor.intercept_ = 10
    models.put(regressor, 'regmodel', tag='commit1')
    # second version: y = 5x
    regressor.coef_ = np.array([5])
    regressor.intercept_ = 0
    models.put(regressor, 'regmodel', tag='commit2')
def stagewise_regression(x, y, tolerance=1e-4, max_iterations=1e3, verbose=0):
    """Fit a linear model by forward stagewise regression.

    Starts from zero coefficients and the mean of *y* as intercept, then
    repeatedly updates the single coordinate most correlated with the
    current residuals until the correlation drops below *tolerance*.

    Args:
        x: feature matrix (n_samples x n_features).
        y: target values.
        tolerance: stop once the max residual correlation falls below this.
        max_iterations: hard cap on update steps.
        verbose: 2 prints progress every step, 1 prints a final summary.

    Returns:
        The (manually updated) LinearRegression model.
    """
    model = LinearRegression()
    model.coef_ = np.zeros(x.shape[1])
    model.intercept_ = np.mean(y, axis=0)
    iteration = 0
    corr = tolerance * 2  # guarantees the loop body runs at least once
    while abs(corr) > tolerance:
        iteration += 1
        residuals = get_residuals(model, x, y)
        best_ix, corr = find_max_correlation(residuals, x)
        coeff = get_coeff(x[:, best_ix], residuals)
        if coeff == 0:
            print("[!!] Coefficient not being updated")
            break
        update_model(model, best_ix, coeff)
        if verbose == 2:
            print("[+] Residuals: %f. Max corr: %f in cord %d, coeff: %f"
                  % (np.dot(residuals, residuals), corr, best_ix, coeff))
        if iteration > max_iterations:
            print("[!!] Max iterations")
            break
    if verbose == 1:
        print("[+] Residuals: %f. Max corr: %f in cord %d, coeff: %f"
              % (np.dot(residuals, residuals), corr, best_ix, coeff))
    return model
def get_stack(X, Y):
    """Average the saved ridge and linear models into one stacked regressor,
    save the stacked model, and write test-set predictions to CSV.

    Args:
        X: feature matrix used to exercise/score the stacked model.
        Y: targets used for scoring.
    """
    # Filenames were generated with '/' inside the timestamp; the files on
    # disk use ':' instead, hence the replace below.
    ridge_file = 'vif ridge2019-02-28 18/10/09.790796.sav'
    ridge_file = ridge_file.replace('/', ':')  # python is looking for colons, not slashes
    # SECURITY: pickle.load is unsafe on untrusted input; these are local artifacts.
    with open(ridge_file, 'rb') as fh:
        ridge = pickle.load(fh)
    linear_file = 'vif linear2019-02-28 18/10/09.871399.sav'
    linear_file = linear_file.replace('/', ':')  # python is looking for colons, not slashes
    with open(linear_file, 'rb') as fh:
        linear = pickle.load(fh)
    # Stack by averaging the two models' parameters.
    final_model = LinearRegression()
    final_model.intercept_ = (ridge.intercept_ + linear.intercept_) / 2
    final_model.coef_ = (ridge.coef_ + linear.coef_) / 2
    final_model.predict(X)   # smoke-test the assembled model
    final_model.score(X, Y)  # score computed but not captured — kept for parity
    suffix = str(datetime.datetime.now())
    model_filename = 'vif stack' + suffix + '.sav'
    # BUG FIX: the original pickled `linear` under the 'vif stack' filename;
    # the stacked model is the intended artifact.
    with open(model_filename, 'wb') as fh:
        pickle.dump(final_model, fh)
    csv_filename = 'vif stack ' + suffix + '.csv'
    raw_test, test_IDs = load_test()
    predict = final_model.predict(raw_test)
    predict = np.exp(predict)  # undo the log-scale target transform
    predict = pd.DataFrame(predict)
    predict = pd.concat([test_IDs, predict], axis=1)
    predict.columns = ['Id', 'SalePrice']
    predict.to_csv(csv_filename, index=False)
def get_prediction(score_name, tokenizer, model, sentence):
    """Score a sentence with a linear model built from pickled layer weights
    applied to the language model's last hidden state.

    Args:
        score_name: path to a pickled object with layer weights under 'data'.
        tokenizer: tokenizer producing PyTorch tensors.
        model: language model returning hidden states.
        sentence: input text to score.

    Returns:
        The linear model's prediction for the last token's hidden state.
    """
    stored = pd.read_pickle(score_name)
    weights = stored['data'].layer_weights[0][-1]
    linear = LinearRegression()
    linear.intercept_ = weights.intercept.values
    linear.coef_ = weights.values
    encoded = tokenizer(sentence, return_tensors="pt")
    outputs = model(**encoded, labels=encoded["input_ids"], output_hidden_states=True)
    # last layer, last batch row, last token's state as a (1, dim) row vector
    final_layer = outputs.hidden_states[-1]
    last_state = final_layer[-1, :].detach().numpy()
    last_state = last_state[-1].reshape(1, -1)
    return linear.predict(last_state)
def deserialize_linear_regressor(model_dict):
    """Rebuild a LinearRegression from its serialized dict form.

    Args:
        model_dict: dict with 'params' (constructor keyword args), 'coef_'
            and 'intercept_' entries.

    Returns:
        A LinearRegression with the stored parameters restored.
    """
    # BUG FIX: the params dict must be expanded into keyword arguments;
    # passing it positionally would bind the whole dict to the constructor's
    # first parameter instead of applying the stored settings.
    model = LinearRegression(**model_dict['params'])
    model.coef_ = np.array(model_dict['coef_'])
    model.intercept_ = np.array(model_dict['intercept_'])
    return model
def load_model(path):
    """Load a linear model from a JSON file.

    Args:
        path: filesystem path to a JSON document with 'coef' and
            'intercept' entries.

    Returns:
        A LinearRegression with the stored coefficients and intercept.
    """
    with open(path, 'r') as model_file:
        # json.load reads the stream directly; no need for loads(read())
        model_dict = json.load(model_file)
    model = LinearRegression()
    model.coef_ = np.array(model_dict['coef'])
    model.intercept_ = np.array(model_dict['intercept'])
    return model
def getResult(year, model, odometer, condition, engine, transmission, cylinders, drive):
    """Predict a vehicle price from pretrained linear-regression weights.

    All arguments are encoded into a feature row by processInputs(); the
    coefficient vector and intercept below are hard-coded from a previously
    trained model.

    Returns:
        The scalar prediction for the encoded input.
    """
    estimator = LinearRegression()
    # Pretrained coefficients
    estimator.coef_ = np.array([
        4.15688785e+02, -4.43167349e-02, 2.12651230e-10, -4.29025704e-11,
        2.51503707e-10, 8.23214342e+02, -6.41425274e+02, -1.42694245e+03,
        9.45874490e-11, -1.32009177e+02, -3.30702492e+03, -5.98655733e+02,
        -3.25718092e+02, -4.17621623e+02, 9.98182003e+02, 5.17219110e+02,
        6.56854016e+02, 3.71803894e+02, 2.42936761e+03, -1.84882795e+02,
        -5.00999670e+02, 4.94002073e+02, -2.02945042e+03, -5.59599756e+02,
        -2.05897742e+03, -2.34529423e+03, 4.04687899e+02, 1.52142986e+03,
        9.98071313e+02, 1.98711159e+02, -8.07920998e+02, -1.82481530e+03,
        1.21496766e+03, 8.08414295e+02, -2.64840938e+03, 5.51285004e+03,
        -1.12757053e+03, -8.81546752e+02, -1.11122893e+03, 3.93289308e+02,
        9.87711205e+02, 2.20741028e+03, 1.33915108e+03, -3.31410140e+02,
        -8.48852168e+02, -1.15545497e+02
    ])
    estimator.intercept_ = -818769.414838612
    encoded = processInputs(year, model, odometer, condition, engine,
                            transmission, cylinders, drive)
    return estimator.predict(encoded)[0]
def main(args):
    """Fit a linear (pol1) DDT transform to the Tau21 profile and save it.

    NOTE(review): uses Python 2 print statements and PyROOT — runs under
    Python 2 only; confirm before porting.
    """
    # Initialise
    args, cfg = initialise(args)
    # Load data
    data, features, _ = load_data(args.input + 'data.h5', train=True, background=True)
    # Fill Tau21 profile
    profile = fill_profile(data, VAR_TAU21)
    # Fit profile with a first-order polynomial over FIT_RANGE
    fit = ROOT.TF1('fit', 'pol1', *FIT_RANGE)
    profile.Fit('fit', 'RQ0')
    intercept_val, coef_val = fit.GetParameter(0), fit.GetParameter(1)
    intercept_err, coef_err = fit.GetParError(0), fit.GetParError(1)
    # Create scikit-learn transform
    ddt = LinearRegression()
    ddt.coef_ = np.array([coef_val])
    # intercept/offset chosen so the transform vanishes at the lower fit edge
    ddt.intercept_ = np.array([-coef_val * FIT_RANGE[0]])
    ddt.offset_ = np.array([coef_val * FIT_RANGE[0] + intercept_val])
    print "Fitted function:"
    print " intercept: {:7.4f} ± {:7.4f}".format(intercept_val, intercept_err)
    print " coef: {:7.4f} ± {:7.4f}".format(coef_val, coef_err)
    # Save DDT transform
    saveclf(ddt, 'models/ddt/ddt.pkl.gz')
    return 0
def deserialize_linear_regressor(model_dict):
    """Rebuild a LinearRegression from its serialized dict form.

    Args:
        model_dict: dict with 'params' (constructor keyword args), 'coef_'
            and 'intercept_' entries.

    Returns:
        A LinearRegression with the stored parameters restored.
    """
    # BUG FIX: expand the params dict into keyword arguments; passing it
    # positionally would bind the whole dict to the first constructor param.
    model = LinearRegression(**model_dict["params"])
    model.coef_ = np.array(model_dict["coef_"])
    model.intercept_ = np.array(model_dict["intercept_"])
    return model
def predict(load_data, start_date, end_date, model, y_column='Load'):
    """Predict the load series between two dates with a stored linear model.

    Args:
        load_data: DataFrame with a 'Date' column plus the model's features.
        start_date: inclusive start date, 'YYYY-MM-DD' string.
        end_date: inclusive end date, 'YYYY-MM-DD' string.
        model: dict with 'coefficients', 'intercept', 'x_columns' and
            'corrected_column' describing a fitted linear model.
        y_column: unused; kept for backward-compatible interface.

    Returns:
        List of predictions (corrections when 'corrected_column' is set).
    """
    coefficients = model['coefficients']
    intercept = model['intercept']
    x_columns = model['x_columns']
    corrected_column = model['corrected_column']
    start_date = datetime.strptime(start_date, "%Y-%m-%d")
    end_date = datetime.strptime(end_date, "%Y-%m-%d")
    test_data_df = load_data[(load_data['Date'] >= start_date) & (load_data['Date'] <= end_date)]
    x_test = test_data_df[x_columns]
    regressor = LinearRegression()
    regressor.coef_ = np.array(coefficients)
    regressor.intercept_ = np.array(intercept)
    # The original wrapped this in try/except only to re-raise the same
    # exception; letting it propagate is equivalent and clearer.
    y_pred = regressor.predict(x_test)
    if corrected_column is not None:  # identity check, not `!= None`
        y_pred = test_data_df[corrected_column] - y_pred
    return y_pred.tolist()
def sample(self, data, model_name='linear_regression'):
    """Predict booking rate with one pre-fitted model per training key and
    average the results into data['qdt_prediction'].

    Args:
        data: input DataFrame; columns 12:-3 plus 'block' form the features.
        model_name: 'linear_regression' (default), 'ridge_regression' or
            'kernal_ridge'; unrecognised names fall back to linear.

    Returns:
        The same DataFrame with a 'qdt_prediction' column added.
    """
    fitted = []
    for key in self.training_data:
        if model_name == 'ridge_regression':
            estimator = Ridge()
        elif model_name == 'kernal_ridge':
            estimator = KernelRidge()
        else:
            estimator = LinearRegression()
        stored = self.coef[key]
        estimator.coef_ = stored[0]
        estimator.intercept_ = stored[1]
        fitted.append(estimator)
    # NOTE: assigning into a slice of `data` — relies on pandas copy
    # semantics matching the original code.
    X = data.iloc[:, 12:-3]
    X["block"] = data["block"]
    X = X.to_numpy()
    per_model_brates = []
    for estimator in tqdm(fitted):
        predicted = estimator.predict(X)
        per_model_brates.append(self.predict_BRate(data, predicted).to_numpy())
    data["qdt_prediction"] = np.mean(per_model_brates, axis=0)
    return data
def predict(instance, coef_, intercept_):
    """Predict targets for rows of *instance* with a pre-fitted linear model.

    Args:
        instance: instance matrix (n_samples x n_features).
        coef_: coefficient array for the linear model.
        intercept_: intercept array for the linear model.

    Returns:
        Predictions for the input instances.
    """
    estimator = LinearRegression(fit_intercept=True)
    estimator.coef_ = coef_
    estimator.intercept_ = intercept_
    return estimator.predict(instance)
def LR_predict():
    """Flask endpoint: predict with client-supplied linear-model parameters.

    Reads 'X' (feature rows) and 'params' ({'coef', 'inter'}) as JSON from
    the request form and returns the predictions as JSON.
    """
    X = json.loads(request.form['X'])
    params = json.loads(request.form['params'])
    estimator = LinearRegression()
    estimator.coef_ = np.array(params['coef'])
    estimator.intercept_ = params['inter']
    predictions = estimator.predict(X)
    return jsonify(pred=list(predictions))
def createModel(company):
    """Build AR and ADL regression models for *company* and return plotting
    data plus next-day predictions.

    Returns:
        (plot_dict, prediction): plot_dict holds MSE lists and
        actual/predicted series for both models; prediction maps
        'AR'/'ADL' to the next predicted value.

    NOTE(review): LinearRegression(normalize=True) was removed in recent
    scikit-learn releases — confirm the pinned version supports it.
    """
    # query the past day's news
    # news_dict = query_news_articles(company, prev_date, curr_date, trading_dates, all_sources)
    # get company ticker
    # ticker = df[df['Name'] == company]['Symbol'].values[0]
    ticker = ticker_dict[company]
    # create model: read pre-computed MSEs, coefficients and split indices
    MSE_list_AR, MSE_list_ADL, intercept_AR, intercept_ADL, coef_AR, coef_ADL,\
    best_AR_train_index, best_AR_test_index, best_ADL_train_index, best_ADL_test_index = main_read_in_csv(ticker)
    # AR model, rebuilt from stored parameters (never re-fitted here)
    model_AR = LinearRegression(normalize=True)
    model_AR.intercept_ = intercept_AR
    model_AR.coef_ = coef_AR
    # ADL model
    model_ADL = LinearRegression(normalize=True)
    model_ADL.intercept_ = intercept_ADL
    model_ADL.coef_ = coef_ADL
    # predict values for tomorrow
    prediction = {}
    prediction['AR'] = predict_next_value(ticker, company, model_AR, is_ADL=False)
    prediction['ADL'] = predict_next_value(ticker, company,model_ADL, is_ADL=True)
    plot_AR = plot_AR_model(ticker, best_ADL_train_index, best_ADL_test_index, best_AR_train_index, best_AR_test_index)
    plot_ADL = plot_ADL_model(ticker, best_ADL_train_index, best_ADL_test_index, best_AR_train_index, best_AR_test_index)
    # assemble everything the view layer needs into one dict
    plot_dict = {}
    plot_dict['MSE_labels'] = [1,2,3,4,5,6,7,8]
    plot_dict['MSE_AR_values'] = MSE_list_AR
    plot_dict['MSE_ADL_values'] = MSE_list_ADL
    plot_dict['comp_AR_label'] = plot_AR['x_val']
    plot_dict['comp_ADL_label'] = plot_ADL['x_val']
    plot_dict['comp_AR_actual'] = plot_AR['y_actual']
    plot_dict['comp_AR_predict'] = plot_AR['y_predict']
    plot_dict['comp_ADL_actual'] = plot_ADL['y_actual']
    plot_dict['comp_ADL_predict'] = plot_ADL['y_predict']
    return plot_dict, prediction
def test_via_runtime(self):
    """Versioned models resolve via '^' and '@tag' through the runtime."""
    models = self.om.models
    models.register_mixin(ModelVersionMixin)
    regressor = LinearRegression()
    # version 1: y = 2x + 10
    regressor.coef_ = np.array([2])
    regressor.intercept_ = 10
    models.put(regressor, 'regmodel', tag='commit1')
    # version 2: y = 5x
    regressor.coef_ = np.array([5])
    regressor.intercept_ = 0
    models.put(regressor, 'regmodel', tag='commit2')
    # via past version pointer
    previous = self.om.runtime.model('regmodel^').predict([10]).get()
    current = self.om.runtime.model('regmodel').predict([10]).get()
    self.assertEqual(previous[0], 10 * 2 + 10)
    self.assertEqual(current[0], 10 * 5 + 0)
    # via version tag
    tagged_v1 = self.om.runtime.model('regmodel@commit1').predict([10]).get()
    tagged_v2 = self.om.runtime.model('regmodel@commit2').predict([10]).get()
    self.assertEqual(tagged_v1[0], 10 * 2 + 10)
    self.assertEqual(tagged_v2[0], 10 * 5 + 0)
def LotFrontage_imputer(self):
    """Impute missing LotFrontage values from LotArea.

    Uses a pre-fitted linear regression of LotFrontage on LotArea
    (coefficients hard-coded below; fitted elsewhere after removing
    outliers) and caps each imputed value at 200.
    """
    # linear regression for lotfrontage vs lotarea after removing outliers,
    # setting a max at 200 based on visualization
    lr = LinearRegression()
    lr.coef_ = np.array([0.00215388])
    lr.intercept_ = 48.640713607035664
    # predict frontage only for the rows where it is missing
    impute_pred = pd.DataFrame(lr.predict(
        self.df.LotArea[self.df.LotFrontage.isnull()].values.reshape(
            -1, 1)), columns=['LR_Pred'])
    impute_pred['Max'] = 200
    # row-wise min of prediction and the 200 cap fills the missing values
    self.df.loc[self.df.LotFrontage.isnull(),
                'LotFrontage'] = impute_pred.min(1).values
def best_subset_regression(data, dependentVar, factorNames, options): """Return the factor loadings using best subset regression. INPUTS: data: pandas df, data matrix, should constain the date column and all of the factorNames columns dependentVar: string, name of dependent variable factorNames: list, elements should be strings, names of the independent variables options: dictionary, should constain at least two elements, timeperiod, and date timeperiod: string, if == all, means use entire dataframe, otherwise filter the df on this value date: name of datecol returnModel: boolean, if true, returns model maxVars: int, maximum number of factors that can have a non zero loading in the resulting regression printLoadings: boolean, if true, prints the coeficients Outputs: reg: regression object from sikitlearn also prints what was desired """ # Check dictionary for maxVars option if ('maxVars' not in options.keys()): print('maxVars not specified in options') return if (options['timeperiod'] == 'all'): newData = data.copy() else: newData = data.copy() newData = newData.query(options['timeperiod']) # this is error because we do not have cvxpy in Anaconda, so best_subset # is commented out alpha, beta = best_subset(data[factorNames].values, data[dependentVar].values, options['maxVars']) beta[np.abs(beta) <= 1e-7] = 0.0 if (options['printLoadings']): print_timeperiod(newData, dependentVar, options) print('Max Number of Non-Zero Variables is ' + str(options['maxVars'])) display_factor_loadings(alpha, beta, factorNames, options) if (options['returnModel']): out = LinearRegression() out.intercept_ = alpha[0] out.coef_ = beta return out
def main(args):
    """Fit a linear (pol1) DDT transform to the selected substructure
    variable's profile and save it.

    NOTE(review): uses Python 2 print statements and PyROOT — runs under
    Python 2 only; confirm before porting.
    """
    # Initialise
    args, cfg = initialise(args)
    # Load data
    data, features, _ = load_data(args.input + 'data.h5', train=True, background=True)
    # Select which substructure variable to decorrelate (edit here to switch)
    #variable = VAR_TAU21
    variable = VAR_N2
    #variable = VAR_DECDEEP
    #variable = VAR_DEEP
    # Fill variable profile
    profile = fill_profile(data, variable)
    # Fit profile over the range matching the chosen variable
    if variable == VAR_N2:
        fit_range = FIT_RANGE_N2
    elif variable == VAR_TAU21:
        fit_range = FIT_RANGE_TAU21
    elif variable == VAR_DECDEEP:
        fit_range = FIT_RANGE_DECDEEP
    elif variable == VAR_DEEP:
        fit_range = FIT_RANGE_DEEP
    else:
        print "variable invalid"
        return 0
    fit = ROOT.TF1('fit', 'pol1', *fit_range)
    profile.Fit('fit', 'RQ0')
    intercept_val, coef_val = fit.GetParameter(0), fit.GetParameter(1)
    intercept_err, coef_err = fit.GetParError(0), fit.GetParError(1)
    # Create scikit-learn transform
    ddt = LinearRegression()
    ddt.coef_ = np.array([coef_val])
    # intercept/offset chosen so the transform vanishes at the lower fit edge
    ddt.intercept_ = np.array([-coef_val * fit_range[0]])
    ddt.offset_ = np.array([coef_val * fit_range[0] + intercept_val])
    print "Fitted function:"
    print " intercept: {:7.4f} ± {:7.4f}".format(intercept_val, intercept_err)
    print " coef: {:7.4f} ± {:7.4f}".format(coef_val, coef_err)
    # Save DDT transform
    saveclf(ddt, 'models/ddt/ddt_{}.pkl.gz'.format(variable))
    print "got to the end of main()"
    return 0
def evaluate(self, x: np.ndarray, y: np.ndarray, metric: str = "neg_mean_absolute_error") -> float:
    """Score the selected-coefficient linear model on test data.

    Args:
        x (np.ndarray): MxN input data array.
        y (np.ndarray): M output targets.
        metric (str): scorer name resolved via sklearn.metrics.get_scorer.

    Returns:
        The scorer's value for the model restricted to self.indices.
    """
    scorer = get_scorer(metric)
    estimator = LinearRegression(fit_intercept=False)
    # restrict to the selected feature subset; intercept is fixed at zero
    estimator.coef_ = self.coef_[self.indices]  # type: ignore
    estimator.intercept_ = 0
    return scorer(estimator, x[:, self.indices], y)
def test_predict_from_data_inline_versions(self):
    """A model stored twice under one name is addressable per-version via
    the REST API's inline '^' and '@tag' syntax."""
    X = np.arange(10).reshape(-1, 1)
    y = X * 2
    # train model locally
    clf = LinearRegression()
    clf.fit(X, y)
    result = clf.predict(X)
    # store model in om
    self.om.models.put(clf, 'regression', tag='commit1')
    # second version differs only in the intercept (y = 2x + 10)
    clf.intercept_ = 10
    self.om.models.put(clf, 'regression', tag='commit2')
    # check we can use it to predict previous version (async request)
    resp = self.client.put('/api/v1/model/regression^/predict', json={
        'columns': ['v'],
        'data': dict(v=[5]),
    }, auth=self.auth, headers=self._async_headers)
    resp = self._check_async(resp)
    self.assertEqual(resp.status_code, 200)
    data = resp.get_json()['response']
    self.assertEqual(data.get('model'), 'regression^')
    # commit1: y = 2*5 + 0 = 10
    assert_almost_equal(data.get('result'), [10.])
    # check we can use it to predict current version
    resp = self.client.put('/api/v1/model/regression/predict', json={
        'columns': ['v'],
        'data': dict(v=[5]),
    }, auth=self.auth, headers=self._headers)
    self.assertEqual(resp.status_code, 200)
    data = resp.get_json()
    self.assertEqual(data.get('model'), 'regression')
    # commit2: y = 2*5 + 10 = 20
    assert_almost_equal(data.get('result'), [20.])
    # check we can use it to predict tagged version
    resp = self.client.put('/api/v1/model/regression@commit1/predict', json={
        'columns': ['v'],
        'data': dict(v=[5]),
    }, auth=self.auth, headers=self._headers)
    self.assertEqual(resp.status_code, 200)
    data = resp.get_json()
    self.assertEqual(data.get('model'), 'regression@commit1')
    assert_almost_equal(data.get('result'), [10.])
def create_result_summary(self, model_name='linear_regression'):
    """Rebuild one pre-fitted model per training key, run attraction
    predictions, and write the summary CSV.

    Args:
        model_name: 'linear_regression' (default), 'ridge_regression' or
            'kernal_ridge'; unrecognised names fall back to linear.
    """
    fitted = {}
    for key in self.training_data:
        if model_name == 'ridge_regression':
            estimator = Ridge()
        elif model_name == 'kernal_ridge':
            estimator = KernelRidge()
        else:
            estimator = LinearRegression()
        stored = self.coef[key]
        estimator.coef_ = stored[0]
        estimator.intercept_ = stored[1]
        fitted[key] = estimator
    summary = self.predict_attraction(self.training_data, models=fitted)
    summary.to_csv("results/{}_result.csv".format(self.exp_name))
def load(dir):
    """Import a bk model as a sklearn model.

    Args:
        dir: directory holding the model's metadata and parameter files.

    Returns:
        A LinearRegression with coefficients/intercept restored from HDF5.

    Raises:
        AssertionError: if the stored model type is not linear regression.
    """
    meta_f, params_f = _paths(dir)
    # close the metadata file deterministically (was leaked via open() inline)
    with open(meta_f, 'r') as fh:
        meta = json.load(fh)
    model_type = meta['type']  # renamed: `type` shadowed the builtin
    # only supports linear regression at the moment
    assert model_type == 'linear_regression'
    h5f = h5py.File(params_f, 'r')
    try:
        coef = h5f['coef'][:]
        intercept = h5f['intercept'][()]  # to retrieve scalar values
    finally:
        h5f.close()  # close even if a dataset is missing
    model = LinearRegression()
    model.coef_ = coef
    model.intercept_ = intercept
    return model
def linear_regression(X_train, Y_train, xval=None):
    """Create linear regression model on data X with labels Y.

    Targets are log-scaled before fitting. When *xval* is given, the model's
    single coefficient is set to the mean coefficient across the
    cross-validation folds; the intercept is always zero.
    """
    if xval:
        model = LinearRegression(fit_intercept=False)
        results = cross_validate(model, X_train, log_scale(Y_train),
                                 cv=xval, return_estimator=True)
        fold_coefs = np.array([float(m.coef_) for m in results['estimator']])
        model.coef_ = np.array([[np.mean(fold_coefs)]])
        model.intercept_ = 0
    else:
        model = LinearRegression(fit_intercept=False).fit(
            X_train, log_scale(Y_train))
    return model
def best_subset_regression(data, dependentVar, factorNames, options): '''best_subset_regression takes in a dataset and returns the factor loadings using best subset regression INPUTS: data: pandas df, data matrix, should constain the date column and all of the factorNames columns dependentVar: string, name of dependent variable factorNames: list, elements should be strings, names of the independent variables options: dictionary, should constain at least two elements, timeperiod, and date timeperiod: string, if == all, means use entire dataframe, otherwise filter the df on this value date: name of datecol returnModel: boolean, if true, returns model maxVars: int, maximum number of factors that can have a non zero loading in the resulting regression Outputs: reg: regression object from sikitlearn also prints what was desired ''' #Check dictionary for maxVars option if ('maxVars' not in options.keys()): print('maxVars not specified in options') return if (options['timeperiod'] == 'all'): newData = data.copy() else: newData = data.copy() newData = newData.query(options['timeperiod']) #perform linear regression alpha, beta = best_subset(data[factorNames].values, data[dependentVar].values, options['maxVars']) if (options['printLoadings'] == True): #Now print the results print_timeperiod(newData, dependentVar, options) #Now print the factor loadings display_factor_loadings(alpha, beta, factorNames, options) if (options['returnModel']): out = LinearRegression() out.intercept_ = alpha[0] out.coef_ = beta return out
def parse_MATLAB_model(model_json):
    """
    Reading in a stored matlab z score model for use in python
    :param model_json: dict read from the JSON saved by MATLAB
    :return: a dict with the model, coefficient names, and fit statistics
    """
    # first Coefficients entry is the intercept; the rest pair with
    # CoefficientNames[1:]
    regressor = LinearRegression()
    regressor.coef_ = np.array(model_json['Coefficients'][1:])
    regressor.intercept_ = model_json['Coefficients'][0]
    return {
        'model': regressor,
        'coefficient_names': model_json['CoefficientNames'][1:],
        'rmse': model_json['RMSE'],
        'dfe': model_json['DFE'],
        'mse': model_json['MSE'],
        'coefficient_cov': np.array(model_json['CoeffCov']),
    }
def calculate_coefficients(star_objects, group, sample_weights=None, set_slope=False):
    """Fit a weighted linear regression of period on predictors for one
    rotation group.

    Parameters
    ----------
    star_objects: list
        Contains a list of Star objects
    group: int, 1 or 0
        1 is slow rotators, 0 is fast
    sample_weights: list, optional, default None
        contains weights of the values w.r.t. the line
    set_slope: boolean, optional, default False
        Sets the slope to 0 (intercept becomes the weighted mean period)

    Returns
    -------
    tuple: (np.ndarray of [intercept, coefficients[1:]], LinearRegression)
    """
    lr = LinearRegression()
    # BUG FIX: the original filtered a global `star_list` for the targets
    # (and for the set_slope intercept) while filtering the `star_objects`
    # parameter for the predictors — X and y rows could come from different
    # stars. Use the parameter consistently.
    lr.fit(
        [star.predictors for star in star_objects if star.group == group],
        [star.period for star in star_objects if star.group == group],
        sample_weights,
    )
    if set_slope:
        # zero the slope; intercept is the weighted mean period of the group
        lr.coef_ = np.zeros(len(star_objects[0].predictors))
        lr.intercept_ = np.average(
            [star.period for star in star_objects if star.group == group],
            weights=sample_weights,
        )
    return np.append(lr.intercept_, lr.coef_[1:]), lr
def __init__(self, msg, feature_names=None):
    """Build per-component linear sub-models from a structured message.

    For each component: if it carries linear coefficients, reconstruct a
    LinearRegression (None otherwise); its mixture weight defaults to 1.0
    when absent.

    :param msg: message object with Structure.Components, each optionally
        carrying LinearCoeff (Intercept + sparse Coeff entries) and a Coeff
        weight. NOTE(review): looks like a protobuf message — confirm.
    :param feature_names: optional index-like of feature names; when given,
        sparse coefficients are placed by feature name via get_loc.
    """
    self.models = []  # one LinearRegression (or None) per component
    self.coef = []    # per-component mixture weight
    for m in msg.Structure.Components:
        s = None
        if m.LinearCoeff:
            s = LinearRegression()
            s.intercept_ = m.LinearCoeff.Intercept
            # size the coefficient vector by explicit names when provided,
            # otherwise by the number of stored coefficients
            if feature_names is None:
                s.coef_ = np.zeros(len(m.LinearCoeff.Coeff))
            else:
                s.coef_ = np.zeros(len(feature_names))
            for i, elem in enumerate(m.LinearCoeff.Coeff):
                if feature_names is None:
                    s.coef_[i] = elem.Coeff
                else:
                    # place each coefficient at its named feature's position
                    l = feature_names.get_loc(elem.Feature)
                    s.coef_[l] = elem.Coeff
        self.models.append(s)
        if m.Coeff:
            self.coef.append(m.Coeff)
        else:
            self.coef.append(1.0)
def avaliaRegistros(df, intpt, coef):  # df: DataFrame, intpt: intercept, coef: coefficient
    """Evaluate a linear model (given intercept and coefficient) against the
    records in *df* and return [error sum, percentage of positive errors].
    """
    # take the parameters from the df: column 0 as x, column 2 as y
    x = df.iloc[:, :1].values
    y = df.iloc[:, 2:3].values
    # instantiate the regressor; the fit is immediately overridden below
    regressor = LinearRegression()
    regressor.fit(x, y)
    # assemble the regressor with the parameters passed to the function
    regressor.intercept_ = intpt
    regressor.coef_[0] = coef
    # per-row prediction error
    derror = y - regressor.predict(x)
    percent = 0
    soma = 0
    contaerro = 0
    totalleitura = len(derror)
    for leitura in derror:
        soma += leitura
        if leitura >= 0:
            contaerro += 1
    percent = (100 * contaerro) / totalleitura
    # Percentage of positive error: percent / Sum of the error: soma
    retornoDesempenho = [soma[0], percent]
    return retornoDesempenho
def strategy_train_opt_mix(self, train, valid):
    """Grid-search a convex mix of sub-model predictions and keep the best.

    The first prediction column receives weight alpha; the remaining
    columns share (1 - alpha) equally. The weights minimizing the loss on
    train[:50] + valid are frozen into a LinearRegression stored on
    self.strategy_model.
    """
    df = pd.concat([train.iloc[:50], valid], axis=0)
    y_preds = self._predict(df)
    num = y_preds.shape[1]
    mixer = LinearRegression(fit_intercept=False)
    mixer.intercept_ = 0.0
    best_loss, best_coef = None, None
    for alpha in np.linspace(0.0, 1.0, 101):
        beta = (1 - alpha) / (num - 1)
        coef_ = np.array([alpha] + [beta] * (num - 1))
        # the weights must form a convex combination
        assert abs(sum(coef_) - 1.0) < 1e-6
        blended = pd.Series(np.sum(np.multiply(y_preds, coef_), axis=1),
                            index=df.index)
        loss = self.loss(blended, df['target'], df['weight'])
        if best_loss is None or loss < best_loss:
            best_loss, best_coef = loss, coef_
    mixer.coef_ = best_coef
    logging.info(f"OptMix.coef_={mixer.coef_} best_loss={best_loss:.4f}")
    self.strategy_model = mixer
def build_linear_model(rotation, translation):
    """Wrap a rotation matrix and translation vector as a LinearRegression.

    Args:
        rotation: coefficient matrix applied to inputs.
        translation: additive offset (intercept).

    Returns:
        A LinearRegression whose predict computes rotation @ x + translation.
    """
    wrapped = LinearRegression()
    wrapped.coef_, wrapped.intercept_ = rotation, translation
    return wrapped