Example #1
 def test_dropversion(self):
     store = self.om.models
     store.register_mixin(ModelVersionMixin)
     reg = LinearRegression()
     reg.coef_ = np.array([2])
     reg.intercept_ = 10
     store.put(reg, 'regmodel', tag='commit1')
     reg.coef_ = np.array([5])
     reg.intercept_ = 0
     store.put(reg, 'regmodel', tag='commit2')
Example #2
def stagewise_regression(x, y, tolerance=1e-4, max_iterations=1e3, verbose=0):
    model = LinearRegression()
    model.coef_ = np.zeros(x.shape[1])
    model.intercept_ = np.mean(y, axis=0)

    it, corr = 0, tolerance * 2
    while abs(corr) > tolerance:
        it += 1
        res = get_residuals(model, x, y)
        ix, corr = find_max_correlation(res, x)
        cf = get_coeff(x[:, ix], res)
        if cf == 0:
            print("[!!] Coefficient not being updated")
            break
        update_model(model, ix, cf)
        if verbose == 2:
            print("[+] Residuals: %f. Max corr: %f in cord %d, coeff: %f" %
                  (np.dot(res, res), corr, ix, cf))
        if it > max_iterations:
            print("[!!] Max iterations")
            break
    if verbose == 1:
        print("[+] Residuals: %f. Max corr: %f in cord %d, coeff: %f" %
              (np.dot(res, res), corr, ix, cf))
    return model
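
The helper functions used by this loop (get_residuals, find_max_correlation, get_coeff, update_model) are not shown in the snippet; a minimal sketch of what they might look like, assuming 1-D targets and a full least-squares update for the selected column:

import numpy as np

def get_residuals(model, x, y):
    # residuals of the current model on the training data
    return y - model.predict(x)

def find_max_correlation(res, x):
    # column index most correlated (in absolute value) with the residuals,
    # plus the signed correlation at that index
    corrs = np.array([np.corrcoef(x[:, j], res)[0, 1] for j in range(x.shape[1])])
    ix = int(np.nanargmax(np.abs(corrs)))
    return ix, corrs[ix]

def get_coeff(col, res):
    # least-squares coefficient of a single column against the residuals
    return float(np.dot(col, res) / np.dot(col, col))

def update_model(model, ix, cf):
    # fold the partial coefficient into the running model
    model.coef_[ix] += cf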
Example #3
def get_stack(X, Y):
        
    ridge_file = 'vif ridge2019-02-28 18/10/09.790796.sav'
    ridge_file = ridge_file.replace('/', ':')  # the file on disk uses ':' in the timestamp, not '/'
    ridge = pickle.load(open(ridge_file, 'rb'))
    
    linear_file = 'vif linear2019-02-28 18/10/09.871399.sav'
    linear_file = linear_file.replace('/', ':')  # the file on disk uses ':' in the timestamp, not '/'
    linear = pickle.load(open(linear_file, 'rb'))
    
    final_model = LinearRegression()
    
    final_model.intercept_ = (ridge.intercept_ + linear.intercept_)/2
    final_model.coef_ = (ridge.coef_ + linear.coef_)/2
    
    # evaluate the averaged model on the training data
    print('stacked model R^2:', final_model.score(X, Y))
    
    suffix = str(datetime.datetime.now())
    model_filename = 'vif stack' + suffix +'.sav'
    pickle.dump(final_model, open(model_filename, 'wb'))
    csv_filename = 'vif stack ' + suffix + '.csv'
    
    raw_test, test_IDs = load_test()
    predict = final_model.predict(raw_test)
    predict = np.exp(predict)
    predict = pd.DataFrame(predict)
    predict = pd.concat([test_IDs, predict], axis = 1)
    predict.columns = ['Id', 'SalePrice']
    predict.to_csv(csv_filename, index=False)
Example #4
def get_prediction(score_name, tokenizer, model, sentence):

    s = pd.read_pickle(score_name)
    d = s['data']

    coeffs = d.layer_weights[0][-1].values

    intercept = d.layer_weights[0][-1].intercept.values

    new_model = LinearRegression()
    new_model.intercept_ = intercept
    new_model.coef_ = coeffs

    inputs = tokenizer(sentence, return_tensors="pt")
    outputs = model(**inputs,
                    labels=inputs["input_ids"],
                    output_hidden_states=True)

    hiddenStates = outputs.hidden_states

    hiddenStatesLayer = hiddenStates[-1]

    lastWordState = hiddenStatesLayer[-1, :].detach().numpy()

    lastWordState = lastWordState[-1].reshape(1, -1)

    prediction = new_model.predict(lastWordState)

    return prediction
Example #5
def deserialize_linear_regressor(model_dict):
    model = LinearRegression(**model_dict['params'])

    model.coef_ = np.array(model_dict['coef_'])
    model.intercept_ = np.array(model_dict['intercept_'])

    return model
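
For reference, a serializer matching this layout might look like the sketch below; the dict keys ('params', 'coef_', 'intercept_') are inferred from the deserializer above, and serialize_linear_regressor is a hypothetical name:

import numpy as np

def serialize_linear_regressor(model):
    # Inverse of deserialize_linear_regressor: constructor params plus fitted arrays.
    return {
        'params': model.get_params(),
        'coef_': model.coef_.tolist(),
        'intercept_': np.asarray(model.intercept_).tolist(),
    }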
Example #6
def load_model(path):
    with open(path, 'r') as model_file:
        model_dict = json.loads(model_file.read())
    model = LinearRegression()
    model.coef_ = np.array(model_dict['coef'])
    model.intercept_ = np.array(model_dict['intercept'])
    return model
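
The matching writer is not shown; a minimal sketch using the same JSON keys ('coef', 'intercept'), with save_model as a hypothetical name:

import json
import numpy as np

def save_model(model, path):
    # Persist only the fitted parameters that load_model reads back.
    model_dict = {
        'coef': np.asarray(model.coef_).tolist(),
        'intercept': np.asarray(model.intercept_).tolist(),
    }
    with open(path, 'w') as model_file:
        json.dump(model_dict, model_file)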
Example #7
def getResult(year, model, odometer, condition, engine, transmission,
              cylinders, drive):
    linreg = LinearRegression()
    # Pretrained coefficients
    linreg.coef_ = np.array([
        4.15688785e+02, -4.43167349e-02, 2.12651230e-10, -4.29025704e-11,
        2.51503707e-10, 8.23214342e+02, -6.41425274e+02, -1.42694245e+03,
        9.45874490e-11, -1.32009177e+02, -3.30702492e+03, -5.98655733e+02,
        -3.25718092e+02, -4.17621623e+02, 9.98182003e+02, 5.17219110e+02,
        6.56854016e+02, 3.71803894e+02, 2.42936761e+03, -1.84882795e+02,
        -5.00999670e+02, 4.94002073e+02, -2.02945042e+03, -5.59599756e+02,
        -2.05897742e+03, -2.34529423e+03, 4.04687899e+02, 1.52142986e+03,
        9.98071313e+02, 1.98711159e+02, -8.07920998e+02, -1.82481530e+03,
        1.21496766e+03, 8.08414295e+02, -2.64840938e+03, 5.51285004e+03,
        -1.12757053e+03, -8.81546752e+02, -1.11122893e+03, 3.93289308e+02,
        9.87711205e+02, 2.20741028e+03, 1.33915108e+03, -3.31410140e+02,
        -8.48852168e+02, -1.15545497e+02
    ])

    linreg.intercept_ = -818769.414838612

    input_arr = processInputs(year, model, odometer, condition, engine,
                              transmission, cylinders, drive)
    result = linreg.predict(input_arr)
    return result[0]
Example #8
def main(args):

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data(args.input + 'data.h5',
                                  train=True,
                                  background=True)

    # Fill Tau21 profile
    profile = fill_profile(data, VAR_TAU21)

    # Fit profile
    fit = ROOT.TF1('fit', 'pol1', *FIT_RANGE)
    profile.Fit('fit', 'RQ0')
    intercept_val, coef_val = fit.GetParameter(0), fit.GetParameter(1)
    intercept_err, coef_err = fit.GetParError(0), fit.GetParError(1)

    # Create scikit-learn transform
    ddt = LinearRegression()
    ddt.coef_ = np.array([coef_val])
    ddt.intercept_ = np.array([-coef_val * FIT_RANGE[0]])
    ddt.offset_ = np.array([coef_val * FIT_RANGE[0] + intercept_val])

    print "Fitted function:"
    print "  intercept: {:7.4f} ± {:7.4f}".format(intercept_val, intercept_err)
    print "  coef:      {:7.4f} ± {:7.4f}".format(coef_val, coef_err)

    # Save DDT transform
    saveclf(ddt, 'models/ddt/ddt.pkl.gz')

    return 0
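
How the stored transform is applied is outside this snippet; one plausible usage, assuming the usual DDT convention of subtracting the fitted linear trend in rho from tau21 (apply_ddt and its arguments are hypothetical):

import numpy as np

def apply_ddt(ddt, rho, tau21):
    # Subtract the fitted linear trend: ddt.predict(rho) evaluates
    # coef_ * rho + intercept_, i.e. the trend measured from the lower
    # edge of the fit range, so the shifted variable is flat in rho.
    rho = np.asarray(rho, dtype=float).reshape(-1, 1)
    return np.asarray(tau21, dtype=float) - ddt.predict(rho)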
Example #9
def deserialize_linear_regressor(model_dict):
    model = LinearRegression(model_dict["params"])

    model.coef_ = np.array(model_dict["coef_"])
    model.intercept_ = np.array(model_dict["intercept_"])

    return model
Example #10
def predict(load_data, start_date, end_date, model, y_column='Load'):
    coefficients = model['coefficients']
    intercept = model['intercept']
    x_columns = model['x_columns']
    corrected_column = model['corrected_column']

    start_date = datetime.strptime(start_date, "%Y-%m-%d")
    end_date = datetime.strptime(end_date, "%Y-%m-%d")
    test_data_df = load_data[(load_data['Date'] >= start_date)
                             & (load_data['Date'] <= end_date)]

    x_test = test_data_df[x_columns]

    regressor = LinearRegression()
    regressor.coef_ = np.array(coefficients)
    regressor.intercept_ = np.array(intercept)

    y_pred = regressor.predict(x_test)

    if corrected_column is not None:
        y_pred = test_data_df[corrected_column] - y_pred

    return y_pred.tolist()
Example #11
    def sample(self, data, model_name='linear_regression'):
        models = []
        for key in self.training_data:
            model = LinearRegression()
            if model_name == 'linear_regression':
                model = LinearRegression()
            elif model_name == 'ridge_regression':
                model = Ridge()
            elif model_name == 'kernal_ridge':
                model = KernelRidge()
            model_coef = self.coef[key]
            model.coef_ = model_coef[0]
            model.intercept_ = model_coef[1]
            models.append(model)
        X = data.iloc[:, 12:-3]

        X["block"] = data["block"]
        X = X.to_numpy()
        qdt_prediction = []
        for model in tqdm(models):
            y = model.predict(X)

            brate = self.predict_BRate(data, y).to_numpy()

            qdt_prediction.append(brate)
        data["qdt_prediction"] = np.mean(qdt_prediction, axis=0)
        return data
Example #12
def predict(instance, coef_, intercept_):
    # input: instance matrix, coef_ array and intercept_ array
    # output: list of predictions for input instances
    regressor = LinearRegression(fit_intercept=True)
    regressor.coef_ = coef_
    regressor.intercept_ = intercept_
    predictions = regressor.predict(instance)
    return predictions
Example #13
def LR_predict():
    X = json.loads(request.form['X'])
    params = json.loads(request.form['params'])
    reg = LinearRegression()
    reg.coef_ = np.array(params['coef'])
    reg.intercept_ = params['inter']
    y = reg.predict(X)
    return jsonify(pred=y.tolist())  # tolist() converts numpy floats to JSON-serializable Python floats
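
A hedged sketch of a client call for this handler, assuming the view is registered at a route such as /lr_predict (the URL and the example numbers are assumptions; the form fields X, params, coef and inter follow the handler above):

import json
import requests

# Hypothetical client for the LR_predict endpoint above.
payload = {
    'X': json.dumps([[1.0, 2.0], [3.0, 4.0]]),
    'params': json.dumps({'coef': [0.5, -0.25], 'inter': 1.0}),
}
resp = requests.post('http://localhost:5000/lr_predict', data=payload)
print(resp.json()['pred'])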
Example #14
def createModel(company):
    # query the past day's news 
    # news_dict = query_news_articles(company, prev_date, curr_date, trading_dates, all_sources)
      
    # get company ticker
    # ticker = df[df['Name'] == company]['Symbol'].values[0]
    ticker = ticker_dict[company]

    # create model
    MSE_list_AR, MSE_list_ADL, intercept_AR, intercept_ADL, coef_AR, coef_ADL,\
               best_AR_train_index, best_AR_test_index, best_ADL_train_index, best_ADL_test_index = main_read_in_csv(ticker)
    # AR model (coefficients are assigned directly below, so no constructor options are needed)
    model_AR = LinearRegression()
    model_AR.intercept_ = intercept_AR
    model_AR.coef_ = coef_AR

    # ADL model
    model_ADL = LinearRegression()
    model_ADL.intercept_ = intercept_ADL
    model_ADL.coef_ = coef_ADL

    # predict values for tomorrow  
    prediction = {}
    prediction['AR'] = predict_next_value(ticker, company, model_AR, is_ADL=False)
    prediction['ADL'] = predict_next_value(ticker, company, model_ADL, is_ADL=True)

    plot_AR = plot_AR_model(ticker, best_ADL_train_index, best_ADL_test_index, best_AR_train_index, best_AR_test_index)
    plot_ADL = plot_ADL_model(ticker, best_ADL_train_index, best_ADL_test_index, best_AR_train_index, best_AR_test_index)

    # plot dict
    plot_dict = {}
    plot_dict['MSE_labels'] = [1,2,3,4,5,6,7,8]
    plot_dict['MSE_AR_values'] = MSE_list_AR
    plot_dict['MSE_ADL_values'] = MSE_list_ADL
    plot_dict['comp_AR_label'] = plot_AR['x_val']
    plot_dict['comp_ADL_label'] = plot_ADL['x_val']
    plot_dict['comp_AR_actual'] = plot_AR['y_actual']
    plot_dict['comp_AR_predict'] = plot_AR['y_predict']
    plot_dict['comp_ADL_actual'] = plot_ADL['y_actual']
    plot_dict['comp_ADL_predict'] = plot_ADL['y_predict']

    return plot_dict, prediction
Example #15
 def test_via_runtime(self):
     store = self.om.models
     store.register_mixin(ModelVersionMixin)
     reg = LinearRegression()
     reg.coef_ = np.array([2])
     reg.intercept_ = 10
     store.put(reg, 'regmodel', tag='commit1')
     reg.coef_ = np.array([5])
     reg.intercept_ = 0
     store.put(reg, 'regmodel', tag='commit2')
     # via past version pointer
     r1 = self.om.runtime.model('regmodel^').predict([10]).get()
     r2 = self.om.runtime.model('regmodel').predict([10]).get()
     self.assertEqual(r1[0], 10 * 2 + 10)
     self.assertEqual(r2[0], 10 * 5 + 0)
     # via version tag
     r1 = self.om.runtime.model('regmodel@commit1').predict([10]).get()
     r2 = self.om.runtime.model('regmodel@commit2').predict([10]).get()
     self.assertEqual(r1[0], 10 * 2 + 10)
     self.assertEqual(r2[0], 10 * 5 + 0)
Example #16
    def LotFrontage_imputer(self):
        # linear regression of LotFrontage on LotArea (fit with outliers removed);
        # imputed values are capped at 200 based on visualization
        lr = LinearRegression()
        lr.coef_ = np.array([0.00215388])
        lr.intercept_ = 48.640713607035664

        impute_pred = pd.DataFrame(lr.predict(
            self.df.LotArea[self.df.LotFrontage.isnull()].values.reshape(
                -1, 1)),
                                   columns=['LR_Pred'])
        impute_pred['Max'] = 200

        self.df.loc[self.df.LotFrontage.isnull(),
                    'LotFrontage'] = impute_pred.min(1).values
Example #17
def best_subset_regression(data, dependentVar, factorNames, options):
    """Return the factor loadings using best subset regression.

    INPUTS:
        data: pandas df, data matrix, should contain the date column
        and all of the factorNames columns
        dependentVar: string, name of dependent variable
        factorNames: list, elements should be strings, names of the
        independent variables
        options: dictionary, should contain at least two elements,
        timeperiod, and date
            timeperiod: string, if == all, means use entire dataframe,
            otherwise filter the df on this value
            date: name of datecol
            returnModel: boolean, if true, returns model
            maxVars: int, maximum number of factors that can have a
            non-zero loading in the resulting regression
            printLoadings: boolean, if true, prints the coefficients
    Outputs:
        reg: regression object from scikit-learn
        also prints what was desired
    """
    # Check dictionary for maxVars option
    if ('maxVars' not in options.keys()):
        print('maxVars not specified in options')
        return

    if (options['timeperiod'] == 'all'):
        newData = data.copy()
    else:
        newData = data.copy()
        newData = newData.query(options['timeperiod'])

    # note: best_subset relies on cvxpy, which is not part of a default Anaconda
    # install, so this call fails if cvxpy is unavailable
    alpha, beta = best_subset(data[factorNames].values,
                              data[dependentVar].values, options['maxVars'])
    beta[np.abs(beta) <= 1e-7] = 0.0

    if (options['printLoadings']):
        print_timeperiod(newData, dependentVar, options)
        print('Max Number of Non-Zero Variables is ' + str(options['maxVars']))
        display_factor_loadings(alpha, beta, factorNames, options)

    if (options['returnModel']):
        out = LinearRegression()
        out.intercept_ = alpha[0]
        out.coef_ = beta
        return out
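
A short usage sketch for the options dictionary described in the docstring (the DataFrame df and the column names are hypothetical):

# Hypothetical call illustrating the expected options keys.
options = {
    'timeperiod': 'all',       # or a pandas query string used to filter the rows
    'date': 'Date',            # name of the date column
    'returnModel': True,       # return the LinearRegression object
    'maxVars': 3,              # at most 3 non-zero loadings
    'printLoadings': True,
}
reg = best_subset_regression(df, 'PortfolioReturn', ['MKT', 'SMB', 'HML'], options)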
Example #18
def main(args):

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data(args.input + 'data.h5',
                                  train=True,
                                  background=True)

    #variable = VAR_TAU21
    variable = VAR_N2
    #variable = VAR_DECDEEP
    #variable = VAR_DEEP

    # Fill variable profile
    profile = fill_profile(data, variable)

    # Fit profile
    if variable == VAR_N2:
        fit_range = FIT_RANGE_N2
    elif variable == VAR_TAU21:
        fit_range = FIT_RANGE_TAU21
    elif variable == VAR_DECDEEP:
        fit_range = FIT_RANGE_DECDEEP
    elif variable == VAR_DEEP:
        fit_range = FIT_RANGE_DEEP
    else:
        print "variable invalid"
        return 0
    fit = ROOT.TF1('fit', 'pol1', *fit_range)
    profile.Fit('fit', 'RQ0')
    intercept_val, coef_val = fit.GetParameter(0), fit.GetParameter(1)
    intercept_err, coef_err = fit.GetParError(0), fit.GetParError(1)

    # Create scikit-learn transform
    ddt = LinearRegression()
    ddt.coef_ = np.array([coef_val])
    ddt.intercept_ = np.array([-coef_val * fit_range[0]])
    ddt.offset_ = np.array([coef_val * fit_range[0] + intercept_val])

    print "Fitted function:"
    print "  intercept: {:7.4f} ± {:7.4f}".format(intercept_val, intercept_err)
    print "  coef:      {:7.4f} ± {:7.4f}".format(coef_val, coef_err)

    # Save DDT transform
    saveclf(ddt, 'models/ddt/ddt_{}.pkl.gz'.format(variable))
    print "got to the end of main()"
    return 0
Example #19
 def evaluate(self, x: np.ndarray, y: np.ndarray, metric: str = "neg_mean_absolute_error") -> float:
     """
     Evaluate the linear models using x, and y test data
     Args:
         x (np.ndarray): MxN input data array
         y (np.ndarray): M output targets
         metric (str): scorer function, used with
             sklearn.metrics.get_scorer
      Returns:
          float: the score computed by the scorer on the selected features
      """
     metric_func = get_scorer(metric)
     lr = LinearRegression(fit_intercept=False)
     lr.coef_ = self.coef_[self.indices]  # type: ignore
     lr.intercept_ = 0
     return metric_func(lr, x[:, self.indices], y)
Example #20
 def test_predict_from_data_inline_versions(self):
     X = np.arange(10).reshape(-1, 1)
     y = X * 2
     # train model locally
     clf = LinearRegression()
     clf.fit(X, y)
     result = clf.predict(X)
     # store model in om
     self.om.models.put(clf, 'regression', tag='commit1')
     clf.intercept_ = 10
     self.om.models.put(clf, 'regression', tag='commit2')
     # check we can use it to predict previous version
     resp = self.client.put('/api/v1/model/regression^/predict',
                            json={
                                'columns': ['v'],
                                'data': dict(v=[5]),
                            },
                            auth=self.auth,
                            headers=self._async_headers)
     resp = self._check_async(resp)
     self.assertEqual(resp.status_code, 200)
     data = resp.get_json()['response']
     self.assertEqual(data.get('model'), 'regression^')
     assert_almost_equal(data.get('result'), [10.])
     # check we can use it to predict current version
     resp = self.client.put('/api/v1/model/regression/predict',
                            json={
                                'columns': ['v'],
                                'data': dict(v=[5]),
                            },
                            auth=self.auth,
                            headers=self._headers)
     self.assertEqual(resp.status_code, 200)
     data = resp.get_json()
     self.assertEqual(data.get('model'), 'regression')
     assert_almost_equal(data.get('result'), [20.])
     # check we can use it to predict tagged version
     resp = self.client.put('/api/v1/model/regression@commit1/predict',
                            json={
                                'columns': ['v'],
                                'data': dict(v=[5]),
                            },
                            auth=self.auth,
                            headers=self._headers)
     self.assertEqual(resp.status_code, 200)
     data = resp.get_json()
     self.assertEqual(data.get('model'), 'regression@commit1')
     assert_almost_equal(data.get('result'), [10.])
Example #21
 def create_result_summary(self, model_name='linear_regression'):
     models = {}
     for key in self.training_data:
         model = LinearRegression()
         if model_name == 'linear_regression':
             model = LinearRegression()
         elif model_name == 'ridge_regression':
             model = Ridge()
         elif model_name == 'kernal_ridge':
             model = KernelRidge()
         model_coef = self.coef[key]
         model.coef_ = model_coef[0]
         model.intercept_ = model_coef[1]
         models[key] = model
     df = self.predict_attraction(self.training_data, models=models)
     df.to_csv("results/{}_result.csv".format(self.exp_name))
Example #22
def load(dir):
    """import a bk model as a sklearn model"""
    meta_f, params_f = _paths(dir)

    meta = json.load(open(meta_f, 'r'))
    type = meta['type']

    # only supports linear regression at the moment
    assert type == 'linear_regression'
    h5f = h5py.File(params_f, 'r')
    coef = h5f['coef'][:]
    intercept = h5f['intercept'][()] # to retrieve scalar values
    h5f.close()

    model = LinearRegression()
    model.coef_ = coef
    model.intercept_ = intercept
    return model
Example #23
def load(dir):
    """import a bk model as a sklearn model"""
    meta_f, params_f = _paths(dir)

    meta = json.load(open(meta_f, 'r'))
    type = meta['type']

    # only supports linear regression at the moment
    assert type == 'linear_regression'
    h5f = h5py.File(params_f, 'r')
    coef = h5f['coef'][:]
    intercept = h5f['intercept'][()]  # to retrieve scalar values
    h5f.close()

    model = LinearRegression()
    model.coef_ = coef
    model.intercept_ = intercept
    return model
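
The corresponding export routine is not shown; a minimal sketch that writes the same layout (a meta JSON with a 'type' field and an HDF5 file with 'coef' and 'intercept' datasets), assuming _paths returns the same pair of paths used by load():

import json
import h5py
import numpy as np

def save(model, dir):
    """Hypothetical counterpart of load(): export a LinearRegression as a bk model."""
    meta_f, params_f = _paths(dir)

    with open(meta_f, 'w') as f:
        json.dump({'type': 'linear_regression'}, f)

    with h5py.File(params_f, 'w') as h5f:
        h5f.create_dataset('coef', data=np.asarray(model.coef_))
        h5f.create_dataset('intercept', data=np.asarray(model.intercept_))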
Example #24
def linear_regression(X_train, Y_train, xval=None):
    """
	Create linear regression model on data X with labels Y.
	"""
    if not xval:
        model = LinearRegression(fit_intercept=False).fit(
            X_train, log_scale(Y_train))
    else:
        model = LinearRegression(fit_intercept=False)
        results = cross_validate(model,
                                 X_train,
                                 log_scale(Y_train),
                                 cv=xval,
                                 return_estimator=True)
        coefs = np.array([float(m.coef_) for m in results['estimator']])
        avg_coef = np.mean(coefs)
        model.coef_ = np.array([[avg_coef]])
        model.intercept_ = 0
    return model
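
log_scale is not defined in this snippet; a plausible implementation, assuming it simply log-transforms positive targets:

import numpy as np

def log_scale(y):
    # Hypothetical helper: natural-log transform of the targets.
    return np.log(y)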
Example #25
def best_subset_regression(data, dependentVar, factorNames, options):
    '''best_subset_regression takes in a dataset and returns the factor loadings using best subset regression
    INPUTS:
        data: pandas df, data matrix, should contain the date column and all of the factorNames columns
        dependentVar: string, name of dependent variable
        factorNames: list, elements should be strings, names of the independent variables
        options: dictionary, should contain at least two elements, timeperiod, and date
            timeperiod: string, if == all, means use entire dataframe, otherwise filter the df on this value
            date: name of datecol
            returnModel: boolean, if true, returns model
            maxVars: int, maximum number of factors that can have a non-zero loading in the resulting regression
    Outputs:
        reg: regression object from scikit-learn
        also prints what was desired
    '''
    #Check dictionary for maxVars option
    if ('maxVars' not in options.keys()):
        print('maxVars not specified in options')
        return

    if (options['timeperiod'] == 'all'):
        newData = data.copy()
    else:
        newData = data.copy()
        newData = newData.query(options['timeperiod'])

    #perform linear regression
    alpha, beta = best_subset(data[factorNames].values,
                              data[dependentVar].values, options['maxVars'])

    if options['printLoadings']:
        #Now print the results
        print_timeperiod(newData, dependentVar, options)

        #Now print the factor loadings
        display_factor_loadings(alpha, beta, factorNames, options)

    if (options['returnModel']):
        out = LinearRegression()
        out.intercept_ = alpha[0]
        out.coef_ = beta
        return out
Example #26
    def parse_MATLAB_model(model_json):
        """
        Reading in a stored matlab z score model for use in python
        :param model_json: dict read from the JSON saved by MATLAB
        :return: a dict with the necessary information
        """
        # muscle = model_json['Muscle']
        rmse = model_json['RMSE']
        mse = model_json['MSE']
        dfe = model_json['DFE']
        coefficient_cov = model_json['CoeffCov']
        intercept = model_json['Coefficients'][0]
        names = model_json['CoefficientNames'][1:]
        coeff = model_json['Coefficients'][1:]

        lr = LinearRegression()
        lr.coef_ = np.array(coeff)
        lr.intercept_ = intercept

        return {'model': lr, 'coefficient_names': names, 'rmse': rmse, 'dfe': dfe,
                'mse': mse, 'coefficient_cov': np.array(coefficient_cov)}
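
A hedged illustration of the JSON structure this parser expects, inferred from the keys it reads and called here as if it were a standalone function (all values below are made up):

# Hypothetical MATLAB-exported model dict consumed by parse_MATLAB_model.
model_json = {
    'Muscle': 'biceps',
    'RMSE': 0.42,
    'MSE': 0.18,
    'DFE': 97,
    'CoeffCov': [[0.01, 0.0, 0.0], [0.0, 0.02, 0.0], [0.0, 0.0, 0.03]],
    'Coefficients': [1.5, 0.3, -0.1],                 # [intercept, coef_1, coef_2]
    'CoefficientNames': ['(Intercept)', 'age', 'height'],
}
parsed = parse_MATLAB_model(model_json)
z_model = parsed['model']   # LinearRegression with coef_ and intercept_ already set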
Example #27
def calculate_coefficients(star_objects,
                           group,
                           sample_weights=None,
                           set_slope=False):
    """
    Parameters
    --------
        star_objects: list
            Contains a list of Star objects
        group: int, 1 or 0
            1 is slow rotators, 0 is fast
        sample_weights: list, optional, default 1
            containts list of weights of the values w.r.t. the line
        return_linregg: boolean, optional, default False
            Whether to return the object of the linear regression to 
            use things such as predict
        set_slope: boolean, optional, default False
            Sets the slope to 0 
    Returns
    ------
    LinearRegression: class object, optional, default False
    list: [b0,b1,b2] 
    """
    lr = LinearRegression()

    lr.fit(
        [star.predictors for star in star_objects if star.group == group],
        [star.period for star in star_objects if star.group == group],
        sample_weights,
    )

    if set_slope:
        # sets the coefs to zero and the intercept to the (weighted) mean period
        lr.coef_ = np.zeros(len(star_objects[0].predictors))
        lr.intercept_ = np.average(
            [star.period for star in star_objects if star.group == group],
            weights=sample_weights,
        )

    return np.append(lr.intercept_, lr.coef_[1:]), lr
Example #28
 def __init__(self, msg, feature_names=None):
     self.models = []
     self.coef = []
     for m in msg.Structure.Components:
         s = None
         if m.LinearCoeff:
             s = LinearRegression()
             s.intercept_ = m.LinearCoeff.Intercept
             if feature_names is None:
                 s.coef_ = np.zeros(len(m.LinearCoeff.Coeff))
             else:
                 s.coef_ = np.zeros(len(feature_names))
             for i, elem in enumerate(m.LinearCoeff.Coeff):
                 if feature_names is None:
                     s.coef_[i] = elem.Coeff
                 else:
                     l = feature_names.get_loc(elem.Feature)
                     s.coef_[l] = elem.Coeff
         self.models.append(s)
         if m.Coeff:
             self.coef.append(m.Coeff)
         else:
             self.coef.append(1.0)
Example #29
 def __init__(self, msg, feature_names=None):
     self.models = []
     self.coef = []
     for m in msg.Structure.Components:
         s = None
         if m.LinearCoeff:
             s = LinearRegression()
             s.intercept_ = m.LinearCoeff.Intercept
             if feature_names is None:
                 s.coef_ = np.zeros(len(m.LinearCoeff.Coeff))
             else:
                 s.coef_ = np.zeros(len(feature_names))                    
             for i, elem in enumerate(m.LinearCoeff.Coeff):
                 if feature_names is None:
                     s.coef_[i] = elem.Coeff
                 else:
                     l = feature_names.get_loc(elem.Feature)
                     s.coef_[l] = elem.Coeff
         self.models.append(s)
         if m.Coeff:
             self.coef.append(m.Coeff)
         else:
             self.coef.append(1.0)
Example #30
def avaliaRegistros(df, intpt, coef):  # DataFrame, intercept, coefficient
    # get the predictor and target columns from the DataFrame
    x = df.iloc[:, :1].values
    y = df.iloc[:, 2:3].values
    # instantiate the regressor
    regressor = LinearRegression()
    regressor.fit(x, y)
    # overwrite the fitted parameters with the ones passed to the function
    regressor.intercept_ = intpt
    regressor.coef_[0] = coef
    derror = y - regressor.predict(x)
    percent = 0
    soma = 0          # sum of the errors
    contaerro = 0     # count of non-negative errors
    totalleitura = len(derror)
    for leitura in derror:
        soma += leitura
        if leitura >= 0:
            contaerro += 1
    percent = (100 * contaerro) / totalleitura
    # percentage of positive errors: percent / sum of the errors: soma
    retornoDesempenho = [soma[0], percent]
    return retornoDesempenho
Example #31
    def strategy_train_opt_mix(self, train, valid):
        df = pd.concat([train.iloc[:50], valid], axis=0)
        y_preds = self._predict(df)
        num = y_preds.shape[1]

        m = LinearRegression(fit_intercept=False)
        m.intercept_ = 0.0

        best_loss = None
        best_coef = None
        for alpha in np.linspace(0.0, 1.0, 101):
            beta = (1 - alpha) / (num - 1)
            coef_ = np.array([alpha] + [beta] * (num - 1))
            assert abs(sum(coef_) - 1.0) < 1e-6
            y_pred = np.sum(np.multiply(y_preds, coef_), axis=1)
            y_pred = pd.Series(y_pred, index=df.index)
            loss = self.loss(y_pred, df['target'], df['weight'])
            if best_loss is None or loss < best_loss:
                best_loss = loss
                best_coef = coef_

        m.coef_ = best_coef
        logging.info(f"OptMix.coef_={m.coef_} best_loss={best_loss:.4f}")
        self.strategy_model = m
Example #32
def build_linear_model(rotation, translation):
    m = LinearRegression()
    m.coef_ = rotation
    m.intercept_ = translation
    return m
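
A short usage example: with a 2x2 rotation matrix as coef_ and a length-2 translation as intercept_, predict applies the affine map row-wise to 2-D points (the numbers are illustrative):

import numpy as np

# 90-degree counter-clockwise rotation followed by a translation of (1, 2).
rotation = np.array([[0.0, -1.0],
                     [1.0,  0.0]])
translation = np.array([1.0, 2.0])

affine = build_linear_model(rotation, translation)
points = np.array([[1.0, 0.0],
                   [0.0, 1.0]])
print(affine.predict(points))   # [[1. 3.]
                                #  [0. 2.]]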