Example No. 1
 def test_promotion_to_other_db_works(self):
     om = self.om
     other = Omega(mongo_url=om.mongo_url + '_promotest')
     for name in other.models.list(include_temp=True):
         other.models.drop(name, force=True)
     for name in other.datasets.list(include_temp=True):
         other.datasets.drop(name, force=True)
     reg = LinearRegression()
     reg.coef_ = 10
     # try models
     om.models.put(reg, 'mymodel')
     self.assertIn('mymodel', om.models.list())
     self.assertNotIn('mymodel', other.models.list())
     om.models.promote('mymodel', other.models)
     self.assertIn('mymodel', other.models.list())
     # ensure changes only in original
     reg.coef_ = 15
     om.models.put(reg, 'mymodel')
     self.assertNotEqual(om.models.get('mymodel').coef_, other.models.get('mymodel').coef_)
     # try datasets
     om.datasets.put(['foo'], 'foo')
     # -- ensure only in original
     self.assertIn('foo', om.datasets.list())
     self.assertNotIn('foo', other.datasets.list())
     # -- promote to other
     om.datasets.promote('foo', other.datasets)
     self.assertIn('foo', other.datasets.list())
     self.assertEqual(om.datasets.get('foo'), other.datasets.get('foo'))
     # change original ensure copy not changed
     om.datasets.put(['foo'], 'foo', append=True)
     self.assertNotEqual(om.datasets.get('foo'), other.datasets.get('foo'))
Example No. 2
def run_regression(factors, symbols, dataframe):
    weights = []
    for symbol in symbols:
        if dataframe[symbol].isnull().sum(axis=0) <= 420:
            # few gaps: fill them by propagating neighbouring values
            dataframe[symbol].fillna(method='bfill', inplace=True)
            dataframe[symbol].fillna(method='ffill', inplace=True)
            X = dataframe[factors].values
            y = dataframe[symbol].values
        else:
            # too many gaps: keep only the rows where the symbol is finite
            df_temp = dataframe[np.isfinite(dataframe[symbol])]
            X = df_temp[factors].values
            y = df_temp[symbol].values
        X1 = np.where(np.isnan(X), 0, X)
        y1 = np.where(np.isnan(y), 0, y)
        reg = LinearRegression()
        reg.fit(X1, y1)
        # prepend the intercept so each stored row is [intercept, coefs...]
        reg.coef_ = np.append(reg.intercept_, reg.coef_)
        weights.append(reg.coef_)
    return weights
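Each row stored in weights has the form [intercept, coef_1, ..., coef_k]. A minimal sketch of applying such a row to new factor values (apply_weights is a hypothetical helper, not from the source):

import numpy as np

def apply_weights(weights_row, factor_values):
    # weights_row: [intercept, coef_1, ..., coef_k], as built by run_regression
    # factor_values: array of shape (n_samples, k)
    intercept = weights_row[0]
    coefs = np.asarray(weights_row[1:])
    return factor_values @ coefs + intercept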
Example No. 3
def create_diff_oracle_baseline(num_of_train_samples, alpha):
    optimum_coef = get_optimum_model()
    X_good, y_good, X_bad, y_bad = generate_samples(
        num_of_train_samples,
        alpha,
        optimum_coef,
        noise_std_deviation=noise_std_dev)

    # build the mixed dataset
    X = np.concatenate((X_good, X_bad))
    y = np.concatenate((y_good, y_bad))

    # Baseline model
    baseline_model = LinearRegression(fit_intercept=False)
    baseline_model.fit(X_bad, y_bad)  # fit once to initialise the estimator
    baseline_model.coef_ = np.random.rand(1, 100)  # then reset the coefficients
    baseline_model = baseline_model.fit(X, y)
    #baseline_model = update_model(baseline_model, X, y)

    # oracle
    oracle_model = LinearRegression(fit_intercept=False)
    oracle_model.fit(X_bad, y_bad)  # fit once to initialise the estimator
    oracle_model.coef_ = np.random.rand(1, 100)  # then reset the coefficients
    oracle_model = oracle_model.fit(X_good, y_good)
    #oracle_model = update_model(oracle_model, X_good, y_good)

    baseline_model_loss = np.sqrt(
        (np.square(baseline_model.coef_ - optimum_coef)).sum() /
        len(optimum_coef))
    oracle_model_loss = np.sqrt(
        (np.square(oracle_model.coef_ - optimum_coef)).sum() /
        len(optimum_coef))

    return np.array([baseline_model_loss, oracle_model_loss])
Example No. 4
 def test_dropversion(self):
     store = self.om.models
     store.register_mixin(ModelVersionMixin)
     reg = LinearRegression()
     reg.coef_ = np.array([2])
     reg.intercept_ = 10
     store.put(reg, 'regmodel', tag='commit1')
     reg.coef_ = np.array([5])
     reg.intercept_ = 0
     store.put(reg, 'regmodel', tag='commit2')
Example No. 5
 def test_runtime_exporter_export_promote(self):
     om = self.om
     om_restore = self.om_restore
     # dataset
     df = pd.DataFrame({'x': range(10)})
     om.datasets.put(df, 'mydf', append=False)
     # models
     model = LinearRegression()
     model.coef_ = 1
     om.models.put(model, 'mymodel', tag='version1')
     model.coef_ = 2
     om.models.put(model, 'mymodel', tag='version2')
     # job
     code = "print('hello')"
     om.jobs.create(code, 'myjob')
     # export
     OmegaExporter(om).to_archive('/tmp/test', [
         'data/mydf', 'models/mymodel@version1', 'models/mymodel@version2',
         'jobs/myjob'
     ])
     # import
     # -- mock temp bucket retrieval in order to test promotion
     temp_bucket = om[OmegaExporter._temp_bucket]
     self._apply_store_mixin(temp_bucket)
     with patch.object(om_restore, '_get_bucket') as meth:
         meth.return_value = temp_bucket
         OmegaExporter(om_restore).from_archive('/tmp/test',
                                                pattern='data/.*|models/.*',
                                                promote_to=om_restore)
         self.assertIn('mydf', om_restore.datasets.list())
         self.assertIn('mymodel', om_restore.models.list())
         self.assertEqual(len(om_restore.models.revisions('mymodel')), 2)
         # check model versions are as expected
         # -- latest
         mdl = om_restore.models.get('mymodel')
         self.assertIsInstance(mdl, LinearRegression)
         self.assertEqual(mdl.coef_, 2)
         # previous
         mdlv1 = om_restore.models.get('mymodel@version1')
         self.assertIsInstance(mdlv1, LinearRegression)
         self.assertEqual(mdlv1.coef_, 1)
         # check jobs were not restored yet
         self.assertEqual(om_restore.jobs.list(), [])
         # restore jobs explicitly
         # -- note jobs promotion is not supported (pending #218)
         OmegaExporter(om_restore).from_archive('/tmp/test',
                                                pattern='jobs/.*')
         self.assertEqual(om_restore.jobs.list(), ['myjob.ipynb'])
         # ensure models were not touched
         self.assertEqual(len(om_restore.models.revisions('mymodel')), 2)
Example No. 6
def stagewise_regression(x, y, tolerance=1e-4, max_iterations=1e3, verbose=0):
    model = LinearRegression()
    model.coef_ = np.zeros(x.shape[1])
    model.intercept_ = np.mean(y, axis=0)

    it, corr = 0, tolerance * 2
    while abs(corr) > tolerance:
        it += 1
        res = get_residuals(model, x, y)
        ix, corr = find_max_correlation(res, x)
        cf = get_coeff(x[:, ix], res)
        if cf == 0:
            print("[!!] Coefficient not being updated")
            break
        update_model(model, ix, cf)
        if verbose == 2:
            print("[+] Residuals: %f. Max corr: %f in cord %d, coeff: %f" %
                  (np.dot(res, res), corr, ix, cf))
        if it > max_iterations:
            print("[!!] Max iterations")
            break
    if verbose == 1:
        print("[+] Residuals: %f. Max corr: %f in cord %d, coeff: %f" %
              (np.dot(res, res), corr, ix, cf))
    return model
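The helpers get_residuals, find_max_correlation, get_coeff and update_model are not shown in the source; a minimal sketch of what they could look like (assumptions, not the original implementations):

import numpy as np

def get_residuals(model, x, y):
    # residuals of the current additive model
    return y - model.predict(x)

def find_max_correlation(res, x):
    # column index and correlation of the predictor most correlated with res
    corrs = [np.corrcoef(x[:, j], res)[0, 1] for j in range(x.shape[1])]
    ix = int(np.argmax(np.abs(corrs)))
    return ix, corrs[ix]

def get_coeff(xj, res):
    # least-squares coefficient of a single predictor regressed on res
    return float(np.dot(xj, res) / np.dot(xj, xj))

def update_model(model, ix, cf):
    # fold the new partial coefficient into the running model
    model.coef_[ix] += cf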
Example No. 7
    def sample(self, data, model_name='linear_regression'):
        models = []
        for key in self.training_data:
            # default to LinearRegression for unrecognised model names
            model = LinearRegression()
            if model_name == 'ridge_regression':
                model = Ridge()
            elif model_name == 'kernal_ridge':
                model = KernelRidge()
            model_coef = self.coef[key]
            model.coef_ = model_coef[0]
            model.intercept_ = model_coef[1]
            models.append(model)
        X = data.iloc[:, 12:-3]

        X["block"] = data["block"]
        X = X.to_numpy()
        qdt_prediction = []
        for model in tqdm(models):
            y = model.predict(X)

            brate = self.predict_BRate(data, y).to_numpy()

            qdt_prediction.append(brate)
        data["qdt_prediction"] = np.mean(qdt_prediction, axis=0)
        return data
Example No. 8
def get_prediction(score_name, tokenizer, model, sentence):
    s = pd.read_pickle(score_name)
    d = s['data']

    coeffs = d.layer_weights[0][-1].values
    intercept = d.layer_weights[0][-1].intercept.values

    # rebuild a linear probe from the stored weights
    new_model = LinearRegression()
    new_model.intercept_ = intercept
    new_model.coef_ = coeffs

    inputs = tokenizer(sentence, return_tensors="pt")
    outputs = model(**inputs,
                    labels=inputs["input_ids"],
                    output_hidden_states=True)

    # take the last layer's hidden states and keep the final token's vector
    hiddenStates = outputs.hidden_states
    hiddenStatesLayer = hiddenStates[-1]
    lastWordState = hiddenStatesLayer[-1, :].detach().numpy()
    lastWordState = lastWordState[-1].reshape(1, -1)

    prediction = new_model.predict(lastWordState)
    return prediction
Example No. 9
def getResult(year, model, odometer, condition, engine, transmission,
              cylinders, drive):
    linreg = LinearRegression()
    # Pretrained coefficients
    linreg.coef_ = np.array([
        4.15688785e+02, -4.43167349e-02, 2.12651230e-10, -4.29025704e-11,
        2.51503707e-10, 8.23214342e+02, -6.41425274e+02, -1.42694245e+03,
        9.45874490e-11, -1.32009177e+02, -3.30702492e+03, -5.98655733e+02,
        -3.25718092e+02, -4.17621623e+02, 9.98182003e+02, 5.17219110e+02,
        6.56854016e+02, 3.71803894e+02, 2.42936761e+03, -1.84882795e+02,
        -5.00999670e+02, 4.94002073e+02, -2.02945042e+03, -5.59599756e+02,
        -2.05897742e+03, -2.34529423e+03, 4.04687899e+02, 1.52142986e+03,
        9.98071313e+02, 1.98711159e+02, -8.07920998e+02, -1.82481530e+03,
        1.21496766e+03, 8.08414295e+02, -2.64840938e+03, 5.51285004e+03,
        -1.12757053e+03, -8.81546752e+02, -1.11122893e+03, 3.93289308e+02,
        9.87711205e+02, 2.20741028e+03, 1.33915108e+03, -3.31410140e+02,
        -8.48852168e+02, -1.15545497e+02
    ])

    linreg.intercept_ = -818769.414838612

    input_arr = processInputs(year, model, odometer, condition, engine,
                              transmission, cylinders, drive)
    result = linreg.predict(input_arr)
    return result[0]
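Note that linreg is never fit: LinearRegression.predict only needs coef_ and intercept_, computing y = X @ coef_ + intercept_, so assigning pretrained values is sufficient.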
Example No. 10
 def test_ort_gradient_optimizers_use_numpy_nan_w(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X, y = make_regression(  # pylint: disable=W0632
         100,
         n_features=10,
         bias=2,
         random_state=0)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
     X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
     reg = LinearRegression()
     reg.fit(X_train, y_train, sample_weight=w_train)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx, weight_name='weight')
     inits = ['intercept', 'coef']
     train_session = OrtGradientOptimizer(onx_loss,
                                          inits,
                                          learning_rate=1e3)
     self.assertRaise(
         lambda: train_session.fit(
             X_train, y_train, w_train, use_numpy=True), ConvergenceError)
Example No. 11
def get_stack(X, Y):
    ridge_file = 'vif ridge2019-02-28 18/10/09.790796.sav'
    ridge_file = ridge_file.replace('/', ':')  # saved filenames use colons, not slashes
    ridge = pickle.load(open(ridge_file, 'rb'))

    linear_file = 'vif linear2019-02-28 18/10/09.871399.sav'
    linear_file = linear_file.replace('/', ':')  # saved filenames use colons, not slashes
    linear = pickle.load(open(linear_file, 'rb'))

    # stack the two models by averaging their parameters
    final_model = LinearRegression()
    final_model.intercept_ = (ridge.intercept_ + linear.intercept_) / 2
    final_model.coef_ = (ridge.coef_ + linear.coef_) / 2

    print(final_model.score(X, Y))

    suffix = str(datetime.datetime.now())
    model_filename = 'vif stack' + suffix + '.sav'
    pickle.dump(final_model, open(model_filename, 'wb'))  # save the stacked model
    csv_filename = 'vif stack ' + suffix + '.csv'

    raw_test, test_IDs = load_test()
    predict = final_model.predict(raw_test)
    predict = np.exp(predict)  # undo the log transform applied to the target
    predict = pd.DataFrame(predict)
    predict = pd.concat([test_IDs, predict], axis=1)
    predict.columns = ['Id', 'SalePrice']
    predict.to_csv(csv_filename, index=False)
Example No. 12
 def test_ort_gradient_optimizers_use_numpy_w_l1(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X, y = make_regression(  # pylint: disable=W0632
         100,
         n_features=10,
         bias=2,
         random_state=0)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
     X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
     reg = LinearRegression()
     reg.fit(X_train, y_train, sample_weight=w_train)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx, weight_name='weight', score_name='l1')
     inits = ['intercept', 'coef']
     train_session = OrtGradientOptimizer(onx_loss,
                                          inits,
                                          learning_rate=1e-3)
     self.assertRaise(lambda: train_session.get_state(), AttributeError)
     train_session.fit(X_train, y_train, w_train, use_numpy=True)
     state_tensors = train_session.get_state()
     self.assertEqual(len(state_tensors), 2)
     r = repr(train_session)
     self.assertIn("OrtGradientOptimizer(model_onnx=", r)
     self.assertIn("learning_rate='invscaling'", r)
     losses = train_session.train_losses_
     self.assertGreater(len(losses), 1)
     self.assertFalse(any(map(numpy.isnan, losses)))
Example No. 13
def deserialize_linear_regressor(model_dict):
    model = LinearRegression(**model_dict['params'])  # params is a kwargs dict

    model.coef_ = np.array(model_dict['coef_'])
    model.intercept_ = np.array(model_dict['intercept_'])

    return model
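A matching serializer (a sketch, not part of the source) would produce the dict this function consumes:

import numpy as np

def serialize_linear_regressor(model):
    # inverse of deserialize_linear_regressor above
    return {
        'params': model.get_params(),
        'coef_': model.coef_.tolist(),
        'intercept_': np.asarray(model.intercept_).tolist(),
    }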
Example No. 14
def predict(load_data, start_date, end_date, model, y_column='Load'):
    coefficients = model['coefficients']
    intercept = model['intercept']
    x_columns = model['x_columns']
    corrected_column = model['corrected_column']

    start_date = datetime.strptime(start_date, "%Y-%m-%d")
    end_date = datetime.strptime(end_date, "%Y-%m-%d")
    test_data_df = load_data[(load_data['Date'] >= start_date)
                             & (load_data['Date'] <= end_date)]

    x_test = test_data_df[x_columns]

    regressor = LinearRegression()
    regressor.coef_ = np.array(coefficients)
    regressor.intercept_ = np.array(intercept)

    y_pred = regressor.predict(x_test)

    if corrected_column is not None:
        y_pred = test_data_df[corrected_column] - y_pred

    return y_pred.tolist()
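The model argument is a plain dict; a sketch of its expected shape, with keys taken from the accesses above (the column names are illustrative):

model = {
    'coefficients': [0.5, -0.2],               # one weight per entry in x_columns
    'intercept': 1.0,
    'x_columns': ['Temperature', 'Humidity'],  # illustrative feature names
    'corrected_column': None,                  # or a column to subtract predictions from
}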
Example No. 15
def load_model(path):
    with open(path, 'r') as model_file:
        model_dict = json.load(model_file)
    model = LinearRegression()
    model.coef_ = np.array(model_dict['coef'])
    model.intercept_ = np.array(model_dict['intercept'])
    return model
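A complementary save_model sketch (hypothetical; it writes the same 'coef' and 'intercept' keys that load_model reads):

import json
import numpy as np

def save_model(model, path):
    # counterpart to load_model above
    with open(path, 'w') as model_file:
        json.dump({'coef': model.coef_.tolist(),
                   'intercept': np.asarray(model.intercept_).tolist()},
                  model_file)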
Example No. 16
def deserialize_linear_regressor(model_dict):
    model = LinearRegression(**model_dict["params"])  # params is a kwargs dict

    model.coef_ = np.array(model_dict["coef_"])
    model.intercept_ = np.array(model_dict["intercept_"])

    return model
Example No. 17
def main(args):

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data(args.input + 'data.h5',
                                  train=True,
                                  background=True)

    # Fill Tau21 profile
    profile = fill_profile(data, VAR_TAU21)

    # Fit profile
    fit = ROOT.TF1('fit', 'pol1', *FIT_RANGE)
    profile.Fit('fit', 'RQ0')
    intercept_val, coef_val = fit.GetParameter(0), fit.GetParameter(1)
    intercept_err, coef_err = fit.GetParError(0), fit.GetParError(1)

    # Create scikit-learn transform
    ddt = LinearRegression()
    ddt.coef_ = np.array([coef_val])
    ddt.intercept_ = np.array([-coef_val * FIT_RANGE[0]])
    ddt.offset_ = np.array([coef_val * FIT_RANGE[0] + intercept_val])

    print "Fitted function:"
    print "  intercept: {:7.4f} ± {:7.4f}".format(intercept_val, intercept_err)
    print "  coef:      {:7.4f} ± {:7.4f}".format(coef_val, coef_err)

    # Save DDT transform
    saveclf(ddt, 'models/ddt/ddt.pkl.gz')

    return 0
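Given the coef_ and intercept_ set above, ddt.predict(x) evaluates coef_val * (x - FIT_RANGE[0]). A hedged sketch of applying the saved transform (the names rho and tau21 are assumptions about the analysis variables):

# rho and tau21 are assumed 1-D numpy arrays from the analysis
shift = ddt.predict(rho.reshape(-1, 1))  # equals coef_val * (rho - FIT_RANGE[0])
tau21_ddt = tau21 - shift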
Example No. 18
 def test_ort_gradient_optimizers_optimal_use_ort(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X, y = make_regression(  # pylint: disable=W0632
         100,
         n_features=10,
         bias=2,
         random_state=0)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     X_train, _, y_train, __ = train_test_split(X, y)
     reg = LinearRegression()
     reg.fit(X_train, y_train)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     onx_loss = add_loss_output(onx)
     inits = ['intercept', 'coef']
     train_session = OrtGradientOptimizer(
         onx_loss,
         inits,
         max_iter=10,
         learning_rate=LearningRateSGD(learning_rate='optimal'))
     self.assertRaise(lambda: train_session.get_state(), AttributeError)
     train_session.fit(X_train, y_train, use_numpy=False)
     state_tensors = train_session.get_state()
     self.assertEqual(len(state_tensors), 2)
     r = repr(train_session)
     self.assertIn("OrtGradientOptimizer(model_onnx=", r)
     self.assertIn("learning_rate='optimal'", r)
     losses = train_session.train_losses_
     self.assertGreater(len(losses), 1)
     self.assertFalse(any(map(numpy.isnan, losses)))
Example No. 19
 def test_ort_gradient_optimizers_use_numpy_nesterov(self):
     from onnxcustom.utils.orttraining_helper import add_loss_output
     from onnxcustom.training.optimizers import OrtGradientOptimizer
     X, y = make_regression(  # pylint: disable=W0632
         100,
         n_features=10,
         bias=2,
         random_state=0)
     X = X.astype(numpy.float32)
     y = y.astype(numpy.float32)
     X_train, _, y_train, __ = train_test_split(X, y)
     reg = LinearRegression()
     reg.fit(X_train, y_train)
     reg.coef_ = reg.coef_.reshape((1, -1))
     onx = to_onnx(reg,
                   X_train,
                   target_opset=opset,
                   black_op={'LinearRegressor'})
     set_model_props(onx, {'info': 'unit test'})
     onx_loss = add_loss_output(onx)
     inits = ['intercept', 'coef']
     self.assertRaise(
         lambda: OrtGradientOptimizer(
             onx_loss, inits, learning_rate="Nesterov"),
         NotImplementedError)
Example No. 20
def predict(instance, coef_, intercept_):
    # input: instance matrix, coef_ array, and intercept_ array
    # output: list of predictions for the input instances
    regressor = LinearRegression(fit_intercept=True)
    regressor.coef_ = coef_
    regressor.intercept_ = intercept_
    predictions = regressor.predict(instance)
    return predictions
Example No. 21
 def test_model_multiple_versioned_export(self):
     om = self.om
     om_restore = self.om_restore
     model = LinearRegression()
     model.coef_ = 1
     om.models.put(model, 'mymodel', tag='version1')
     model.coef_ = 2
     om.models.put(model, 'mymodel', tag='version2')
     print(om.models.get('mymodel@version1').coef_)
     print(om.models.get('mymodel@version2').coef_)
     # save specific versions
     om.models.to_archive('mymodel@version1', '/tmp/test')
     om.models.to_archive('mymodel@version2', '/tmp/test')
     om.models.drop('mymodel', force=True)
     # restore specific versions
     om_restore.models.from_archive('/tmp/test', 'mymodel@version1')
     om_restore.models.from_archive('/tmp/test', 'mymodel@version2')
     # check we can access specific versions, as restored
     mdl = om_restore.models.get('mymodel@version1')
     self.assertIsInstance(mdl, LinearRegression)
     self.assertEqual(mdl.coef_, 1)
     mdl = om_restore.models.get('mymodel@version2')
     self.assertIsInstance(mdl, LinearRegression)
     self.assertEqual(mdl.coef_, 2)
     # we don't expect a versioned base model
     self.assertNotIn('mymodel', om.models.list())
     self.assertNotIn('mymodel@version1', om.models.list())
     self.assertNotIn('mymodel@version2', om.models.list())
     # promote from restore bucket into a versioned model
     om_restore.models.promote('mymodel@version1', om.models)
     om_restore.models.promote('mymodel@version2', om.models)
     # check the model is actually versioned
     mdl = om.models.get('mymodel@latest')
     self.assertIsInstance(mdl, LinearRegression)
     self.assertEqual(mdl.coef_, 2)
     self.assertEqual(om.models.list(hidden=True), ['mymodel'])
     self.assertNotIn('mymodel@version1', om.models.list())
     self.assertNotIn('mymodel@version2', om.models.list())
     self.assertEqual(len(om.models.revisions('mymodel')), 2)
     # check we can access the versioned model
     mdl = om.models.get('mymodel@version1')
     self.assertIsInstance(mdl, LinearRegression)
     self.assertEqual(mdl.coef_, 1)
     mdl = om.models.get('mymodel@version2')
     self.assertIsInstance(mdl, LinearRegression)
     self.assertEqual(mdl.coef_, 2)
Example No. 22
def LR_predict():
    X = json.loads(request.form['X'])
    params = json.loads(request.form['params'])
    reg = LinearRegression()
    reg.coef_ = np.array(params['coef'])
    reg.intercept_ = params['inter']
    y = reg.predict(X)
    return jsonify(pred=list(y))
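A hedged client-side sketch of the payload LR_predict expects; the URL and route are assumptions, since only the view function is shown:

import json
import requests

payload = {
    'X': json.dumps([[1.0, 2.0], [3.0, 4.0]]),
    'params': json.dumps({'coef': [0.5, -0.2], 'inter': 1.0}),
}
resp = requests.post('http://localhost:5000/lr_predict', data=payload)  # route is an assumption
print(resp.json()['pred'])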
Example No. 23
def createModel(company):
    # query the past day's news 
    # news_dict = query_news_articles(company, prev_date, curr_date, trading_dates, all_sources)
      
    # get company ticker
    # ticker = df[df['Name'] == company]['Symbol'].values[0]
    ticker = ticker_dict[company]

    # create model
    MSE_list_AR, MSE_list_ADL, intercept_AR, intercept_ADL, coef_AR, coef_ADL,\
               best_AR_train_index, best_AR_test_index, best_ADL_train_index, best_ADL_test_index = main_read_in_csv(ticker)
    # AR model: coefficients are assigned directly, so no fit options are needed
    model_AR = LinearRegression()
    model_AR.intercept_ = intercept_AR
    model_AR.coef_ = coef_AR

    # ADL model
    model_ADL = LinearRegression()
    model_ADL.intercept_ = intercept_ADL
    model_ADL.coef_ = coef_ADL

    # predict values for tomorrow  
    prediction = {}
    prediction['AR'] = predict_next_value(ticker, company, model_AR, is_ADL=False)
    prediction['ADL'] = predict_next_value(ticker, company, model_ADL, is_ADL=True)

    plot_AR = plot_AR_model(ticker, best_ADL_train_index, best_ADL_test_index, best_AR_train_index, best_AR_test_index)
    plot_ADL = plot_ADL_model(ticker, best_ADL_train_index, best_ADL_test_index, best_AR_train_index, best_AR_test_index)

    # plot dict
    plot_dict = {}
    plot_dict['MSE_labels'] = [1,2,3,4,5,6,7,8]
    plot_dict['MSE_AR_values'] = MSE_list_AR
    plot_dict['MSE_ADL_values'] = MSE_list_ADL
    plot_dict['comp_AR_label'] = plot_AR['x_val']
    plot_dict['comp_ADL_label'] = plot_ADL['x_val']
    plot_dict['comp_AR_actual'] = plot_AR['y_actual']
    plot_dict['comp_AR_predict'] = plot_AR['y_predict']
    plot_dict['comp_ADL_actual'] = plot_ADL['y_actual']
    plot_dict['comp_ADL_predict'] = plot_ADL['y_predict']

    return plot_dict, prediction
Example No. 24
 def test_via_runtime(self):
     store = self.om.models
     store.register_mixin(ModelVersionMixin)
     reg = LinearRegression()
     reg.coef_ = np.array([2])
     reg.intercept_ = 10
     store.put(reg, 'regmodel', tag='commit1')
     reg.coef_ = np.array([5])
     reg.intercept_ = 0
     store.put(reg, 'regmodel', tag='commit2')
     # via past version pointer
     r1 = self.om.runtime.model('regmodel^').predict([10]).get()
     r2 = self.om.runtime.model('regmodel').predict([10]).get()
     self.assertEqual(r1[0], 10 * 2 + 10)
     self.assertEqual(r2[0], 10 * 5 + 0)
     # via version tag
     r1 = self.om.runtime.model('regmodel@commit1').predict([10]).get()
     r2 = self.om.runtime.model('regmodel@commit2').predict([10]).get()
     self.assertEqual(r1[0], 10 * 2 + 10)
     self.assertEqual(r2[0], 10 * 5 + 0)
Example No. 25
 def test_model_export_multiversion(self):
     om = self.om
     om_restore = self.om_restore
     model = LinearRegression()
     model.coef_ = 2
     om.models.put(model, 'mymodel', tag='latest')
     om.models.put(model, 'mymodel', tag='version2')
     om.models.to_archive('mymodel', '/tmp/test')
     om.models.drop('mymodel', force=True)
     om.models.from_archive('/tmp/test', 'mymodel')
     mdl = om.models.get('mymodel')
     self.assertIsInstance(mdl, LinearRegression)
     om_restore.models.from_archive('/tmp/test', 'mymodel')
     mdl = om_restore.models.get('mymodel')
     # we expect the latest version
     self.assertIsInstance(mdl, LinearRegression)
     self.assertEqual(mdl.coef_, 2)
     self.assertNotIn('versions',
                      om_restore.models.metadata('mymodel').attributes)
Example No. 26
 def __init__(self, msg, feature_names=None):
     self.models = []
     self.coef = []
     for m in msg.Structure.Components:
         s = None
         if m.LinearCoeff:
             s = LinearRegression()
             s.intercept_ = m.LinearCoeff.Intercept
             if feature_names is None:
                 s.coef_ = np.zeros(len(m.LinearCoeff.Coeff))
             else:
                 s.coef_ = np.zeros(len(feature_names))
             for i, elem in enumerate(m.LinearCoeff.Coeff):
                 if feature_names is None:
                     s.coef_[i] = elem.Coeff
                 else:
                     l = feature_names.get_loc(elem.Feature)
                     s.coef_[l] = elem.Coeff
         self.models.append(s)
         if m.Coeff:
             self.coef.append(m.Coeff)
         else:
             self.coef.append(1.0)
Example No. 28
 def runRegression(purchases):
     # define the model
     lrModel = LinearRegression()
     # dummy training to initialize weights and biases of the model
     lrModel.fit(np.array([0]).reshape(-1, 1), [1])
     # Assigning trained weights and biases to the model
     lrModel.coef_ = np.array([[0.08037347]])  # weights
     lrModel.intercept_ = np.array([3.68091473])  # bias; sklearn stores it as intercept_
     example_instance = np.array([purchases
                                  ]).reshape(-1, 1)  # [NUMBER OF PURCHASES]
     # Test the model
     prediction = lrModel.predict(example_instance)
     print(prediction)
     return prediction[0].item()
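With the fixed weight and bias above, the prediction is linear in the purchase count: runRegression(10), for example, should return roughly 0.08037347 * 10 + 3.68091473 ≈ 4.48.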
Example No. 29
    def LotFrontage_imputer(self):
        # linear regression of LotFrontage vs LotArea (outliers removed),
        # with predictions capped at 200 based on visualization
        lr = LinearRegression()
        lr.coef_ = np.array([0.00215388])
        lr.intercept_ = 48.640713607035664

        # predict LotFrontage from LotArea for the rows where it is missing
        lot_area = self.df.LotArea[self.df.LotFrontage.isnull()].values.reshape(-1, 1)
        impute_pred = pd.DataFrame(lr.predict(lot_area), columns=['LR_Pred'])
        impute_pred['Max'] = 200

        # take the smaller of the prediction and the 200 cap
        self.df.loc[self.df.LotFrontage.isnull(),
                    'LotFrontage'] = impute_pred.min(1).values
Example No. 30
def main(args):

    # Initialise
    args, cfg = initialise(args)

    # Load data
    data, features, _ = load_data(args.input + 'data.h5',
                                  train=True,
                                  background=True)

    #variable = VAR_TAU21
    variable = VAR_N2
    #variable = VAR_DECDEEP
    #variable = VAR_DEEP

    # Fill variable profile
    profile = fill_profile(data, variable)

    # Fit profile
    if variable == VAR_N2:
        fit_range = FIT_RANGE_N2
    elif variable == VAR_TAU21:
        fit_range = FIT_RANGE_TAU21
    elif variable == VAR_DECDEEP:
        fit_range = FIT_RANGE_DECDEEP
    elif variable == VAR_DEEP:
        fit_range = FIT_RANGE_DEEP
    else:
        print "variable invalid"
        return 0
    fit = ROOT.TF1('fit', 'pol1', *fit_range)
    profile.Fit('fit', 'RQ0')
    intercept_val, coef_val = fit.GetParameter(0), fit.GetParameter(1)
    intercept_err, coef_err = fit.GetParError(0), fit.GetParError(1)

    # Create scikit-learn transform
    ddt = LinearRegression()
    ddt.coef_ = np.array([coef_val])
    ddt.intercept_ = np.array([-coef_val * fit_range[0]])
    ddt.offset_ = np.array([coef_val * fit_range[0] + intercept_val])

    print "Fitted function:"
    print "  intercept: {:7.4f} ± {:7.4f}".format(intercept_val, intercept_err)
    print "  coef:      {:7.4f} ± {:7.4f}".format(coef_val, coef_err)

    # Save DDT transform
    saveclf(ddt, 'models/ddt/ddt_{}.pkl.gz'.format(variable))
    print "got to the end of main()"
    return 0
Example No. 31
def best_subset_regression(data, dependentVar, factorNames, options):
    """Return the factor loadings using best subset regression.

    INPUTS:
        data: pandas df, data matrix; should contain the date column
        and all of the factorNames columns
        dependentVar: string, name of the dependent variable
        factorNames: list of strings, names of the independent variables
        options: dictionary, should contain at least two elements,
        timeperiod and date
            timeperiod: string; if == 'all', use the entire dataframe,
            otherwise filter the df on this value
            date: name of the date column
            returnModel: boolean; if true, returns the model
            maxVars: int, maximum number of factors that can have a
            non-zero loading in the resulting regression
            printLoadings: boolean; if true, prints the coefficients
    Outputs:
        reg: regression object from scikit-learn
        also prints what was requested
    """
    # Check dictionary for maxVars option
    if ('maxVars' not in options.keys()):
        print('maxVars not specified in options')
        return

    if (options['timeperiod'] == 'all'):
        newData = data.copy()
    else:
        newData = data.copy()
        newData = newData.query(options['timeperiod'])

    # best_subset requires cvxpy, which may not be available in Anaconda;
    # a hedged sketch of one possible implementation follows this function
    alpha, beta = best_subset(newData[factorNames].values,
                              newData[dependentVar].values, options['maxVars'])
    beta[np.abs(beta) <= 1e-7] = 0.0

    if (options['printLoadings']):
        print_timeperiod(newData, dependentVar, options)
        print('Max Number of Non-Zero Variables is ' + str(options['maxVars']))
        display_factor_loadings(alpha, beta, factorNames, options)

    if (options['returnModel']):
        out = LinearRegression()
        out.intercept_ = alpha[0]
        out.coef_ = beta
        return out
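best_subset is external and, per the comment above, depends on cvxpy. A hedged sketch of one possible implementation as a mixed-integer program (big_m and the solver requirements are assumptions; this is not the original):

import cvxpy as cp
import numpy as np

def best_subset(X, y, max_vars, big_m=100.0):
    # choose at most max_vars predictors via boolean indicators z
    n, k = X.shape
    alpha = cp.Variable()
    beta = cp.Variable(k)
    z = cp.Variable(k, boolean=True)
    residual = y - alpha - X @ beta
    constraints = [cp.abs(beta) <= big_m * z,  # beta_j can be non-zero only if z_j == 1
                   cp.sum(z) <= max_vars]
    # requires a mixed-integer-capable solver
    cp.Problem(cp.Minimize(cp.sum_squares(residual)), constraints).solve()
    return np.array([alpha.value]), beta.value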
Example No. 32
def load(path):
    """Import a bk model as a scikit-learn model."""
    meta_f, params_f = _paths(path)

    with open(meta_f, 'r') as f:
        meta = json.load(f)
    model_type = meta['type']

    # only supports linear regression at the moment
    assert model_type == 'linear_regression'
    with h5py.File(params_f, 'r') as h5f:
        coef = h5f['coef'][:]
        intercept = h5f['intercept'][()]  # retrieve the scalar value

    model = LinearRegression()
    model.coef_ = coef
    model.intercept_ = intercept
    return model
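A complementary save sketch (hypothetical; it mirrors the meta/params layout that load reads via _paths):

import json
import h5py

def save(path, model):
    # inverse of load(): write the meta JSON and the HDF5 parameters
    meta_f, params_f = _paths(path)
    with open(meta_f, 'w') as f:
        json.dump({'type': 'linear_regression'}, f)
    with h5py.File(params_f, 'w') as h5f:
        h5f.create_dataset('coef', data=model.coef_)
        h5f.create_dataset('intercept', data=model.intercept_)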