예제 #1
0
 def storeOLSPrediction():
     """Predict player counts for the newest ``IgnitionRow`` and store them.

     The most recent row (ordered by ``pub_date``) is turned into a one-row
     frame with hour-of-day dummies (``h1``..``h23``) and day-of-week dummies
     (``dow1``..``dow6``).  That frame is passed to five pickled OLS results
     and the predictions are saved as a new ``IgnitionRowPredictionOLS``
     stamped with the current time.

     Returns ``None``.  Nothing is stored when the table has no rows.
     """
     rows = list(
         IgnitionRow.objects.all().order_by('-pub_date')[:1].values())
     if not rows:
         # No observation to build features from; the frame-based code below
         # would otherwise fail with a KeyError on the missing column.
         return
     data = pd.DataFrame(rows[::-1])

     # Work on real timestamps instead of a str()/strptime() round trip:
     # str() omits the fractional part when microseconds are zero, which made
     # the fixed "%S.%f%z" format raise ValueError.
     stamps = pd.to_datetime(data['pub_date'])
     data['pub_date'] = data['pub_date'].map(str)
     # Index by the timestamp's wall-clock time with the time zone dropped.
     data.index = pd.DatetimeIndex(stamps.dt.tz_localize(None))

     # Hours must be integers to match the integer categories; string hours
     # matched none of them, so every hour dummy came out as zero.
     data['hour'] = pd.Categorical(stamps.dt.hour.to_numpy(),
                                   categories=list(range(24)))
     data['day_of_week'] = pd.Categorical(stamps.dt.weekday.to_numpy(),
                                          categories=list(range(7)))

     # drop_first removes the 0 category, leaving h1..h23 and dow1..dow6.
     hour_dummies = pd.get_dummies(data['hour'], drop_first=True)
     hour_dummies.columns = [
         'h' + str(elem) for elem in hour_dummies.columns
     ]
     day_of_week_dummies = pd.get_dummies(data['day_of_week'],
                                          drop_first=True)
     day_of_week_dummies.columns = [
         'dow' + str(elem) for elem in day_of_week_dummies.columns
     ]
     data = pd.concat((data, hour_dummies, day_of_week_dummies), axis=1)

     # One pickled model per size; the order matches the fields saved below.
     preds = []
     for size in (5, 25, 50, 200, 500):
         fitted = OLSResults.load(
             "regression_models/ols_9_21_data_{}.pickle".format(size))
         preds.append(fitted.predict(data))
     print("OLS PREDICTIONS: {}".format(preds))

     d = IgnitionRowPredictionOLS(num_players_5=preds[0],
                                  num_players_25=preds[1],
                                  num_players_50=preds[2],
                                  num_players_200=preds[3],
                                  num_players_500=preds[4],
                                  pub_date=timezone.now())
     d.save()
def main(models):
    """Collect fit statistics for each pickled OLS model and save them as CSV.

    Args:
        models: list of model file names located under
            ``regression_models/return_predictions/<coin>/``.

    ``coin`` is not a parameter; it is read from the enclosing scope.  The
    resulting table is indexed by model name, sorted by R-squared in
    descending order, printed, and written to
    ``results/return_predictions/<coin>_model_results.csv``.
    """
    r_squareds = []
    coefficients = []
    standerd_errors = []
    residuals = []

    for model in models:
        results = OLSResults.load(
            f'regression_models/return_predictions/{coin}/{model}')
        r_squareds.append(results.rsquared)
        coefficients.append(tuple(results.params))
        standerd_errors.append(tuple(results.bse))
        # Despite the column name, this stores df_resid of each fit.
        residuals.append(results.df_resid)

    model_dict = {
        'r_squareds': r_squareds,
        'coefficients': coefficients,
        'standerd_errors': standerd_errors,
        'residuals': residuals
    }

    print(model_dict)

    model_df = pd.DataFrame(model_dict, index=models).sort_values(
        'r_squareds', ascending=False)

    print(model_df)

    output_path = f'results/return_predictions/{coin}_model_results.csv'
    model_df.to_csv(output_path)
    # Report the real destination; the old message named "model_df.csv",
    # a file that is never written.
    print(f'model saved to "{output_path}"')
	def unpickle(self):
		"""Load the uuid/key lookup tables and every model in ``self.allModelName``.

		Reads ``uuid_to_key.pickle`` and ``key_to_uuid.pickle`` from the current
		working directory, then loads each ``model/<name>`` file and stores it
		in ``self.models`` under the key that ``self.uuid_to_key`` maps it to.

		NOTE(review): pickle executes arbitrary code on load; these files must
		come from a trusted source.
		"""
		# Context managers close the files even if pickle.load raises.
		with open('uuid_to_key.pickle', 'rb') as pkl_file:
			self.uuid_to_key = pickle.load(pkl_file)
		with open('key_to_uuid.pickle', 'rb') as pkl_file:
			self.key_to_uuid = pickle.load(pkl_file)
		for name in self.allModelName:
			# Drop the last seven characters (presumably a ".pickle" suffix)
			# to get the uuid, then translate it to its key.
			key = self.uuid_to_key[name[:-7]]
			self.models[key] = OLSResults.load('model/' + name)
def main(models, coin, dependent_variable='return'):
    """Summarise pickled OLS fits for one coin and pickle the summary table.

    Args:
        models: list of model file names under
            ``regression_models/<dependent_variable>_predictions/<coin>/``.
        coin: coin identifier used in the input and output paths.
        dependent_variable: prefix of the ``*_predictions`` directories.

    For every model the R-squared, parameters, standard errors and df_resid
    are collected, together with ``get_time(model)`` and
    ``get_formula(model)``.  The table is indexed by model name, sorted by
    R-squared in descending order, and written with ``DataFrame.to_pickle``.
    """
    column_names = ('r_squareds', 'coefficients', 'standerd_errors',
                    'residuals', 'times', 'formulas')

    # One tuple per model, in the same order as column_names.
    rows = []
    for name in models:
        fitted = OLSResults.load(
            f'regression_models/{dependent_variable}_predictions/{coin}/{name}'
        )
        rows.append((
            fitted.rsquared,
            fitted.params,
            fitted.bse,
            fitted.df_resid,
            get_time(name),
            get_formula(name),
        ))

    # Transpose the rows into one list per column (empty lists if no models).
    model_dict = {
        column: [row[position] for row in rows]
        for position, column in enumerate(column_names)
    }

    model_df = pd.DataFrame(model_dict, index=models)
    model_df = model_df.sort_values('r_squareds', ascending=False)

    model_df.to_pickle(
        f'results/{dependent_variable}_predictions/{coin}_model_results.pickle'
    )
    print(f'model saved to {coin}_model_results.pickle')
예제 #5
0
def get_prediction(centered_filtered, col, side=None):
    """Predict the target ``col`` for one clip and return the mean prediction.

    The clip is cut into overlapping windows of 124 frames taken every 31
    frames.  Each window is scored by the model loaded from
    ``models/<col>_best.pb``, the scores are rescaled with the constants in
    ``maps``, and the result is passed through the OLS correction model
    loaded from ``models/<col>_correction.pb``.

    Args:
        centered_filtered: 2-D array indexed as ``[frame, feature]``.
        col: target name; must be a key of ``maps``.
        side: ``"L"`` or ``"R"`` selects the left/right feature columns;
            anything else uses ``x_columns``.

    Returns:
        Mean of the corrected per-window predictions.

    Raises:
        ValueError: if the clip is too short to yield a single window.
    """
    window = 124
    stride = 31

    model = load_model("models/{}_best.pb".format(col))
    correction_model = OLSResults.load("models/{}_correction.pb".format(col))

    # Per-target constants handed to undo_scaling as (target_min, target_range).
    maps = {
        "KneeFlex_maxExtension": (-29.4408212510502, 114.8431545843835),
        "GDI": (36.314492983907, 77.03271217530302),  # singlesided
        "gmfcs": (1, 3),
        "speed": (0.0718863507111867, 1.5259117583433834),
        "cadence": (0.222, 1.71556665023985),
        "SEMLS_dev_residual": (-0.8205001909638112, 3.309054961371647)
    }

    def undo_scaling(y, target_min, target_range):
        return y * target_range + target_min

    video_len = centered_filtered.shape[0]

    cols = x_columns
    if side == "L":
        cols = x_columns_left
    if side == "R":
        cols = x_columns_right

    samples = [
        centered_filtered[nstart:(nstart + window), cols]
        for nstart in range(0, video_len - window, stride)
    ]
    if not samples:
        # Previously this path hit an unbound ``X`` further down.
        raise ValueError(
            "clip has {} frames; more than {} are needed for one window".format(
                video_len, window))
    # Stack once, after all windows are collected, not on every iteration.
    X = np.stack(samples)

    p = model.predict(X)[:, 0]
    p = undo_scaling(p, maps[col][0], maps[col][1])
    # Second column of ones: the constant term fed to the correction model.
    p = np.transpose(np.vstack([p, np.ones(p.shape[0])]))
    p = correction_model.predict(pd.DataFrame(p))

    return np.mean(p)
예제 #6
0
 def __init__(self, model_file_name, test_file_name):
     """Load a pickled OLS result and the CSV test set it will be run on.

     Both file names are kept on the instance; ``prediction`` starts empty.
     """
     self.model_file_name, self.test_file_name = (model_file_name,
                                                  test_file_name)
     # Model first, then data, so a bad model path fails before the CSV read.
     self.model = OLSResults.load(self.model_file_name)
     self.testing_set = pd.read_csv(self.test_file_name)
     self.prediction = list()
예제 #7
0
from statsmodels.regression.linear_model import OLSResults

# ``sklearn.externals.joblib`` was removed from scikit-learn; prefer the
# standalone ``joblib`` package and fall back only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Load data  --------------------------------------------------------------------------------

# Load batter and player information
df_player_id = pd.read_csv("df_player_id.csv", index_col=0)

df_batting_fromsc_250pa_prop_events = pd.read_csv(
    "df_batting_fromsc_250pa_prop_events.csv", index_col=0)

df_pitching_fromsc_500pa_prop_events = pd.read_csv(
    "df_pitching_fromsc_500pa_prop_events.csv", index_col=0)

# Load models: one pickled OLS result per plate-appearance outcome.
sm_est_model_onbase_loaded = OLSResults.load(
    "sm_est_model_onbase_saved.pickle")
sm_est_model_walk_loaded = OLSResults.load("sm_est_model_walk_saved.pickle")
sm_est_model_single_loaded = OLSResults.load(
    "sm_est_model_single_saved.pickle")
sm_est_model_double_loaded = OLSResults.load(
    "sm_est_model_double_saved.pickle")
sm_est_model_homerun_loaded = OLSResults.load(
    "sm_est_model_homerun_saved.pickle")
sm_est_model_strikeout_loaded = OLSResults.load(
    "sm_est_model_strikeout_saved.pickle")

# Load scalers (files written with joblib despite the .pickle extension).
scale_onbase_loaded = joblib.load("scale_onbase_saved.pickle")
scale_walk_loaded = joblib.load("scale_walk_saved.pickle")
scale_single_loaded = joblib.load("scale_single_saved.pickle")
scale_double_loaded = joblib.load("scale_double_saved.pickle")
예제 #8
0
	def unpickle(self):
		"""Load every file named in ``self.allModelName`` into ``self.models``.

		Each model is read from ``model/<name>`` and stored under the file
		name with its last seven characters removed.
		"""
		for file_name in self.allModelName:
			# name[:-7] presumably strips a ".pickle" suffix.
			self.models[file_name[:-7]] = OLSResults.load('model/' + file_name)
예제 #9
0
 def unpickle(self):
     """Populate ``self.models`` from the files listed in ``self.allModelName``.

     The dictionary key is the file name minus its final seven characters;
     the value is the OLS result loaded from ``model/<file name>``.
     """
     for model_file in self.allModelName:
         model_key = model_file[:-7]
         model_path = 'model/' + model_file
         self.models[model_key] = OLSResults.load(model_path)
예제 #10
0
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLSResults
import numpy as np

# Synthetic regression problem: a constant, x and x**2 as regressors,
# true coefficients (1, 0.1, 10), plus standard-normal noise.
nsample = 100
x = np.linspace(0, 10, 100)
beta = np.array([1, 0.1, 10])
e = np.random.normal(size=nsample)

# add_constant puts the column of ones in front of (x, x**2).
X = sm.add_constant(np.column_stack((x, x**2)))
y = np.dot(X, beta) + e

# Fit the model, pickle the results, then read them back and summarise.
model = sm.OLS(y, X)
results = model.fit()
results.save("example.pickle")

new_results = OLSResults.load("example.pickle")
print(new_results.summary())
예제 #11
0
파일: trading.py 프로젝트: FranSerr/apollo
# Build the working frame for the chosen instrument.
sd = setup_data(
    gf,
    instrument=instrument,
    pricediff=True,
    log=True,
    trading=True,
)

sd.head()

# Constant column of ones.
sd['intercept'] = 1

# Model names in load order: highMid-1..-6, lowMid-1..-6, then highMid0/1.
model_names = (
    ['HHLL_LogDiff USD_JPY_highMid-' + str(lag) for lag in range(1, 7)]
    + ['HHLL_LogDiff USD_JPY_lowMid-' + str(lag) for lag in range(1, 7)]
    + ['HHLL_LogDiff USD_JPY_highMid0', 'HHLL_LogDiff USD_JPY_highMid1']
)

# Each model is stored under its own name at ./src/models/<name>.h5.
models = {}
for model_name in model_names:
    models[model_name] = OLSResults.load('./src/models/' + model_name + '.h5')
 def load_model(self, filename):
     """Return the OLS results pickled in ``filename`` under ``self.PICKLES_PATH``."""
     # The file is opened in binary mode and closed when the block exits.
     with open(path.join(self.PICKLES_PATH, filename), 'rb') as handle:
         return OLSResults.load(handle)
예제 #13
0
# Ratio of the RPC1 and RPC2 rate columns.
plt.plot(df_rates["RPC1"] / df_rates["RPC2"])
# Save before show(): once show() returns the figure can already be closed,
# in which case savefig would write an empty page.
plt.savefig('Rates_ratio_' + str(aRun) + '_' + args.do_fit + '.pdf')
plt.show()
plt.close()

#X=df_rates[['DT1','DT2','DT3','DT4']]
X = df_rates[['RPC2', 'RPC3', 'RPC4']]
y = df_rates['RPC1']

# Either fit RPC1 on the other three rates and pickle the result, or reuse
# a previously pickled fit.
if fit is True:
    results = smf.ols('RPC1 ~ RPC2 + RPC3 + RPC4', df_rates).fit()
    results.save("model.pickle")
else:
    from statsmodels.regression.linear_model import OLSResults
    results = OLSResults.load("model.pickle")

print(results.summary())
res = results.predict(X)

#print(res)
#print(y)

# Colour each (measured, predicted) point by its estimated local density.
xy = np.vstack([y, res])
z = gaussian_kde(xy)(xy)

fig, ax = plt.subplots()
# 'none' is the documented way to draw markers without an edge.
sc = ax.scatter(y, res, c=z, s=100, edgecolor='none')
plt.title('Predicted vs measured RPC rate')
plt.ylabel('Predicted rate')
plt.xlabel('Measured rate')
예제 #14
0
	def __init__(self, model_file_name, test_file_name):
		"""Remember both file names, then load the model and the test data.

		``model`` is the OLS result unpickled from ``model_file_name``;
		``testing_set`` is the CSV at ``test_file_name`` as a DataFrame;
		``prediction`` starts as an empty list.
		"""
		self.model_file_name, self.test_file_name = (
			model_file_name, test_file_name)
		self.model = OLSResults.load(self.model_file_name)
		self.testing_set = pd.read_csv(self.test_file_name)
		self.prediction = list()
 def _initialize_log_linear_risk_model(self, referenceName, modelName):
     """Register a ``LogLinearRiskFactorModel`` built from a pickled OLS fit.

     The fit is read from ``microsim/data/<modelName>.pickle``; its params,
     standard errors and residuals are handed to the risk model, which is
     stored in ``self._repository`` under ``referenceName``.
     """
     picklePath = "microsim/data/" + modelName + ".pickle"
     fitted = OLSResults.load(picklePath)
     riskModel = LogLinearRiskFactorModel(referenceName, fitted.params,
                                          fitted.bse, fitted.resid)
     self._repository[referenceName] = riskModel
예제 #16
0
def OLS_realtime(X_test, OLS_name):
    """Return predictions for ``X_test`` from a pickled OLS result.

    ``OLS_name`` is the file name of the pickle inside
    ``../datasets.nosync/``.
    """
    fitted = OLSResults.load('../datasets.nosync/' + OLS_name)
    return fitted.predict(X_test)
예제 #17
0
파일: slr_wcat.py 프로젝트: yvak90/WC_AT
# Correlation between the fitted values and the observed AT.  The value is
# discarded when this runs as a script; it only shows in an interactive session.
pred5.corr(wcat["AT"])
#plt.scatter(x=wcat["Waist"], y=wcat["AT"], color="blue");plt.xlabel="Waist";plt.ylabel="AT"
#plt.plot(wcat["Waist"], pred5, color="red")

# Root-mean-square error of pred5 against the observed AT values.
res5 = wcat.AT - pred5
sqres5 = res5 * res5
mse5 = np.mean(sqres5)
rmse5 = np.sqrt(mse5)

#from sklearn.linear_model import LinearRegression

# Persist the fit, then load it back to check the pickle round trip.
model5.save("slr_wcat.pkl")

from statsmodels.regression.linear_model import OLSResults
model = OLSResults.load("slr_wcat.pkl")

#type(new_results)
# saving model to disk

#pickle.dump(model5, open("slr_wcat.pkl","wb"))

# loading model to compare results
#slr_wcat = pickle.load(open("slr_wcat.pkl", "rb"))

# Predict for Waist = 36 (with its square and cube) and exponentiate the
# result; presumably the model was fitted on log(AT) -- confirm.
x = np.exp(
    model.predict(
        pd.DataFrame([[36, 1296, 46656]],
                     columns=["Waist", "Waist_sq", "Waist_cb"])))
# Extract the single value explicitly: calling float() on a one-element
# Series is deprecated in pandas.
print(float(np.round(np.ravel(x)[0], 2)))
#print(round(np.exp(model5.predict(pd.DataFrame([[80,6400,512000]], columns=["Waist", "Waist_sq", "Waist_cb"]))),2))