def load(
    tag: t.Union[str, Tag],
    model_store: "ModelStore" = Provide[BentoMLContainer.model_store],
) -> "ModelType":
    """
    Load a saved statsmodels model from the BentoML local modelstore.

    Args:
        tag (:code:`Union[str, Tag]`):
            Tag of a saved model in BentoML local modelstore.
        model_store (:mod:`~bentoml._internal.models.store.ModelStore`, default to :mod:`BentoMLContainer.model_store`):
            BentoML modelstore, provided by DI Container.

    Returns:
        :obj:`Any`: the object unpickled by ``sm.load`` from the model's
        saved pickle file.

    Raises:
        BentoMLException: if the stored model was saved by a module other
        than this one.

    Examples:

    .. code-block:: python

        import bentoml

        model = bentoml.statsmodels.load("holtswinter")
    """
    stored = model_store.get(tag)

    # Refuse to unpickle artefacts written by a different framework module.
    saved_with = stored.info.module
    if not (saved_with == MODULE_NAME or saved_with == __name__):
        raise BentoMLException(
            f"Model {tag} was saved with module {saved_with}, failed loading with {MODULE_NAME}."
        )

    pickle_path = stored.path_of(f"{SAVE_NAMESPACE}{PKL_EXT}")
    return sm.load(pickle_path)
def load(self):
    """Load the pickled statsmodels object found at ``self._model_path``.

    Loading is best-effort: if the file cannot be read or unpickled,
    ``self._model`` is set to ``None`` instead of raising.

    Returns:
        This instance, so the call can be chained.
    """
    try:
        # The former extra ``joblib.load`` call was dropped: its result was
        # overwritten immediately, it deserialized the file twice, and a
        # joblib failure discarded a model that ``sm.load`` could read.
        self._model = sm.load(self._model_path)
    except Exception:
        # Deliberately tolerant, but no longer swallows KeyboardInterrupt
        # or SystemExit the way a bare ``except:`` does.
        self._model = None
    return self
def main(req: func.HttpRequest) -> func.HttpResponse:
    """Predict from a number of coding hours supplied in the HTTP request.

    The value is read from the ``hours`` query parameter or, failing that,
    from the ``hours`` key of a JSON request body.

    Returns:
        200 with the first predicted value as text, or 400 when the value is
        missing or is not an integer.
    """
    logging.info('Python HTTP trigger function processed a request.')

    codinghours = req.params.get('hours')
    if not codinghours:
        try:
            req_body = req.get_json()
        except ValueError:
            pass
        else:
            # Previously the body value was stored in an unused variable, so
            # JSON-body requests were always rejected.
            if isinstance(req_body, dict):
                codinghours = req_body.get('hours')

    if not codinghours:
        return func.HttpResponse(
            "Please pass a the number of coding hours in the request body",
            status_code=400)

    try:
        hours_value = int(codinghours)
    except (TypeError, ValueError):
        # Malformed input is a client error, not an unhandled exception.
        return func.HttpResponse(
            "The number of coding hours must be an integer",
            status_code=400)

    # initialize the model (only once the request is known to be valid)
    model = sm.load('HttpTrigger/model.pickle')

    # you have to create a DataFrame since the Statsmodels formula interface
    # expects it; build it directly rather than via chained assignment.
    hours = pd.DataFrame({'CodingHours': [hours_value]})

    # use the model to make predictions on a new value
    coffeecups = model.predict(hours)
    return func.HttpResponse(f"{coffeecups[0]}")
def fitting(self, mdlName, folderPath, method="lbfgs"):
    """Estimate the model (or load a saved fit) and then filter with it.

    If ``folderPath + "/" + mdlName`` is not an existing file, ``self.mdl``
    is fitted and the result saved via ``self.saveit``; otherwise the saved
    fit is loaded with ``sm.load``. Either way the fitted parameters are then
    passed to ``self.mdl.filter``. If filtering fails, the current process is
    replaced by a fresh run of the same command line.

    Args:
        mdlName: File name of the saved model inside ``folderPath``.
        folderPath: Directory holding the saved model.
        method: Optimizer passed to ``self.mdl.fit``. The original notes list:
            'newton' (Newton-Raphson), 'nm' (Nelder-Mead), 'bfgs',
            'lbfgs' (limited-memory BFGS with optional box constraints,
            default), 'powell' (modified Powell), 'cg' (conjugate gradient),
            'ncg' (Newton-conjugate gradient), 'basinhopping'.

    Returns:
        None. Results are stored on ``self.fitted`` and ``self.filt``. The
        function returns early, after printing a message, if estimation or
        saving fails.
    """
    mdlpath = folderPath + "/" + mdlName
    sys.stdout.flush()
    if not os.path.isfile(mdlpath):
        print("\nestimation of sarimax model " + str(self.order) + "x" +
              str(self.sorder) + "\n\n")
        sys.stdout.flush()
        try:
            self.fitted = self.mdl.fit(method=method, maxiter=200)
        except Exception as e:
            print("could not estimate model parameters:")
            print(e)
            sys.stdout.flush()
            return
        try:
            self.saveit(folderPath, mdlName)
        except Exception as e:
            print(
                "saving the model caused problems - please debug the program"
            )
            print(e)
            sys.stdout.flush()
            return
    else:
        self.fitted = sm.load(mdlpath)
        print("model loaded: " + mdlpath)
    try:
        # no clue why you can't filter after saving in the same thread
        self.filt = self.mdl.filter(self.fitted.params)
    except Exception:
        # Narrowed from a bare ``except:`` so that Ctrl+C / SystemExit stop
        # the program instead of triggering a restart.
        print(
            "model fitted and saved, the program will restart for results")
        python = sys.executable
        os.execl(python, python, *sys.argv)
def load(self, **kwargs): """This method loads the information. """ # Load. self._raw = sm.load(**kwargs) self._resid = self._raw.resid self._result = self._init_result() self._config = self._init_config() # Return. return self
def predict(data, formula, model_file):
    """
    Get the probability for all rows

    :param data: DataFrame of the data
    :param formula: formula used in the predictor
    :param model_file: *.pickle file containing the model
    :return: probabilities, one per row of the design matrix built from
        ``formula`` and ``data``
    """
    model = load(model_file)
    _, transformed_data = patsy.dmatrices(formula, data, return_type='dataframe')
    log_odds = (transformed_data * model.params).sum(axis=1)
    # Logistic function written as exp(-log(1 + exp(-x))). The direct form
    # exp(x) / (1 + exp(x)) overflows to inf/inf = NaN for large log-odds;
    # logaddexp stays finite in both tails.
    return np.exp(-np.logaddexp(0, -log_odds))
def selector_regression(matched_sents, document, str2vec, wc, args=None):
    """Score matched sentences with a saved regression model and select from them.

    Args:
        matched_sents: Iterable of ``(item, vector)`` pairs; the vectors are
            stacked into the design matrix.
        document: Unused here; kept for interface compatibility.
        str2vec: Vectoriser function; its ``__name__`` selects the model.
        wc: Passed through to ``select_from_scores``.
        args: Optional options dict passed through to ``select_from_scores``.
            Defaults to a fresh empty dict on every call.

    Returns:
        Whatever ``select_from_scores`` returns for the predicted scores.
    """
    # A ``{}`` default would be one dict shared by every call and handed to
    # downstream code that may mutate it.
    if args is None:
        args = {}

    # Lazily populate the module-level cache with one model per vectoriser.
    if not regression_models:
        for f in str2vecs:
            regression_models[f.__name__] = sm.load('data/regression_' + f.__name__ + ".pickle")

    model = regression_models[str2vec.__name__]
    x = np.stack([vec for _, vec in matched_sents], axis=0)
    x = pd.DataFrame(x)
    x = sm.add_constant(x)
    scores = model.predict(exog=x).values.tolist()
    return select_from_scores(matched_sents, scores, wc, args)
def make_prediction(df, model_name, yield_type='rainfed'):
    """Attach predicted yield columns to a copy of ``df``.

    Adds ``predicted_yield_<yield_type>_ana`` (output of the saved model
    ``yield_<model_name>_model.pickle``) and ``predicted_yield_<yield_type>``
    (that value plus the trend model's prediction for ``year``).

    Returns:
        A new DataFrame; ``df`` itself is not modified by this function.
    """
    anomaly_col = 'predicted_yield_%s_ana' % yield_type
    total_col = 'predicted_yield_%s' % yield_type

    # Load both saved models.
    trained_model = sm.load('yield_%s_model.pickle' % model_name)
    trend_results = load_yield_trend(yield_type)

    # Model output, joined onto a copy of the input by index.
    df_predict = df.copy()
    anomaly = trained_model.predict(df).to_frame(anomaly_col)
    df_predict = df_predict.join(anomaly)

    # Add the trend term to obtain the yield itself.
    trend = trend_results.predict(df_predict['year'])
    df_predict[total_col] = df_predict[anomaly_col] + trend
    return df_predict
def predict():
    """Forecast ``h`` steps with the saved model and return the result as JSON.

    Reads the horizon from the ``h`` key of the request's JSON body. If no
    model could be loaded, returns a plain-text hint instead.
    """
    model = load('fitted.pkl')

    # Debug output about the incoming request.
    print('session data: ', session)
    print(request.args)
    period = request.get_json()
    print(period.keys())

    if model is None:
        return 'please run train first'

    result = model.forecast(period['h'])
    print(result)
    return jsonpify(result.to_dict())
def predict_sequence(save_path, eval_data, order, input_start, input_size,
                     output_size, x_coord, y_coord):
    """Predict the steps following an input window using saved parameters.

    A SARIMAX model is built on the slice
    ``eval_data[input_start:input_start + input_size, x_coord, y_coord]``
    and filtered with the parameters of the model saved at
    ``<save_path>/<x_coord>_<y_coord>.pickle``. No re-estimation happens.

    Args:
        save_path: Directory containing the per-coordinate pickles.
        eval_data: Array indexed as ``[time, x, y]``.
        order: ``order`` argument passed to ``SARIMAX``.
        input_start: First time index of the input window.
        input_size: Length of the input window.
        output_size: Number of steps to predict after the window.
        x_coord, y_coord: Cell to predict.

    Returns:
        The last ``output_size`` values of the predicted mean.
    """
    trained_model = sm.load(f"{save_path}/{x_coord}_{y_coord}.pickle")
    window = eval_data[input_start:input_start + input_size, x_coord, y_coord]
    model = SARIMAX(window, order=order)
    model_fit = model.filter(trained_model.params)
    # Predictions switch to dynamic mode at index ``input_size``, i.e. for
    # everything past the end of the input window.
    prediction_wrapper = model_fit.get_prediction(
        start=0, end=input_size + output_size - 1, dynamic=input_size)
    return prediction_wrapper.predicted_mean[-output_size:]
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from patsy import dmatrices, Sum, Diff, Poly

ols_results = sm.load('basic_OLS_results.pickle')

# Select the coefficients whose name contains the sum-coded X20 term.
p = 'C(X20, Sum)'
params = ols_results.params
state_key_set = [key for key in params.keys() if p in key]
pvalues = ols_results.pvalues[state_key_set]

# Report the coefficients significant at the 5% level.
for key in state_key_set:
    if pvalues[key] < .05:
        print(key)
# Previously this printed a generator object (and referenced a nonexistent
# ``pvalues.value()``); print the significant p-values themselves.
print(pvalues[pvalues < .05].tolist())

# 20 equal-width bins covering [0, 1]. ``arange(0, 1, .05)`` stopped at 0.95
# and dropped larger p-values; ``pd.np`` no longer exists in pandas.
plt.hist(pvalues.values.tolist(), bins=np.linspace(0, 1, 21))
plt.show()
# print(ols_results.pvalues)
'legend.title_fontsize': 16, 'xtick.labelsize': 14, 'ytick.labelsize': 14, 'axes.labelsize': 16, 'axes.titlesize': 20, 'figure.dpi': 100 } matplotlib.rcParams.update(andy_theme) ############################################# ############################################# #Load in the test data and the xgboost model test_dat = pd.read_csv('test_data.csv') model = sm.load('discrete_time.pickle') #Expand out how many iterations I want to look at end_time = 52 * 2 test_dat['OrigEvent'] = test_dat['EVENT'] test_explode = discrete_time.explode_data(data=test_dat, time='WeekTot', outcome='EVENT', max_time=end_time, min_time=end_time, cum_event='CumEvent') #Recreating the spline terms knot_locs = [4, 10, 20, 40, 60, 80] discrete_time.rcs(test_explode['Time'], knot_locs,
import itertools import numpy as np from sklearn.metrics import mean_squared_error from math import sqrt import warnings warnings.filterwarnings('ignore') # In[35]: arroz = pd.read_csv("data_real.csv") arroz['Date'] = pd.to_datetime(arroz['Date']) arroz = arroz.set_index('Date') # In[36]: mod = sm.load('arroz.pickle') # In[39]: pred = mod.get_forecast(steps=4 + 6) pred_ci = pred.conf_int() # In[40]: #Producing and visualizing forecasts pred_uc = mod.get_forecast(steps=6) pred_ci = pred_uc.conf_int() ax = arroz.plot(marker='o', label='observed', figsize=(14, 7)) pred_uc.predicted_mean.plot(
data = remove_stop_words(data) #needed again as num2word is giving stop words 101 - one hundred and one data = remove_obvious(data) return data st.title('Publish or perish: data-driven choice of book keywords for publishing on Amazon') main_category = st.selectbox("Select the main category of the book: ", ["",'self-help']) if main_category == 'self-help': filename_model = '../../data/topic_model_tfidf_nmf.pickle' nmf_model = pickle.load(open(filename_model, 'rb')) filename_model = '../../data/topic_model_tfidf.pickle' tfidf_model = pickle.load(open(filename_model, 'rb')) ols_results = sm.load('../../data/ols.pickle') book_title = st.text_area('Enter the title of the book:') book_description = st.text_area('Enter the description of the book:') # book_labels = st.text_input('Enter the labels of the book:') input_text = book_title +' '+book_description processed_input_text = preprocess(input_text) df_coeff_topics = pd.read_csv('../../data/books_25_pages_author_info_description_genres_topics_top_words_ols_coeff.csv') df_coeff_topics = df_coeff_topics.rename(columns = lambda x: x.strip()) #import models filename_model = '../../data/topic_model_tfidf_nmf.pickle' nmf_model = pickle.load(open(filename_model, 'rb'))
def main(options):
    """Main logic of the script"""

    def log(message):
        # Progress messages are only emitted in verbose mode.
        if options.verbose:
            syserr(message)

    # Mapping from column names to pretty names
    pretty_names = {
        'ContraScoreTargetSite': 'Accessibility',
        'ID': "ID",
        'MIRZABranchLengthScoreFill': "Conservation",
        'MIRZAscore': 'MIRZAscore',
        'distToBoundary': 'Distance to boundary',
        'flanksG': 'Flanks G',
        'flanksU': 'Flanks U',
        'hybrid': "Hybrid",
        'miRNA': 'miRNA',
        'precise_type': 'Precise type',
        'probability_with_bls': "Probability with conservation",
        'probability_without_bls': "Probability without conservation",
        'seed_beg': 'Seed start',
        'seed_end': 'Seed end',
        'type': 'Type',
    }

    # Order of the columns in the output
    output_order = [
        'ID', 'miRNA', 'seed_beg', 'seed_end', 'type', 'precise_type',
        'hybrid', 'flanksU', 'flanksG', 'ContraScoreTargetSite',
        'distToBoundary', 'MIRZAscore', 'MIRZABranchLengthScoreFill',
        'probability_without_bls', 'probability_with_bls',
    ]

    log("Reading data file\n")
    df = pd.read_table(options.input)

    log("Adding constant to data\n")
    df['const'] = 1.0

    # read the models from pickle
    model_bls = sm.load(options.model_bls)
    model_nobls = sm.load(options.model_nobls)

    # predictor names: every parameter except the first one
    columns_bls = model_bls.params.keys().tolist()[1:]
    columns_nobls = model_nobls.params.keys().tolist()[1:]

    # linear predictor -> scaled probability, for each model
    log("Adding probability to data\n - with BLS\n")
    design_bls = df[['const'] + columns_bls].values
    df['probability_with_bls'] = scaled_logit_inverse(
        np.dot(design_bls, model_bls.params.values))

    log(" - without BLS\n")
    design_nobls = df[['const'] + columns_nobls].values
    df['probability_without_bls'] = scaled_logit_inverse(
        np.dot(design_nobls, model_nobls.params.values))

    log("Reordering columns\n")
    df = df[output_order]

    # rename columns to their pretty names
    df.columns = [pretty_names[name] for name in df.columns]

    log("Saving file\n")
    df.to_csv(options.output, sep='\t', index=None, na_rep="NaN")
    log("Done\n")
with h5py.File(bin_file, 'a') as f: if 'sklearn' in f: print('Deleting earlier sklearn predictions') del f['sklearn'] f.create_dataset('sklearn', data=preds) accuracy.append(accuracy_score(y_test, preds)) precision.append(precision_score(y_test, preds)) recall.append(recall_score(y_test, preds)) f1.append(f1_score(y_test, preds)) sklearn_metrics = np.column_stack([accuracy, precision, recall, f1]) # Prediction statsmodel accuracy, precision, recall, f1 = [], [], [], [] model_path = data_path / batch / 'models' / img model_path = model_path / '{}'.format(img + '_statsmodel.pickle') trained_model = sm.load(str(model_path)) preds = np.round(trained_model.predict(X_test)) with h5py.File(bin_file, 'a') as f: if 'statsmodel' in f: print('Deleting earlier statsmodel predictions') del f['statsmodel'] f.create_dataset('statsmodel', data=preds) accuracy.append(accuracy_score(y_test, preds)) precision.append(precision_score(y_test, preds)) recall.append(recall_score(y_test, preds)) f1.append(f1_score(y_test, preds)) statsmodel_metrics = np.column_stack([accuracy, precision, recall, f1]) # Prediction statsmodel accuracy, precision, recall, f1 = [], [], [], [] model_path = data_path / batch / 'models' / img
def load_yield_trend(yield_type):
    """Load the pickled trend model ``yield_trend_<yield_type>_model.pickle``."""
    return sm.load("yield_trend_%s_model.pickle" % yield_type)
def do_POST(self):
    """Handle one POST: forward the observation and send back a forecast.

    Steps, in order:
      1. Parse the JSON body; read ``object.battery`` and ``object.people``.
      2. If the body carries a truthy ``model_updated`` flag, close this
         connection, call ``iot_platform_connect`` and reload the global
         ``model`` from ``recieved.pkl``, then return without forecasting.
      3. Otherwise compute ``model.forecast()``, POST the ``people`` value to
         ``GATEWAY_URL`` and enqueue the base64-encoded prediction through
         the API at ``SERVER_URL``.
    """
    print("1: Processing post request")
    # Headers are sent before the body is parsed
    # (presumably a fixed success response — confirm in _set_headers).
    self._set_headers()
    global model
    # print("header: {}".format(self.headers))

    # Processing HTTP POST request data
    # print("header type: {}".format((self.headers['Content-Length'])))
    length = int(self.headers['Content-Length'])
    #print("Content Length :{}".format(length))
    post_body = self.rfile.read(length)
    val_json = json.loads(post_body.decode('utf-8'))
    # val = val_json["object"]["battery"]
    battery = val_json["object"]["battery"]
    val = val_json["object"]["people"]
    print("battery_val: {}\npeople: {}".format(battery, val))
    #is_model_updated = val_json["model_updated"]
    print("Received post request")
    # The flag is optional; a missing key means "no update".
    if "model_updated" in val_json:
        is_model_updated = val_json["model_updated"]
    else:
        is_model_updated = False
    if is_model_updated:
        # Finish and drop the current connection before fetching the model.
        self.finish()
        self.connection.close()
        time.sleep(1)
        # presumably downloads the new model file from the worker — confirm
        self.iot_platform_connect(ip_addr=worker_ip, port=12345)
        is_model_updated = False
        # Reload the model
        model = sm.load('recieved.pkl')
        return
    print("1: Finished processing post request")
    # TODO: Apply learned prediction model on the
    # measurement received within the POST request.
    # NOTE(review): the forecast does not use ``val``; it is whatever the
    # loaded model forecasts with default arguments.
    prediction = int(model.forecast())
    print("2: Apply prediction model")
    print("prediction: {}".format(prediction))
    # TODO: Forward the observation received in the
    # POST request to the IoT Platform.
    print("3: Forwarding observation to IoT Platform")
    iot_url = GATEWAY_URL
    gw_conn = http.client.HTTPConnection(iot_url)
    # NOTE(review): ``connect()`` appears to return nothing, so the next
    # print likely shows "None" — confirm.
    response = gw_conn.connect()
    print("response: {}".format(response))
    print("Connection with gateway established")
    # Message forwarded to the gateway: sensor id, millisecond timestamp
    # and the received ``people`` value.
    msg = {}
    msg['sensor_id'] = SENSOR_ID
    msg['timestamp'] = int(time.time() * 1000)
    msg['value'] = val
    gw_json_msg = json.dumps(msg)
    gw_headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + DEV_JWT
    }
    gw_conn.request('POST', '/', gw_json_msg, gw_headers)
    """print("Attempting post...")
    full_url = GATEWAY_URL
    while True:
        response = requests.post(full_url, data = gw_json_msg, headers = gw_headers)
        if response.status_code == 200:
            break
        full_url = '10.195.0.10:8083'
    print("Connection with gateway successfully established")
    """
    gw_resp = gw_conn.getresponse()
    print("Status: {}".format(gw_resp.status))
    #if gw_response == 200:
    #    pass
    gw_conn.close()
    print("3: Data sent to IoT Platform")
    # TODO: Adjust the downstream sending through LoRaServer API
    # to send the results of applying the prediction model to
    # the measurement received within the POST request.
    # Reference: http://localhost:8080/api#!/DeviceQueueService/Enqueue
    print("4: Sending prediction via LoRaServer API to board")
    api_conn = http.client.HTTPConnection(SERVER_URL)
    api_conn.connect()
    print("Connected to LoRaServer API")
    #predictionB64 = base64.b64encode(str(prediction2).encode('utf-8'))
    # The prediction is sent as a single byte; ``bytes([n])`` raises
    # ValueError unless 0 <= n <= 255.
    predictionB64 = base64.b64encode(bytes([prediction]))
    print("predictionB64 is: " + predictionB64.decode('utf-8'))
    # When the prediction is accomplished, the results is returned
    # to the device via LoRa App Server's API call that enqueues the data.
    # Below is the minimal possible API call returning 200 OK HTTP code.
    data = {}
    dev_queue_item = {}
    dev_queue_item['data'] = predictionB64.decode('utf-8')
    #dev_queue_item['data'] = "10"
    dev_queue_item['fPort'] = device_fPort
    data['deviceQueueItem'] = dev_queue_item
    json_data = json.dumps(data)
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Grpc-Metadata-Authorization": "Bearer " + JWT
    }
    api_conn.request('POST', api_dev_enqueue_url, json_data, headers)
    # Sending the prediction to the device was requested from API via POST request above.
    # TODO: you need to ensure that the response returned is correct.
    # Reference: https://docs.python.org/2.2/lib/httpresponse-objects.html
    print("Prediction send to board. Waiting for response")
    resp = api_conn.getresponse()
    if resp.status == 200:
        print("Status 200:")
        print("Data sent to enque")
    else:
        print("Data not sent properly:")
        print("Status {}".format(resp.status))
        print("Response: {}".format(resp.reason))
    api_conn.close()
    print("4: Closing connection to LoRaServer API")
    print("-----Finished processing DO_POST()-----")
def loadLogitModel(config):
    """Load the pickled model whose path is stored under ``logit_model_file`` in ``config``."""
    model_path = config.get("logit_model_file")
    return sm.load(model_path)
def backward_selected(data, response, remaining, prev=[]):
    """Backward elimination of formula terms using ANOVA p-values.

    based upon algorithm found at:
    https://planspace.org/20150423-forward_selection_with_statsmodels/

    Starting from a model containing every term in ``remaining``, each pass
    fits one model per candidate with that candidate removed and compares it
    with the current model via ``anova_lm``. The candidate with the largest
    ``Pr(>F)`` is dropped if that p-value exceeds 0.05; otherwise the loop
    stops. Fitted models are cached as pickles under ``b_models/``.

    Args:
        data: Data passed to ``sm.ols``.
        response: Name of the response variable (left-hand side).
        remaining: List of candidate terms. Dropped terms are removed from
            this list IN PLACE.
        prev: List of term lists from earlier stages.

    Returns:
        ``(model, formula, remaining)``: the final fitted model, its formula
        and the surviving terms.
    """
    remain = remaining[:]
    selected = []
    prv = []
    best_formula = ''
    current_score, best_new_score = 0.05, 0.05
    # NOTE(review): the template has no ``{prev}`` placeholder, so the
    # ``prev=previous`` keyword passed to ``format`` below has no effect on
    # the formulas — confirm whether that is intended.
    starting_formula = "{response} ~ {selected}"
    for i in range(0, len(prev)):
        prv.append("+".join(prev[i]))
    if len(prv) > 0:
        previous = "+".join(prv)
        if len(previous) > 1:
            previous = previous + '+'
    else:
        previous = '1'
    # Keeps iterating only while the previous pass actually removed a term
    # (which leaves current_score equal to best_new_score).
    while remain and current_score == best_new_score:
        current_score = 0.05
        scores_with_candidates = []
        sel = starting_formula.format(response=response,
                                      selected='+'.join(remain),
                                      prev=previous)
        # Cache file name: the right-hand side with the '+' signs stripped.
        s_file = "b_models/" + sel.replace(response + " ~ ", "") + '.pickle'
        s_file = s_file.replace('+', '')
        if Path(s_file).exists():
            sel_model = sma.load(s_file)
        else:
            if sel == best_formula:
                # Reuse the model fitted as the best candidate last pass.
                sel_model = best_model
            else:
                sel_model = sm.ols(sel, data).fit()
                sel_model.save(s_file)
        # ``file`` is not defined in this function; presumably a module-level
        # log file handle — confirm.
        print("testing base: {}".format(sel), file=file)
        print("testing base: {}".format(sel))
        for candidate in remain:
            s = remain[:]
            s.remove(candidate)
            if len(s) == 0 and previous.endswith('+'):
                previous = previous[:-1]
            formula = starting_formula.format(response=response,
                                              selected='+'.join(s),
                                              prev=previous)
            f_file = "b_models/" + formula.replace(response + " ~ ", "") + '.pickle'
            f_file = f_file.replace('+', '')
            if Path(f_file).exists():
                model = sma.load(f_file)
            else:
                model = sm.ols(formula, data).fit()
                model.save(f_file)
            print("testing removal: {}".format(formula), file=file)
            print("testing removal: {}".format(formula))
            # p-value of the F-test between the reduced and the current model.
            prf = sma.stats.anova_lm(model, sel_model)['Pr(>F)'].loc[1]
            print("testing removal: {} result: {}".format(formula, prf), file=file)
            print("testing removal: {} result: {}".format(formula, prf))
            scores_with_candidates.append((prf, candidate, model, formula))
        # Ascending sort, so pop() yields the candidate with the largest
        # p-value, i.e. the least significant term.
        scores_with_candidates.sort()
        best_new_score, best_candidate, best_model, best_formula = scores_with_candidates.pop(
        )
        if current_score < best_new_score:
            remain.remove(best_candidate)
            selected.append(best_candidate)
            current_score = best_new_score
    if previous[:1] != "+" and len(selected) == 0:
        previous = previous[:-1]
    # ``selected`` holds the REMOVED terms; drop them from the caller's list.
    for s in selected:
        remaining.remove(s)
    formula = starting_formula.format(response=response,
                                      selected='+'.join(remaining),
                                      prev=previous)
    model = sm.ols(formula, data).fit()
    model.save('best_model_backward2.pickle')
    return model, formula, remaining
def get_model():
    """Load ``toll_tsmodel.pkl`` from the configured model directory."""
    model_path = os.path.join(config.MODEL_DIR, 'toll_tsmodel.pkl')
    return sm.load(model_path)
else: data = new # %% data['date'] = data['date'].apply( lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S")) print(data['date'].describe()) data.describe() # %% data = data.sort_values('date').set_index('date', drop=True) data = data.asfreq(freq="5min") data.head(5) # %% pred = sm.load("../data/input/step2/prediction_model_" + run_date + "-" + source_id) # %% pred_ci = pred.conf_int() rcParams['figure.figsize'] = 18, 8 fig, ax = plt.subplots() ax.plot(data[data.index > (run_datetime - timedelta(3))]['mydata'], label='observed') ax.plot(pred.predicted_mean, label='One-step ahead Forecast', alpha=.7) ax.fill_between(pred_ci.index, pred_ci.iloc[:, 0], pred_ci.iloc[:, 1], color='k', alpha=.2) ax.set_xlabel('Date') ax.set_ylabel('mydata')
def main(options):
    """Main logic of the script"""
    verbose = options.verbose

    # Pretty names for the output columns, keyed by the internal name.
    columns_map = dict([
        ('ContraScoreTargetSite', 'Accessibility'),
        ('ID', "ID"),
        ('MIRZABranchLengthScoreFill', "Conservation"),
        ('MIRZAscore', 'MIRZAscore'),
        ('distToBoundary', 'Distance to boundary'),
        ('flanksG', 'Flanks G'),
        ('flanksU', 'Flanks U'),
        ('hybrid', "Hybrid"),
        ('miRNA', 'miRNA'),
        ('precise_type', 'Precise type'),
        ('probability_with_bls', "Probability with conservation"),
        ('probability_without_bls', "Probability without conservation"),
        ('seed_beg', 'Seed start'),
        ('seed_end', 'Seed end'),
        ('type', 'Type'),
    ])

    # Column order of the output table.
    columns_order = (
        'ID miRNA seed_beg seed_end type precise_type hybrid flanksU flanksG '
        'ContraScoreTargetSite distToBoundary MIRZAscore '
        'MIRZABranchLengthScoreFill probability_without_bls '
        'probability_with_bls'
    ).split()

    if verbose:
        syserr("Reading data file\n")
    df = pd.read_table(options.input)

    if verbose:
        syserr("Adding constant to data\n")
    df['const'] = 1.0

    # Read both models from pickle, then take each model's predictor names
    # (all parameters but the first).
    model_bls = sm.load(options.model_bls)
    model_nobls = sm.load(options.model_nobls)
    columns_bls = model_bls.params.keys().tolist()[1:]
    columns_nobls = model_nobls.params.keys().tolist()[1:]

    # One entry per probability column: message, target, model, predictors.
    steps = (
        ("Adding probability to data\n - with BLS\n",
         'probability_with_bls', model_bls, columns_bls),
        (" - without BLS\n",
         'probability_without_bls', model_nobls, columns_nobls),
    )
    for message, target, model, predictors in steps:
        if verbose:
            syserr(message)
        linear_part = np.dot(df[['const'] + predictors].values,
                             model.params.values)
        df[target] = scaled_logit_inverse(linear_part)

    if verbose:
        syserr("Reordering columns\n")
    df = df[columns_order]

    # Swap in the pretty names.
    df.columns = [columns_map[col] for col in df.columns]

    if verbose:
        syserr("Saving file\n")
    df.to_csv(options.output, sep='\t', index=None, na_rep="NaN")
    if verbose:
        syserr("Done\n")
def get_model(model_path='D:\\Study\\DataScience\\Projects\\DataApps\\toll_tsmodel.pkl'):
    """Load the pickled time-series model.

    Args:
        model_path: Location of the pickle. Defaults to the path that was
            previously hard-coded, so existing ``get_model()`` calls behave
            as before; pass another path to run on a different machine.

    Returns:
        The object unpickled by ``sm.load``.
    """
    return sm.load(model_path)
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
import pandas as pd

arima_result = sm.load("../analysis/notebooks/models/arima.pickle")
# garch_result= sm.load("../../analysis/notebooks/models/garch.pickle")


def forecast_future(date):
    """Forecast the value on ``date`` with the saved ARIMA result.

    Args:
        date: Target date as a string (anything ``pd.to_datetime`` accepts).

    Returns:
        The last value of the forecast covering the whole days between now
        and ``date``.

    Raises:
        ValueError: if the date cannot be parsed or the forecast fails.
    """
    # calc timedelta in days
    try:
        days = pd.Timedelta(pd.to_datetime(date) - pd.Timestamp.now()).days
        return arima_result.forecast(days)[0][-1]
    except Exception as err:
        # Raising a plain string is itself a TypeError in Python 3; raise a
        # real exception and keep the original cause attached.
        raise ValueError("forecast_future was passed an invalid date") from err


def predict_history(data):
    # need to correct below code
    # need to slice data at 95% and train
    # ``isinstance`` needs the object as well as the type.
    if isinstance(data, pd.core.series.Series):
        model = ARIMA(data, order=(2,1,2))  # order from our prior calcs
# Box plot of the evaluations after outlier removal.
plt.figure()
sns.boxplot(data=x_train['evaluation'])
plt.title('Pawn evaluations boxplot after outlier removal')
plt.savefig('../../report/plots/post_outlier_removal_boxplot.png')

# Histogram of the clock feature.
plt.figure()
df['sig_clock_pct'].hist()
plt.title('Clock feature histogram')
plt.xlabel('Clock feature')
plt.ylabel('Count')
plt.savefig('../../report/plots/clock_feature_hist.png')

# Random sample of 100 rows, exported for the report.
df.sample(100).to_markdown('../../report/plots/sample_datapoints.md')

# Evaluate the saved model on half-moves 1..179 and its derived terms.
pace_of_play = sm.load('../../models/pace_of_play.pckl')
x = pd.DataFrame({'half_move': range(1, 180)})
x['half_move_sq'] = x['half_move'] ** 2
x['half_move_inv'] = 1 / x['half_move']
x['half_move_inv_sq'] = x['half_move_inv'] ** 2
y = pace_of_play.predict(x)

# Observed points with the model curve drawn on top.
plt.figure()
plt.scatter(df['half_move'], df['winner_clock'], alpha=0.05)
plt.plot(x['half_move'], y, 'r--', label='Model')
plt.legend(loc='best')
plt.xlabel('Half-move')
plt.ylabel('Winner clock time percent (sigmoid-transformed)')
plt.title("Chess game winners' pace of play")
plt.savefig('../../report/plots/pace_of_play_sigmoid.png')
def load_models():
    """Load the pickled model stored at ``models/test_file_final_stats3.pickle``."""
    return sm.load("models/test_file_final_stats3.pickle")
def get_model(model_id):
    """Download the pickle for ``model_id`` from the bucket and load it.

    The object key comes from ``key_template``; the file is written to
    ``/tmp/<model_id>.pickle`` before being loaded.
    """
    remote_key = key_template.format(id=model_id)
    local_path = '/tmp/{id}.pickle'.format(id=model_id)

    source_bucket = s3.Bucket(bucket)
    source_bucket.download_file(remote_key, local_path)
    return sm.load(local_path)
import statsmodels.api as sm
import pandas as pd
from UtilityFunctions import build_eqn
from patsy import dmatrices, Sum, Diff, Poly
from patsy.builtins import standardize
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

# The pickle is used as a DataFrame below (column access, ``.mean()``).
df = sm.load('nan_income_only.pickle')
m = df['X30'].mean()

# The regressand ``income`` is a copy of column X13.
df['income'] = df['X13'].copy()
# Create a new variable from the ratio of X30 to X21. The mean of X30 is
# added to both numerator and denominator so that cases where the
# denominator (X21) is zero are less problematic.
df['x_ratio'] = (df['X30'] + m) / (m + df['X21'])

# Build the model formula, leaving out the listed columns.
eqn = build_eqn(df,
                y='income',
                omit=[
                    'x_ratio', 'regressand', 'X13', 'income_present',
                    'any_regressand', 'income'
                ])
print(eqn)

# Design matrices for the formula, then standardise the response.
y, X = dmatrices(eqn, data=df, return_type='dataframe')
y_scaler = StandardScaler()
y_scaler.fit(y)
y = y_scaler.transform(y)
X_scaler = StandardScaler()
def get_model(cls):
    """Get the model object for this instance, loading it if it's not already loaded."""
    # Identity test: ``== None`` would dispatch to the loaded model's own
    # ``__eq__``, which need not return a plain bool.
    if cls.model is None:
        cls.model = sm.load(os.path.join(model_path, "gamma-model.pkl"))
    return cls.model
def annihilator(self, askP, askS, bidP, bidS, size, ts):
    """Build order-book features from quote arrays and return one prediction.

    A DataFrame indexed by ``ts`` is filled with per-tick features derived
    from bid/ask prices (``bidP``/``askP``) and sizes (``bidS``/``askS``),
    plus lagged, spread-scaled copies of them. If the resulting design matrix
    has exactly the expected shape, the model pickled at ``self.model`` is
    loaded and evaluated.

    Args:
        askP, askS: Ask prices and sizes, indexable by position.
        bidP, bidS: Bid prices and sizes, indexable by position.
        size: Values stored in the ``size`` column
            (presumably cumulative traded volume — confirm).
        ts: Timestamps; used as the index.

    Returns:
        The first predicted value, or ``0`` (after logging a warning) when
        the design matrix is not of shape ``(5, 14)``.
    """
    resampledDF = pd.DataFrame()
    resampledDF['timestamp'] = ts
    resampledDF['size'] = size
    resampledDF.index = resampledDF['timestamp']
    resampledDF.drop(columns='timestamp', inplace=True)
    resampledDF['deltaVtB'] = 0
    resampledDF['deltaVtA'] = 0
    resampledDF['Mt'] = 0
    resampledDF['OIR'] = 0
    # Each row compares tick i with tick i + 1; the last row keeps the 0
    # initial values.
    # NOTE(review): these ``df[col].iloc[i] = ...`` writes are chained
    # assignments; whether they reach the frame depends on the pandas
    # version — confirm.
    for i in range(len(ts) - 1):
        # NOTE(review): the last term uses ``bidS[1]`` while the ask-side
        # formula below uses ``askS[i]``; possibly meant ``bidS[i]`` — confirm.
        resampledDF['deltaVtB'].iloc[i] = 0 * (bidP[i] < bidP[i + 1]) + (bidS[i] - bidS[i + 1]) * (
            bidP[i] == bidP[i + 1]) + bidS[1] * (bidP[i] > bidP[i + 1])
        resampledDF['deltaVtA'].iloc[i] = askS[i] * (askP[i] < askP[i + 1]) + (askS[i] - askS[i + 1]) * (
            askP[i] == askP[i + 1]) + 0 * (askP[i] > askP[i + 1])
        # Mid price and size imbalance ratio at tick i.
        resampledDF['Mt'].iloc[i] = (bidP[i] + askP[i]) / 2
        resampledDF['OIR'].iloc[i] = (bidS[i] - askS[i]) / (bidS[i] + askS[i])
    resampledDF['VOI'] = resampledDF.deltaVtB - resampledDF.deltaVtA
    resampledDF['DeltaVOI'] = resampledDF.VOI.diff()
    resampledDF['TTV'] = resampledDF.Mt * resampledDF['size']
    resampledDF['TPt'] = 0
    resampledDF['TPt'] = resampledDF.Mt.copy()
    resampledDF['Rt'] = 0
    # Rt: TPt minus the average of the mid prices at ticks i and i + 1.
    # It is computed while TPt still equals Mt (TPt is rewritten just below).
    for i in range(len(ts) - 1):
        resampledDF['Rt'].iloc[i] = resampledDF.TPt.iloc[i] - (
            ((bidP[i] + askP[i]) / 2) + ((bidP[i + 1] + askP[i + 1]) / 2)) / 2
    # TPt: carried forward while ``size`` is unchanged, otherwise the change
    # in TTV divided by the change in ``size``.
    for i in range(1, len(resampledDF)):
        if resampledDF.loc[resampledDF.index[i], 'size'] == resampledDF.loc[resampledDF.index[i - 1], 'size']:
            resampledDF.loc[resampledDF.index[i], 'TPt'] = resampledDF.loc[resampledDF.index[i - 1], 'TPt']
        else:
            resampledDF.loc[resampledDF.index[i], 'TPt'] = (resampledDF.loc[resampledDF.index[i], 'TTV'] - resampledDF.loc[resampledDF.index[i - 1], 'TTV']) / \
                                                           (resampledDF.loc[resampledDF.index[i], 'size'] - resampledDF.loc[resampledDF.index[i - 1], 'size'])
    # The spread of the FIRST tick is used to scale every row.
    resampledDF['Spread'] = askP[0] - bidP[0]
    resampledDF['VOI0'] = resampledDF['VOI'] / resampledDF['Spread']
    resampledDF['OIR0'] = resampledDF['OIR'] / resampledDF['Spread']
    resampledDF['R0'] = resampledDF['Rt'] / resampledDF['Spread']
    VOIFeatureList = ['VOI0']
    OIRFeatureList = ['OIR0']
    # Lags 1..5 of VOI and OIR, each scaled by the spread.
    for i in range(1, 6):
        VOIString = 'VOI' + str(i)
        OIRString = 'OIR' + str(i)
        VOIFeatureList.append(VOIString)
        OIRFeatureList.append(OIRString)
        resampledDF[VOIString] = resampledDF['VOI'].shift(i) / resampledDF['Spread']
        resampledDF[OIRString] = resampledDF['OIR'].shift(i) / resampledDF['Spread']
    # 6 VOI + 6 OIR + R0 = 13 features; the constant makes 14 columns.
    featureList = VOIFeatureList
    featureList.extend(OIRFeatureList)
    featureList.append('R0')
    # Drops the rows left incomplete by ``diff`` and the lags.
    resampledDF.dropna(inplace=True)
    X = resampledDF[featureList]
    X = sm.add_constant(X)  ## add an intercept (beta_0) to our model
    if X.shape == (5, 14):
        # The model is unpickled on every call.
        model = sm.load(self.model)
        prediction = model.predict(X)
        # print('Predictions: ', prediction.iloc[0])
        return prediction.iloc[0]
    else:
        self.logger.warning('Annihilator: Error X format -> ' + str(X.shape))
        return 0
# Relevant IP addresses worker_ip = '10.195.2.229' master_ip = '10.195.0.10' # Download the model import socket import os def get_stuff(): pass model = sm.load('recieved.pkl') print("model loaded") # Defining an inherited Handler for HTTP requests that is able # to handle POST requests. class HandleRequests(BaseHTTPRequestHandler): def _set_headers(self): self.send_response(200) self.send_header('Content-Type', 'application/json') self.end_headers() def do_POST(self): print("1: Processing post request") self._set_headers()
def getModelData(num, Maximum):
    """
    Get latest model prediction for selected station.

    Downloads a weather forecast, builds 24 prediction rows from its first 8
    entries (3 rows each) and evaluates one saved model per hour.

    Args:
        num: Station number stored in the ``STATION_NUMBER`` column.
        Maximum: Value the predictions are capped at and divided by
            (presumably the station's capacity — confirm).

    Returns:
        2-dimensional list of time and percentage of available bikes: a
        header row followed by 24 ``[time string, fraction]`` rows.
    """
    THIS_FOLDER = os.path.dirname(
        os.path.abspath(__file__)) + '/static/pre_model/'
    # NOTE(review): the API key is embedded in the URL — consider moving it
    # to configuration.
    url = "http://api.openweathermap.org/data/2.5/forecast?id=2964574&APPID=31f19a108384bc317e2d91c5621c791e"
    with urllib.request.urlopen(url) as url:
        data = json.loads(url.read().decode('utf-8-sig'))
    initTimestamp = data["list"][0]["dt"]
    now = datetime.datetime.fromtimestamp(initTimestamp)
    weekday = now.weekday()
    hour = now.hour
    # Hour-of-week of the first forecast entry; selects the model file.
    inittime = int(hour + weekday * 24)
    columns = ["STATION_NUMBER", "weather_rain", "weather_temperature"]
    df_prediction = pd.DataFrame(columns=columns)
    df_prediction["STATION_NUMBER"] = df_prediction["STATION_NUMBER"].astype(
        'category')
    df_prediction["weather_rain"] = df_prediction["weather_rain"].astype(
        'float')
    df_prediction["weather_temperature"] = df_prediction[
        "weather_temperature"].astype('float')
    for i in range(8):
        # presumably Kelvin -> Celsius — confirm against the API's units
        temp = data["list"][i]["main"]["temp"] - 273.15
        # Entries without a "rain" / "3h" key count as no rain.
        try:
            rain = data["list"][i]["rain"]["3h"]
        except KeyError:
            rain = 0
        # Each forecast entry is repeated for 3 rows (8 * 3 = 24 rows).
        # NOTE(review): ``DataFrame.append`` is no longer available in recent
        # pandas releases — confirm the pinned version.
        for j in range(3):
            df_prediction = df_prediction.append(
                {
                    "weather_rain": rain,
                    "weather_temperature": temp,
                    "STATION_NUMBER": num,
                },
                ignore_index=True)
    res = [["Time", "Degree of availablilty"]]
    for i in range(24):
        # One model file per hour, named by ``i + inittime``.
        # NOTE(review): this index is not wrapped at the end of the week —
        # confirm that files exist for values past 167.
        model_name = THIS_FOLDER + str(i + inittime)
        newlm = smapi.load(model_name)
        stamp = datetime.datetime.fromtimestamp(initTimestamp + 60 * 60 * i)
        time = stamp.strftime("%H: %M")
        pre = [time]
        # Predict from row i only.
        pre_percentage = int(newlm.predict(df_prediction.take([i])))
        # Cap at Maximum and express as a fraction of it.
        if pre_percentage >= Maximum:
            pre_percentage = Maximum / Maximum
        else:
            pre_percentage = pre_percentage / Maximum
        pre.append(pre_percentage)
        res.append(pre)
    return res