Example #1
def load(
    tag: t.Union[str, Tag],
    model_store: "ModelStore" = Provide[BentoMLContainer.model_store],
) -> "ModelType":
    """
    Load a model from the BentoML local model store with the given tag.

    Args:
        tag (:code:`Union[str, Tag]`):
            Tag of a saved model in the BentoML local model store.
        model_store (:class:`~bentoml._internal.models.store.ModelStore`, defaults to :obj:`BentoMLContainer.model_store`):
            The BentoML model store, provided by the DI container.

    Returns:
        :obj:`Any`: an instance of a :mod:`statsmodels` model, unpickled from the BentoML model store.

    Examples:

    .. code-block:: python

        import bentoml

        model = bentoml.statsmodels.load("holtswinter")

    """
    model = model_store.get(tag)
    if model.info.module not in (MODULE_NAME, __name__):
        raise BentoMLException(
            f"Model {tag} was saved with module {model.info.module}, failed loading with {MODULE_NAME}."
        )
    model_file = model.path_of(f"{SAVE_NAMESPACE}{PKL_EXT}")
    return sm.load(model_file)
Example #2

    def load(self):
        try:
            # Unpickle the saved statsmodels results from disk.
            self._model = sm.load(self._model_path)
        except Exception:
            self._model = None
        return self
Example #3
def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    # initialize the model
    model = sm.load('HttpTrigger/model.pickle')

    codinghours = req.params.get('hours')

    if not codinghours:
        try:
            req_body = req.get_json()
        except ValueError:
            pass
        else:
            codinghours = req_body.get('hours')

    if codinghours:

        # Build a one-row DataFrame, since the statsmodels formula interface expects one
        hours = pd.DataFrame({'CodingHours': [int(codinghours)]})

        # use the model to make predictions on a new value
        coffeecups = model.predict(hours)

        return func.HttpResponse(f"{coffeecups[0]}")

    else:
        return func.HttpResponse(
            "Please pass a the number of coding hours in the request body",
            status_code=400)
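
A quick way to exercise this function once the Functions host is running locally (the route and port below are assumptions based on Azure Functions defaults, not part of the original code):

import requests

# Hypothetical local endpoint; `func start` serves HTTP triggers on port 7071 by default.
resp = requests.get('http://localhost:7071/api/HttpTrigger', params={'hours': '8'})
print(resp.status_code, resp.text)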
Example #4
    def fitting(self, mdlName, folderPath, method="lbfgs"):

        mdlpath = folderPath + "/" + mdlName
        sys.stdout.flush()

        if not os.path.isfile(mdlpath):

            print("\nestimation of sarimax model " + str(self.order) + "x" +
                  str(self.sorder) + "\n\n")
            sys.stdout.flush()

            try:
                self.fitted = self.mdl.fit(method=method, maxiter=200)

                ### algorithms/methods ###

                #- 'newton' for Newton-Raphson,
                #- 'nm' for Nelder-Mead
                #- 'bfgs' for Broyden-Fletcher-Goldfarb-Shanno (BFGS)
                #- 'lbfgs' for limited-memory BFGS with optional box constraints //default
                #- 'powell' for modified Powell's method
                #- 'cg' for conjugate gradient
                #- 'ncg' for Newton-conjugate gradient
                #- 'basinhopping' for global basin-hopping solver

            except Exception as e:
                print("could not estimate model parameters:")
                print(e)
                sys.stdout.flush()
                return

            try:

                self.saveit(folderPath, mdlName)

            except Exception as e:
                print(
                    "saving the model caused problems - please debug the program"
                )
                print(e)
                sys.stdout.flush()
                return
                #self.fit = None
                #self.fit = sm.load(path)

        else:
            self.fitted = sm.load(mdlpath)
            print("model loaded: " + mdlpath)

        try:  #no clue why you can't filter after saving in the same thread
            self.filt = self.mdl.filter(self.fitted.params)
        except Exception:
            print(
                "model fitted and saved, the program will restart for results")
            python = sys.executable
            os.execl(python, python, *sys.argv)
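
The optimizer passed to fit() can matter in practice; here is a minimal, self-contained sketch of switching between the methods listed in the comments above (the data and order are hypothetical):

import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX

y = np.random.default_rng(0).normal(size=120)
mdl = SARIMAX(y, order=(1, 0, 1))
# Any of the solvers listed above ('newton', 'nm', 'bfgs', 'lbfgs', ...) works here.
fitted = mdl.fit(method='powell', maxiter=200, disp=False)
print(fitted.params)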
Example #5
    def load(self, **kwargs):
        """Load a pickled statsmodels results object and derive
        the residuals, result and config attributes from it.
        """
        # Load.
        self._raw = sm.load(**kwargs)
        self._resid = self._raw.resid
        self._result = self._init_result()
        self._config = self._init_config()
        # Return.
        return self
Example #6
def predict(data, formula, model_file):
    """
    Get the probability for all rows
    :param data: DataFrame of the data
    :param formula: formula used in the predictor
    :param model_file: *.pickle file containing the model
    :return: np.array of probabilities
    """
    model = load(model_file)  # `load` is presumably statsmodels' pickle loader (sm.load)
    _, transformed_data = patsy.dmatrices(formula, data, return_type='dataframe')
    log_odds = (transformed_data * model.params).sum(axis=1)
    return np.exp(log_odds) / (1 + np.exp(log_odds))
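
A self-contained sketch of how predict() round-trips with a pickled logit fit (the data, formula and file name are all hypothetical):

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.default_rng(0)
df = pd.DataFrame({'x': rng.normal(size=200)})
df['y'] = (df['x'] + rng.normal(size=200) > 0).astype(int)

# Fit a logit, pickle it, then score the same rows through predict() above.
smf.logit('y ~ x', data=df).fit(disp=0).save('logit_model.pickle')
probs = predict(df, 'y ~ x', 'logit_model.pickle')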
Example #7

def selector_regression(matched_sents, document, str2vec, wc, args={}):
    if len(regression_models) == 0:
        for f in str2vecs:
            regression_models[f.__name__] = sm.load('data/regression_' +
                                                    f.__name__ + ".pickle")

    model = regression_models[str2vec.__name__]

    x = np.stack([vec for _, vec in matched_sents], axis=0)
    x = pd.DataFrame(x)
    x = sm.add_constant(x)
    scores = model.predict(exog=x).values.tolist()
    return select_from_scores(matched_sents, scores, wc, args)
Example #8
def make_prediction(df, model_name, yield_type='rainfed'):
    # Load model
    trained_model = sm.load('yield_%s_model.pickle' % model_name)

    trend_results = load_yield_trend(yield_type)

    # Get predicted yield and attach it to existing dataframe
    df_predict = df.copy().join(
        trained_model.predict(df).to_frame('predicted_yield_%s_ana' %
                                           yield_type))
    # Add trend term to get yield
    df_predict['predicted_yield_%s' % yield_type] = (
        df_predict['predicted_yield_%s_ana' % yield_type]
        + trend_results.predict(df_predict['year']))
    return df_predict
Example #9
def predict():

    model = load('fitted.pkl')
    print('session data: ', session)
    print(request.args)
    period = request.get_json()
    print(period.keys())

    if model is not None:

        #print(model.summary())
        result = model.forecast(period['h'])
        print(result)
        return jsonpify(result.to_dict())
    else:
        return 'please run train first'
Example #10
def predict_sequence(save_path, eval_data, order, input_start, input_size, output_size, 
    x_coord, y_coord):
    # start = time.time()
    trained_model = sm.load(f"{save_path}/{x_coord}_{y_coord}.pickle")
    # post_load = time.time()
    model = SARIMAX(eval_data[input_start:input_start+input_size, x_coord, y_coord], order=order)
    # post_create = time.time()
    model_fit = model.filter(trained_model.params)
    # post_filter = time.time()
    
    prediction_wrapper = model_fit.get_prediction(start=0, 
                             end= input_size + output_size - 1, dynamic=input_size)

    # post_predict = time.time()

    # print(f"loading time: {post_load - start}")
    # print(f"create time: {post_create - post_load}")
    # print(f"filter time: {post_filter - post_create}")
    # print(f"predict time: {post_predict - post_filter}")
    # print(f"full time: {post_predict - start}") 

    return prediction_wrapper.predicted_mean[-output_size:]
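
The trick above is that filter() applies already-estimated parameters to fresh data without re-fitting, which is much cheaper than fit(). A minimal sketch of the same pattern on toy data (hypothetical order):

import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX

rng = np.random.default_rng(0)
train, test = rng.normal(size=100), rng.normal(size=40)

params = SARIMAX(train, order=(1, 0, 0)).fit(disp=False).params
# Reuse the fitted parameters on new data -- no re-estimation.
fitted_on_test = SARIMAX(test, order=(1, 0, 0)).filter(params)
pred = fitted_on_test.get_prediction(start=0, end=len(test) - 1)
print(pred.predicted_mean[-5:])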
Example #11

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from patsy import dmatrices, Sum, Diff, Poly

ols_results = sm.load('basic_OLS_results.pickle')

p = 'C(X20, Sum)'
params = ols_results.params
state_key_set = [key for key in params.keys() if p in key]

pvalues = ols_results.pvalues[state_key_set]

for key in state_key_set:
    if pvalues[key] < .05:
        print(key)

print(pvalues[pvalues < .05])

plt.hist(pvalues.values.tolist(), bins=np.arange(0, 1, .05))
plt.show()
# print(ols_results.pvalues)
Example #12
    'legend.title_fontsize': 16,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
    'axes.labelsize': 16,
    'axes.titlesize': 20,
    'figure.dpi': 100
}

matplotlib.rcParams.update(andy_theme)
#############################################

#############################################
#Load in the test data and the fitted discrete time model
test_dat = pd.read_csv('test_data.csv')

model = sm.load('discrete_time.pickle')

#Expand out how many iterations I want to look at
end_time = 52 * 2
test_dat['OrigEvent'] = test_dat['EVENT']
test_explode = discrete_time.explode_data(data=test_dat,
                                          time='WeekTot',
                                          outcome='EVENT',
                                          max_time=end_time,
                                          min_time=end_time,
                                          cum_event='CumEvent')

#Recreating the spline terms
knot_locs = [4, 10, 20, 40, 60, 80]
discrete_time.rcs(test_explode['Time'],
                  knot_locs,
Example #13
import itertools
import warnings
from math import sqrt

import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error

warnings.filterwarnings('ignore')

arroz = pd.read_csv("data_real.csv")
arroz['Date'] = pd.to_datetime(arroz['Date'])
arroz = arroz.set_index('Date')

mod = sm.load('arroz.pickle')

pred = mod.get_forecast(steps=4 + 6)
pred_ci = pred.conf_int()

#Producing and visualizing forecasts

pred_uc = mod.get_forecast(steps=6)
pred_ci = pred_uc.conf_int()

ax = arroz.plot(marker='o', label='observed', figsize=(14, 7))
pred_uc.predicted_mean.plot(
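
The plot call above is cut off; a typical completion of this standard forecast-plus-interval figure is sketched below (the labels are hypothetical, not the author's original lines):

pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci.index, pred_ci.iloc[:, 0], pred_ci.iloc[:, 1],
                color='k', alpha=.25)
ax.legend(loc='best')
plt.show()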
Example #14
    data = remove_stop_words(data)  # needed again because num2word expands numbers into stop words (101 -> one hundred and one)
    data = remove_obvious(data)
    return data



st.title('Publish or perish: data-driven choice of book keywords for publishing on Amazon')

main_category = st.selectbox("Select the main category of the book: ", ["",'self-help'])

if main_category == 'self-help':
    filename_model = '../../data/topic_model_tfidf_nmf.pickle'
    nmf_model = pickle.load(open(filename_model, 'rb'))
    filename_model = '../../data/topic_model_tfidf.pickle'
    tfidf_model = pickle.load(open(filename_model, 'rb'))
    ols_results = sm.load('../../data/ols.pickle')

    book_title = st.text_area('Enter the title of the book:')
    book_description = st.text_area('Enter the description of the book:')
    # book_labels = st.text_input('Enter the labels of the book:')
    input_text = book_title +' '+book_description
    
    processed_input_text = preprocess(input_text)
    
    
    df_coeff_topics = pd.read_csv('../../data/books_25_pages_author_info_description_genres_topics_top_words_ols_coeff.csv')
    df_coeff_topics = df_coeff_topics.rename(columns = lambda x: x.strip())
    
    #import models
    filename_model = '../../data/topic_model_tfidf_nmf.pickle'
    nmf_model = pickle.load(open(filename_model, 'rb'))
Example #15
def main(options):
    """Main logic of the script"""
    #
    # Define mapping from column names to pretty names
    #
    columns_map = {'ContraScoreTargetSite': 'Accessibility',
                   'ID': "ID",
                   'MIRZABranchLengthScoreFill': "Conservation",
                   'MIRZAscore': 'MIRZAscore',
                   'distToBoundary': 'Distance to boundary',
                   'flanksG': 'Flanks G',
                   'flanksU': 'Flanks U',
                   'hybrid': "Hybrid",
                   'miRNA': 'miRNA',
                   'precise_type': 'Precise type',
                   'probability_with_bls': "Probability with conservation",
                   'probability_without_bls': "Probability without conservation",
                   'seed_beg': 'Seed start',
                   'seed_end': 'Seed end',
                   'type': 'Type'}
    #
    # Define columns order
    #
    columns_order = ['ID',
                     'miRNA',
                     'seed_beg',
                     'seed_end',
                     'type',
                     'precise_type',
                     'hybrid',
                     'flanksU',
                     'flanksG',
                     'ContraScoreTargetSite',
                     'distToBoundary',
                     'MIRZAscore',
                     'MIRZABranchLengthScoreFill',
                     'probability_without_bls',
                     'probability_with_bls']

    # read the models from pickle
    if options.verbose:
        syserr("Reading data file\n")
    df = pd.read_table(options.input)

    if options.verbose:
        syserr("Adding constant to data\n")
    df['const'] = 1.0


    model_bls = sm.load(options.model_bls)
    model_nobls = sm.load(options.model_nobls)

    #
    # extract columns...
    #
    columns_bls = model_bls.params.keys().tolist()[1:]
    columns_nobls = model_nobls.params.keys().tolist()[1:]

    #
    # ...and predict and scale probabilities
    #
    if options.verbose:
        syserr("Adding probability to data\n - with BLS\n")
    df['probability_with_bls'] = scaled_logit_inverse(
        np.dot(df[['const'] + columns_bls].values, model_bls.params.values))
    if options.verbose:
        syserr(" - without BLS\n")
    df['probability_without_bls'] = scaled_logit_inverse(
        np.dot(df[['const'] + columns_nobls].values, model_nobls.params.values))

    #
    # reorder columns
    #
    if options.verbose:
        syserr("Reordering columns\n")
    df = df[columns_order]
    #
    # and rename columns
    #
    df.columns = [columns_map[col] for col in df.columns]

    if options.verbose:
        syserr("Saving file\n")
    df.to_csv(options.output, sep='\t', index=None, na_rep="NaN")
    if options.verbose:
        syserr("Done\n")
Example #16
with h5py.File(bin_file, 'a') as f:
    if 'sklearn' in f:
        print('Deleting earlier sklearn predictions')
        del f['sklearn']
    f.create_dataset('sklearn', data=preds)
accuracy.append(accuracy_score(y_test, preds))
precision.append(precision_score(y_test, preds))
recall.append(recall_score(y_test, preds))
f1.append(f1_score(y_test, preds))
sklearn_metrics = np.column_stack([accuracy, precision, recall, f1])

# Prediction statsmodel
accuracy, precision, recall, f1 = [], [], [], []
model_path = data_path / batch / 'models' / img
model_path = model_path / '{}'.format(img + '_statsmodel.pickle')
trained_model = sm.load(str(model_path))
preds = np.round(trained_model.predict(X_test))
with h5py.File(bin_file, 'a') as f:
    if 'statsmodel' in f:
        print('Deleting earlier statsmodel predictions')
        del f['statsmodel']
    f.create_dataset('statsmodel', data=preds)
accuracy.append(accuracy_score(y_test, preds))
precision.append(precision_score(y_test, preds))
recall.append(recall_score(y_test, preds))
f1.append(f1_score(y_test, preds))
statsmodel_metrics = np.column_stack([accuracy, precision, recall, f1])

# Prediction statsmodel
accuracy, precision, recall, f1 = [], [], [], []
model_path = data_path / batch / 'models' / img
Example #17
def load_yield_trend(yield_type):
    trend_model = sm.load("yield_trend_%s_model.pickle" % yield_type)
    return trend_model
Example #18
    def do_POST(self):

        print("1: Processing post request")
        self._set_headers()
        global model
        # print("header: {}".format(self.headers))
        # Processing HTTP POST request data
        # print("header type: {}".format((self.headers['Content-Length'])))

        length = int(self.headers['Content-Length'])
        #print("Content Length :{}".format(length))
        post_body = self.rfile.read(length)
        val_json = json.loads(post_body.decode('utf-8'))
        # val = val_json["object"]["battery"]
        battery = val_json["object"]["battery"]
        val = val_json["object"]["people"]
        print("battery_val: {}\npeople: {}".format(battery, val))
        #is_model_updated = val_json["model_updated"]
        print("Received post request")

        if "model_updated" in val_json:
            is_model_updated = val_json["model_updated"]
        else:
            is_model_updated = False

        if is_model_updated:
            self.finish()
            self.connection.close()
            time.sleep(1)
            self.iot_platform_connect(ip_addr=worker_ip, port=12345)
            is_model_updated = False

            # Reload the model
            model = sm.load('recieved.pkl')
            return
        print("1: Finished processing post request")

        # TODO: Apply learned prediction model on the
        # measurement received within the POST request.

        prediction = int(model.forecast())
        print("2: Apply prediction model")
        print("prediction: {}".format(prediction))

        # TODO: Forward the observation received in the
        # POST request to the IoT Platform.

        print("3: Forwarding observation to IoT Platform")
        iot_url = GATEWAY_URL

        gw_conn = http.client.HTTPConnection(iot_url)
        response = gw_conn.connect()
        print("response: {}".format(response))
        print("Connection with gateway established")

        msg = {}
        msg['sensor_id'] = SENSOR_ID
        msg['timestamp'] = int(time.time() * 1000)
        msg['value'] = val
        gw_json_msg = json.dumps(msg)

        gw_headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + DEV_JWT
        }
        gw_conn.request('POST', '/', gw_json_msg, gw_headers)
        """print("Attempting post...")
		full_url = GATEWAY_URL
		while True: 
			response = requests.post(full_url, data = gw_json_msg, headers = gw_headers)
			if response.status_code == 200:
				break
			full_url = '10.195.0.10:8083'
		print("Connection with gateway successfully established")
		"""
        gw_resp = gw_conn.getresponse()
        print("Status: {}".format(gw_resp.status))
        #if gw_response == 200:
        #	pass

        gw_conn.close()
        print("3: Data sent to IoT Platform")

        # TODO: Adjust the downstream sending through LoRaServer API
        # to send the results of applying the prediction model to
        # the measurement received within the POST request.
        # Reference: http://localhost:8080/api#!/DeviceQueueService/Enqueue
        print("4: Sending prediction via LoRaServer API to board")
        api_conn = http.client.HTTPConnection(SERVER_URL)
        api_conn.connect()
        print("Connected to LoRaServer API")

        #predictionB64 = base64.b64encode(str(prediction2).encode('utf-8'))
        predictionB64 = base64.b64encode(bytes([prediction]))
        print("predictionB64 is: " + predictionB64.decode('utf-8'))

        # When the prediction is accomplished, the result is returned
        # to the device via the LoRa App Server's API call that enqueues the data.
        # Below is the minimal possible API call returning a 200 OK HTTP code.
        data = {}
        dev_queue_item = {}
        dev_queue_item['data'] = predictionB64.decode('utf-8')
        #dev_queue_item['data'] = "10"
        dev_queue_item['fPort'] = device_fPort
        data['deviceQueueItem'] = dev_queue_item
        json_data = json.dumps(data)

        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Grpc-Metadata-Authorization": "Bearer " + JWT
        }
        api_conn.request('POST', api_dev_enqueue_url, json_data, headers)

        # Sending the prediction to the device was requested from API via POST request above.
        # TODO: you need to ensure that the response returned is correct.
        # Reference: https://docs.python.org/2.2/lib/httpresponse-objects.html
        print("Prediction send to board. Waiting for response")
        resp = api_conn.getresponse()

        if resp.status == 200:
            print("Status 200:")
            print("Data sent to enque")
        else:
            print("Data not sent properly:")
            print("Status {}".format(resp.status))
            print("Response: {}".format(resp.reason))

        api_conn.close()
        print("4: Closing connection to LoRaServer API")
        print("-----Finished processing DO_POST()-----")
Example #19
def loadLogitModel(config):
    return sm.load(config.get("logit_model_file"))
Example #20
def backward_selected(data, response, remaining, prev=[]):
    """
        based upon algorithm found at: https://planspace.org/20150423-forward_selection_with_statsmodels/
    """
    remain = remaining[:]
    selected = []
    prv = []
    best_formula = ''
    current_score, best_new_score = 0.05, 0.05
    starting_formula = "{response} ~ {selected}"

    for i in range(0, len(prev)):
        prv.append("+".join(prev[i]))
    if len(prv) > 0:
        previous = "+".join(prv)
        if len(previous) > 1:
            previous = previous + '+'
    else:
        previous = '1'

    while remain and current_score == best_new_score:
        current_score = 0.05
        scores_with_candidates = []
        sel = starting_formula.format(response=response,
                                      selected='+'.join(remain),
                                      prev=previous)
        s_file = "b_models/" + sel.replace(response + " ~ ", "") + '.pickle'
        s_file = s_file.replace('+', '')
        if Path(s_file).exists():
            sel_model = sma.load(s_file)
        else:
            if sel == best_formula:
                sel_model = best_model
            else:
                sel_model = sm.ols(sel, data).fit()
                sel_model.save(s_file)
        print("testing base: {}".format(sel), file=file)
        print("testing base: {}".format(sel))
        for candidate in remain:
            s = remain[:]
            s.remove(candidate)
            if len(s) == 0 and previous.endswith('+'):
                previous = previous[:-1]
            formula = starting_formula.format(response=response,
                                              selected='+'.join(s),
                                              prev=previous)
            f_file = "b_models/" + formula.replace(response + " ~ ",
                                                   "") + '.pickle'
            f_file = f_file.replace('+', '')
            if Path(f_file).exists():
                model = sma.load(f_file)
            else:
                model = sm.ols(formula, data).fit()
                model.save(f_file)
            print("testing removal: {}".format(formula), file=file)
            print("testing removal: {}".format(formula))
            prf = sma.stats.anova_lm(model, sel_model)['Pr(>F)'].loc[1]
            print("testing removal: {} result: {}".format(formula, prf),
                  file=file)
            print("testing removal: {} result: {}".format(formula, prf))
            scores_with_candidates.append((prf, candidate, model, formula))
        scores_with_candidates.sort()
        best_new_score, best_candidate, best_model, best_formula = (
            scores_with_candidates.pop())
        if current_score < best_new_score:
            remain.remove(best_candidate)
            selected.append(best_candidate)
            current_score = best_new_score
    if previous[:1] != "+" and len(selected) == 0:
        previous = previous[:-1]
    for s in selected:
        remaining.remove(s)
    formula = starting_formula.format(response=response,
                                      selected='+'.join(remaining),
                                      prev=previous)
    model = sm.ols(formula, data).fit()
    model.save('best_model_backward2.pickle')
    return model, formula, remaining
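
The loop keys off the partial F-test from sma.stats.anova_lm (sm above is evidently statsmodels.formula.api and sma statsmodels.api). A self-contained sketch of the nested-model comparison that drives each removal (toy data, hypothetical columns):

import numpy as np
import pandas as pd
import statsmodels.api as sma
import statsmodels.formula.api as smf

rng = np.random.default_rng(0)
data = pd.DataFrame(rng.normal(size=(100, 3)), columns=['y', 'x1', 'x2'])

reduced = smf.ols('y ~ x1', data).fit()
full = smf.ols('y ~ x1 + x2', data).fit()
# Row 1 of 'Pr(>F)' is the p-value the selection loop reads via .loc[1].
print(sma.stats.anova_lm(reduced, full)['Pr(>F)'].loc[1])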
Example #21
def get_model():
    model = sm.load(os.path.join(config.MODEL_DIR, 'toll_tsmodel.pkl'))
    return model
Example #22

        else:
            data = new

# %%
data['date'] = data['date'].apply(
    lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S"))
print(data['date'].describe())
data.describe()

# %%
data = data.sort_values('date').set_index('date', drop=True)
data = data.asfreq(freq="5min")
data.head(5)

# %%
pred = sm.load("../data/input/step2/prediction_model_" + run_date + "-" +
               source_id)

# %%
pred_ci = pred.conf_int()
rcParams['figure.figsize'] = 18, 8
fig, ax = plt.subplots()
ax.plot(data[data.index > (run_datetime - timedelta(3))]['mydata'],
        label='observed')
ax.plot(pred.predicted_mean, label='One-step ahead Forecast', alpha=.7)
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1],
                color='k',
                alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('mydata')
Example #23
def main(options):
    """Main logic of the script"""
    #
    # Define mapping from column names to pretty names
    #
    columns_map = {
        'ContraScoreTargetSite': 'Accessibility',
        'ID': "ID",
        'MIRZABranchLengthScoreFill': "Conservation",
        'MIRZAscore': 'MIRZAscore',
        'distToBoundary': 'Distance to boundary',
        'flanksG': 'Flanks G',
        'flanksU': 'Flanks U',
        'hybrid': "Hybrid",
        'miRNA': 'miRNA',
        'precise_type': 'Precise type',
        'probability_with_bls': "Probability with conservation",
        'probability_without_bls': "Probability without conservation",
        'seed_beg': 'Seed start',
        'seed_end': 'Seed end',
        'type': 'Type'
    }
    #
    # Define columns order
    #
    columns_order = [
        'ID', 'miRNA', 'seed_beg', 'seed_end', 'type', 'precise_type',
        'hybrid', 'flanksU', 'flanksG', 'ContraScoreTargetSite',
        'distToBoundary', 'MIRZAscore', 'MIRZABranchLengthScoreFill',
        'probability_without_bls', 'probability_with_bls'
    ]

    # read the models from pickle
    if options.verbose:
        syserr("Reading data file\n")
    df = pd.read_table(options.input)

    if options.verbose:
        syserr("Adding constant to data\n")
    df['const'] = 1.0

    model_bls = sm.load(options.model_bls)
    model_nobls = sm.load(options.model_nobls)

    #
    # extract columns...
    #
    columns_bls = model_bls.params.keys().tolist()[1:]
    columns_nobls = model_nobls.params.keys().tolist()[1:]

    #
    # ...and predict and scale probabilities
    #
    if options.verbose:
        syserr("Adding probability to data\n - with BLS\n")
    df['probability_with_bls'] = scaled_logit_inverse(
        np.dot(df[['const'] + columns_bls].values, model_bls.params.values))
    if options.verbose:
        syserr(" - without BLS\n")
    df['probability_without_bls'] = scaled_logit_inverse(
        np.dot(df[['const'] + columns_nobls].values,
               model_nobls.params.values))

    #
    # reorder columns
    #
    if options.verbose:
        syserr("Reordering columns\n")
    df = df[columns_order]
    #
    # and rename columns
    #
    df.columns = [columns_map[col] for col in df.columns]

    if options.verbose:
        syserr("Saving file\n")
    df.to_csv(options.output, sep='\t', index=None, na_rep="NaN")
    if options.verbose:
        syserr("Done\n")
Example #24

def get_model():
    model = sm.load('D:\\Study\\DataScience\\Projects\\DataApps\\toll_tsmodel.pkl')
    return model
Example #25
import statsmodels.api as sm 
from statsmodels.tsa.arima_model import ARIMA
import pandas as pd

arima_result = sm.load("../analysis/notebooks/models/arima.pickle")
# garch_result= sm.load("../../analysis/notebooks/models/garch.pickle")

def forecast_future(date):
    """Accept a date string and return the ARIMA forecast for that date."""
    # calc timedelta in days
    try:
        days = pd.Timedelta(pd.to_datetime(date) - pd.Timestamp.now()).days
        return arima_result.forecast(days)[0][-1]
    except Exception as exc:
        raise ValueError("forecast_future was passed an invalid date") from exc

def predict_history(data):

    # need to correct below code
    # need to slice data at 95% and train
    if isinstance(data, pd.Series):
        model = ARIMA(data, order=(2,1,2)) # order from our prior calcs

Example #26
plt.figure()
sns.boxplot(data=x_train['evaluation'])
plt.title('Pawn evaluations boxplot after outlier removal')
plt.savefig('../../report/plots/post_outlier_removal_boxplot.png')

plt.figure()
df['sig_clock_pct'].hist()
plt.title('Clock feature histogram')
plt.xlabel('Clock feature')
plt.ylabel('Count')
plt.savefig('../../report/plots/clock_feature_hist.png')

df.sample(100).to_markdown('../../report/plots/sample_datapoints.md')

pace_of_play = sm.load('../../models/pace_of_play.pckl')

x = pd.DataFrame([x for x in range(1, 180)], columns=['half_move'])
x['half_move_sq'] = x['half_move']**2
x['half_move_inv'] = 1 / x['half_move']
x['half_move_inv_sq'] = x['half_move_inv']**2
y = pace_of_play.predict(x)

plt.figure()
plt.scatter(df['half_move'], df['winner_clock'], alpha=0.05)
plt.plot(x['half_move'], y, 'r--', label='Model')
plt.legend(loc='best')
plt.xlabel('Half-move')
plt.ylabel('Winner clock time percent (sigmoid-transformed)')
plt.title('Chess game winners\' pace of play')
plt.savefig('../../report/plots/pace_of_play_sigmoid.png')
Example #27
def load_models():
    file_name = "models/test_file_final_stats3.pickle"
    model = sm.load(file_name)
    return model
Example #28
def get_model(model_id):
    key = key_template.format(id=model_id)
    tmp = '/tmp/{id}.pickle'.format(id=model_id)
    s3.Bucket(bucket).download_file(key, tmp)
    model = sm.load(tmp)
    return model
Example #29

import statsmodels.api as sm
import pandas as pd
from UtilityFunctions import build_eqn
from patsy import dmatrices, Sum, Diff, Poly
from patsy.builtins import standardize
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

df = sm.load('nan_income_only.pickle')

m = df['X30'].mean()
# Create a ratio of two variables, adding the mean of X30 to the numerator and
# denominator so that cases where the denominator is zero are less problematic.
df['income'] = df['X13'].copy()
df['x_ratio'] = (df['X30'] + m) / (m + df['X21'])

eqn = build_eqn(df,
                y='income',
                omit=[
                    'x_ratio', 'regressand', 'X13', 'income_present',
                    'any_regressand', 'income'
                ])
print(eqn)

y, X = dmatrices(eqn, data=df, return_type='dataframe')

y_scaler = StandardScaler()
y_scaler.fit(y)
y = y_scaler.transform(y)

X_scaler = StandardScaler()
Example #30
    def get_model(cls):
        """Get the model object for this instance, loading it if it's not already loaded."""
        if cls.model is None:
            cls.model = sm.load(os.path.join(model_path, "gamma-model.pkl"))
        return cls.model
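
This lazy-loading classmethod typically hangs off a scoring-service singleton; a sketch of the surrounding scaffold under that assumption (the class name and model path are hypothetical):

import os
import statsmodels.api as sm

model_path = '/opt/ml/model'  # hypothetical location of the pickled results

class ScoringService:
    model = None  # cached across calls so the pickle is read only once

    @classmethod
    def get_model(cls):
        if cls.model is None:
            cls.model = sm.load(os.path.join(model_path, 'gamma-model.pkl'))
        return cls.model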
Example #31
    def annihilator(self, askP, askS, bidP, bidS, size, ts):
        resampledDF = pd.DataFrame()
        resampledDF['timestamp'] = ts
        resampledDF['size'] = size
        resampledDF.index = resampledDF['timestamp']
        resampledDF.drop(columns='timestamp', inplace=True)
        resampledDF['deltaVtB'] = 0
        resampledDF['deltaVtA'] = 0
        resampledDF['Mt'] = 0
        resampledDF['OIR'] = 0
        for i in range(len(ts) - 1):
            resampledDF['deltaVtB'].iloc[i] = 0 * (bidP[i] < bidP[i + 1]) + (bidS[i] - bidS[i + 1]) * (
                        bidP[i] == bidP[i + 1]) + bidS[i] * (bidP[i] > bidP[i + 1])
            resampledDF['deltaVtA'].iloc[i] = askS[i] * (askP[i] < askP[i + 1]) + (askS[i] - askS[i + 1]) * (
                        askP[i] == askP[i + 1]) + 0 * (askP[i] > askP[i + 1])
            resampledDF['Mt'].iloc[i] = (bidP[i] + askP[i]) / 2
            resampledDF['OIR'].iloc[i] = (bidS[i] - askS[i]) / (bidS[i] + askS[i])

        resampledDF['VOI'] = resampledDF.deltaVtB - resampledDF.deltaVtA
        resampledDF['DeltaVOI'] = resampledDF.VOI.diff()
        resampledDF['TTV'] = resampledDF.Mt * resampledDF['size']
        resampledDF['TPt'] = resampledDF.Mt.copy()
        resampledDF['Rt'] = 0
        for i in range(len(ts) - 1):
            resampledDF['Rt'].iloc[i] = resampledDF.TPt.iloc[i] - (
                        ((bidP[i] + askP[i]) / 2) + ((bidP[i + 1] + askP[i + 1]) / 2)) / 2
        for i in range(1, len(resampledDF)):
            if resampledDF.loc[resampledDF.index[i], 'size'] == resampledDF.loc[resampledDF.index[i - 1], 'size']:
                resampledDF.loc[resampledDF.index[i], 'TPt'] = resampledDF.loc[resampledDF.index[i - 1], 'TPt']
            else:
                resampledDF.loc[resampledDF.index[i], 'TPt'] = (resampledDF.loc[resampledDF.index[i], 'TTV'] -
                                                                resampledDF.loc[resampledDF.index[i - 1], 'TTV']) / \
                                                               (resampledDF.loc[resampledDF.index[i], 'size'] -
                                                                resampledDF.loc[resampledDF.index[i - 1], 'size'])
        resampledDF['Spread'] = askP[0] - bidP[0]
        resampledDF['VOI0'] = resampledDF['VOI'] / resampledDF['Spread']
        resampledDF['OIR0'] = resampledDF['OIR'] / resampledDF['Spread']
        resampledDF['R0'] = resampledDF['Rt'] / resampledDF['Spread']
        VOIFeatureList = ['VOI0']
        OIRFeatureList = ['OIR0']
        for i in range(1, 6):
            VOIString = 'VOI' + str(i)
            OIRString = 'OIR' + str(i)
            VOIFeatureList.append(VOIString)
            OIRFeatureList.append(OIRString)
            resampledDF[VOIString] = resampledDF['VOI'].shift(i) / resampledDF['Spread']
            resampledDF[OIRString] = resampledDF['OIR'].shift(i) / resampledDF['Spread']
        featureList = VOIFeatureList
        featureList.extend(OIRFeatureList)
        featureList.append('R0')
        resampledDF.dropna(inplace=True)
        X = resampledDF[featureList]
        X = sm.add_constant(X)  ## add an intercept (beta_0) to our model
        if X.shape == (5, 14):
            model = sm.load(self.model)
            prediction = model.predict(X)
            # print('Predictions: ', prediction.iloc[0])
            return prediction.iloc[0]
        else:
            self.logger.warning('Annihilator: Error X format -> ' + str(X.shape))
            return 0
Example #32
# Relevant IP addresses
worker_ip = '10.195.2.229'
master_ip = '10.195.0.10'

# Download the model
import socket
import os


def get_stuff():

    pass


model = sm.load('recieved.pkl')
print("model loaded")


# Defining an inherited Handler for HTTP requests that is able
# to handle POST requests.
class HandleRequests(BaseHTTPRequestHandler):
    def _set_headers(self):
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()

    def do_POST(self):

        print("1: Processing post request")
        self._set_headers()
Example #33

def getModelData(num, Maximum):
    """
    Get latest model prediction for selected station
    Return 2-dimensional list of time and percentage of available bikes
    """

    THIS_FOLDER = os.path.dirname(
        os.path.abspath(__file__)) + '/static/pre_model/'

    url = "http://api.openweathermap.org/data/2.5/forecast?id=2964574&APPID=31f19a108384bc317e2d91c5621c791e"
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read().decode('utf-8-sig'))

    initTimestamp = data["list"][0]["dt"]

    now = datetime.datetime.fromtimestamp(initTimestamp)
    weekday = now.weekday()
    hour = now.hour
    inittime = int(hour + weekday * 24)

    columns = ["STATION_NUMBER", "weather_rain", "weather_temperature"]
    df_prediction = pd.DataFrame(columns=columns)

    df_prediction["STATION_NUMBER"] = df_prediction["STATION_NUMBER"].astype(
        'category')
    df_prediction["weather_rain"] = df_prediction["weather_rain"].astype(
        'float')
    df_prediction["weather_temperature"] = df_prediction[
        "weather_temperature"].astype('float')

    for i in range(8):
        temp = data["list"][i]["main"]["temp"] - 273.15
        try:
            rain = data["list"][i]["rain"]["3h"]
        except KeyError:
            rain = 0
        for j in range(3):
            df_prediction = df_prediction.append(
                {
                    "weather_rain": rain,
                    "weather_temperature": temp,
                    "STATION_NUMBER": num,
                },
                ignore_index=True)

    res = [["Time", "Degree of availablilty"]]

    for i in range(24):
        model_name = THIS_FOLDER + str(i + inittime)

        newlm = smapi.load(model_name)
        stamp = datetime.datetime.fromtimestamp(initTimestamp + 60 * 60 * i)
        time = stamp.strftime("%H: %M")
        pre = [time]

        pre_percentage = int(newlm.predict(df_prediction.take([i])))
        if pre_percentage >= Maximum:
            pre_percentage = 1.0  # cap at full availability
        else:
            pre_percentage = pre_percentage / Maximum

        pre.append(pre_percentage)

        res.append(pre)

    return res