Example #1
    def __init__(self, data, forecastMessure, p, d, q):
        Modelling.__init__(self, data, forecastMessure)
        self.data = data
        self.p = p
        self.d = d
        self.q = q
        self.forecastMessure = forecastMessure
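A usage sketch for the constructor above, which belongs to an ARIMA-style subclass of Modelling (the subclass name ArimaModelling, the "RMSE" measure string, and the sample series are assumptions; only the signature comes from the example):

import pandas as pd

# hypothetical instantiation with an ARIMA order of (p=1, d=1, q=1)
series = pd.Series(range(100))  # placeholder time series
model = ArimaModelling(series, "RMSE", p=1, d=1, q=1)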
Example #2
def getFlaks():

    # columns values to allow for user selection
    model = Modelling()
    df, user_input_list = model.data_preparation()

    return render_template('main.html', selections=user_input_list)
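getFlaks reads like the GET handler of a Flask app (postFlaks in Example #11 is its POST counterpart). A minimal sketch of how such a handler could be registered; the app object, route path, and HTTP methods are assumptions, not taken from the project:

from flask import Flask

app = Flask(__name__)
app.add_url_rule("/", view_func=getFlaks, methods=["GET"])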
Example #3
    def __init__(self, data, forecastMessure, seasonal_periods, WStRMSEOpt,
                 WMAPEOpt, product):
        Modelling.__init__(self, data, forecastMessure)
        self.data = data
        self.forecastMessure = forecastMessure
        self.seasonal_periods = seasonal_periods
        self.WStRMSEOpt = WStRMSEOpt
        self.WMAPEOpt = WMAPEOpt
        self.product = product
Example #4
def fit(args):
    "fit preprocessor and model"
    data = load_data(DATA_NAME, samp_size=10000, all_=False)

    prep = ChrunPrep()
    X = prep.fit_transform(data)
    y = prep.create_labels(data)

    classifier = Modelling(model=args.model_type)
    classifier.fit(X, y)
Example #5
class TestModelling(unittest.TestCase):
    """
    Test the Modelling class.
    """
    def setUp(self):
        self.description = pd.read_csv("docs/description.csv")
        self.description_1000 = pd.read_csv("docs/description_1000.csv")
        self.mdl = Modelling(self.description)

    def test_run_lda(self):
        self.mdl.run_lda(10, 100, 10)

    """ def test_run_nmf(self):
Example #6
def predict(args):
    "perdict on a given dataset"
    if not args.private_file:
        data = load_data(DATA_NAME, samp_size=100000, all_=False)
    else:
        data = load_data(args.private_file)

    prep = ChrunPrep()
    X, index = prep.transform(data)

    classifier = Modelling(args.model_type)
    preds = classifier.predict(X)
    maybe_mkdir(args.out_path)
    out_path = os.path.join(args.out_path, "preds.csv")
    pd.Series(preds, index=index).to_csv(out_path, sep=";")
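fit (Example #4) and predict read args.model_type, args.private_file, and args.out_path, which suggests an argparse front end. A hedged sketch of that wiring; the flag names are inferred from the attributes accessed on args, and the defaults are assumptions:

import argparse

parser = argparse.ArgumentParser(description="churn model CLI (sketch)")
parser.add_argument("--model_type", default="logit")  # default is an assumption
parser.add_argument("--private_file", default=None)   # falsy -> sample DATA_NAME
parser.add_argument("--out_path", default="out")
args = parser.parse_args()

predict(args)  # or fit(args)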
Example #7
def main():

    with open('./data/fm_2000-2019.pkl', 'rb') as handle:
        df = pickle.load(handle)

    start_game = 30
    end_game = 82
    #vegas_years = ['2013-14','2014-15','2015-16','2016-17','2017-18','2018-19']
    vegas_years = ['2018-19']
    first_feature = 'gp_all_0_a'
    model_type = 'bayes-normal'
    hp_dict = {'alpha': .05}
    feature_classes = 'all'  #['e-off-rating','e-def-rating','e-pace']
    thresh = .6
    period = 2
    trace_samp = 5000
    burn_in = 2000
    post_samp = 1000
    chains = 4
    cores = 1

    my_model = Modelling(period=period,
                         model_type=model_type,
                         feature_classes=feature_classes,
                         remove_features=[],
                         restrict_features=[],
                         hp_dict=hp_dict,
                         normalize=False,
                         trace_samp=trace_samp,
                         burn_in=burn_in,
                         post_samp=post_samp,
                         chains=chains,
                         cores=cores)

    vals, mean, var = cross_validate(df,
                                     my_model,
                                     start_game,
                                     end_game,
                                     thresh,
                                     vegas_years,
                                     first_feature,
                                     normalize=False,
                                     ppc=True)
    print(mean, var)
Example #8
    def catboost_model(self,
                       X_train,
                       y_train,
                       X_val,
                       y_val,
                       cv_type='gridsearch'):
        CatBoostClassifier_param = {
            "iterations": [100],  #[100, 1150, 200, 300],     
            "learning_rate": [0.08, 0.09],  # [0.01, 0.03, 0.1, 0.3, 0.5, 1]
            "max_depth": [5],  #[3,5,8]
            "l2_leaf_reg": [5],  #[2,10,15]
        }

        model = Modelling().best_model_fit(X_train=X_train,
                                           y_train=y_train,
                                           X_val=X_val,
                                           y_val=y_val,
                                           clf=CatBoostClassifier(
                                               verbose=0,
                                               loss_function='Logloss',
                                               random_seed=RANDOM_SEED,
                                           ),
                                           param=CatBoostClassifier_param,
                                           cv_type=cv_type)
        return model
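Modelling().best_model_fit is defined elsewhere in the project. For the 'gridsearch' cv_type, a rough sklearn equivalent is sketched below; this illustrates the general technique only, not the project's actual implementation (which presumably also scores on the X_val/y_val pair):

from sklearn.model_selection import GridSearchCV

def best_model_fit_sketch(X_train, y_train, clf, param, cv=3):
    # exhaustive search over the parameter grid; with the default
    # refit=True the best combination is refit on all of X_train
    search = GridSearchCV(clf, param_grid=param, cv=cv,
                          scoring="neg_log_loss")
    search.fit(X_train, y_train)
    return search.best_estimator_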
Example #9
    def lgb_model(self, X_train, y_train, X_val, y_val, cv_type='gridsearch'):
        LGBMClassifier_param = {
            "learning_rate": [0.2, 0.3],
            "num_leaves": [10],
            "max_depth": [7, 8],
            "feature_fraction": [0.4, 0.6],
            "lambda": [0.3, 0.4],
            "boosting": ['gbdt', 'dart'],
            "num_boost_round": [100, 120],
            #     "min_gain_to_split": [],
            #     "max_cat_group": [],
            #     "bagging_fraction": [],
            #     "min_data_in_leaf": [],
        }
        model = Modelling().best_model_fit(X_train=X_train,
                                           y_train=y_train,
                                           X_val=X_val,
                                           y_val=y_val,
                                           clf=LGBMClassifier(
                                               application='binary',
                                               metric='binary_logloss',
                                               save_binary=True),
                                           param=LGBMClassifier_param,
                                           cv_type=cv_type)
        return model
Example #10
    def __init__(self,
                 data,
                 forecastMessure,
                 hidden_layers=[20, 15],
                 activation_functions=['relu', 'relu'],
                 optimizer=SGD(),
                 loss='mean_absolute_error'):
        Modelling.__init__(self, data, forecastMessure)
        self.data = data
        self.forecastMessure = forecastMessure
        self.hidden_layers = hidden_layers
        self.activation_functions = activation_functions
        self.optimizer = optimizer
        self.loss = loss

        if len(self.hidden_layers) != len(self.activation_functions):
            raise Exception(
                "hidden_layers size must match activation_functions size")
Example #11
def postFlaks():

    # get user's inputs
    user_CRIM = request.form.get("CRIM", False)
    user_ZN = request.form.get("ZN", False)
    user_INDUS = request.form.get("INDUS", False)
    user_CHAS = request.form.get("CHAS", False)
    user_NOX = request.form.get("NOX", False)
    user_RM = request.form.get("RM", False)
    user_AGE = request.form.get("AGE", False)
    user_DIS = request.form.get("DIS", False)
    user_RAD = request.form.get("RAD", False)
    user_TAX = request.form.get("TAX", False)
    user_PTRATIO = request.form.get("PTRATIO", False)
    user_B = request.form.get("B", False)
    user_LSTAT = request.form.get("LSTAT", False)

    # get user's phone number
    user_phone_number = request.form.get("phone", False)

    # instantiate the model
    model = Modelling()

    prediction = model.predictUserInput(user_CRIM, user_ZN, user_INDUS,
                                        user_CHAS, user_NOX, user_RM, user_AGE,
                                        user_DIS, user_RAD, user_TAX,
                                        user_PTRATIO, user_B, user_LSTAT)

    # append a meaningful message to the prediction
    prediction = "Predicted Median value of owner-occupied homes in $1000's is: " + prediction

    # send the prediction as an SMS through the SNS service
    snsService = Sns()
    # user_phone_number = str(user_phone_number)
    returnMessage = snsService.sendSMS(user_phone_number, prediction)

    # if the user did not provide a phone number, don't return any message from the SNS class
    if len(user_phone_number) == 0:
        returnMessage = ""

    # prediction = 'RESULT'
    return render_template('prediction.html',
                           result=[prediction, returnMessage])
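The thirteen form fields are the classic Boston-housing features, so the block of request.form.get calls can be collapsed into a loop. A behavior-equivalent sketch (FEATURES is an illustrative name; model is the Modelling instance from the route above):

from flask import request

FEATURES = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
            "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"]

# same default as the snippet above: False when a field is missing
user_values = [request.form.get(name, False) for name in FEATURES]
prediction = model.predictUserInput(*user_values)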
Example #12
    def dt_model(self, X_train, y_train, X_val, y_val, cv_type='gridsearch'):
        DecisionTreeClassifier_param = {
            "criterion": ["gini", "entropy"],
            "max_depth": [8],
            "max_features": [0.7],
            "min_samples_leaf": [10],
            "min_samples_split": [3],
            "random_state": [RANDOM_SEED]
        }
        model = Modelling().best_model_fit(X_train=X_train,
                                           y_train=y_train,
                                           X_val=X_val,
                                           y_val=y_val,
                                           clf=DecisionTreeClassifier(),
                                           param=DecisionTreeClassifier_param,
                                           cv_type=cv_type)
        return model
Example #13
    def logit_model(self,
                    X_train,
                    y_train,
                    X_val,
                    y_val,
                    cv_type='gridsearch'):
        LogisticRegression_param = {
            "C": [0.25],
            "max_iter": [135],
            "penalty": ["l2"],
            "random_state": [RANDOM_SEED]
        }
        model = Modelling().best_model_fit(X_train=X_train,
                                           y_train=y_train,
                                           X_val=X_val,
                                           y_val=y_val,
                                           clf=LogisticRegression(),
                                           param=LogisticRegression_param,
                                           cv_type=cv_type)
        return model
Example #14
    def etc_model(self, X_train, y_train, X_val, y_val, cv_type='gridsearch'):
        ExtraTreesClassifier_param = {
            "n_estimators": [175],
            "criterion": ['gini', 'entropy'],
            "max_depth": [21],
            "min_samples_split": [4],
            "min_samples_leaf": [8],
            "max_features": [0.5],
        }
        model = Modelling().best_model_fit(X_train=X_train,
                                           y_train=y_train,
                                           X_val=X_val,
                                           y_val=y_val,
                                           clf=ExtraTreesClassifier(
                                               random_state=RANDOM_SEED,
                                               bootstrap=True,
                                               n_jobs=-2,
                                               warm_start=True),
                                           param=ExtraTreesClassifier_param,
                                           cv_type=cv_type)
        return model
Example #15
def main(data_directory_path, merge_csv_file_name, prepared_csv_file_name,
         features_target_csv_file_name):

    print("Model Process starts")

    #path = "E:\PlusDental_Task\sample_data"
    #merge_file_name = "data_merged.csv"
    #prepared_file_name = "data_prepared.csv"
    #feature_target_file_name = "features_target.csv"

    start = time.time()

    data_read_and_merge = DataReadAndMerge(data_directory_path,
                                           merge_csv_file_name)
    # data_read_and_merge.readAndMerge(path,merge_file_name)

    data_prepare = DataPrepare(data_directory_path, merge_csv_file_name)
    #data_prepare.dataPrepare(path, merge_file_name)

    #data_prepared = pd.read_csv(os.path.join(data_directory_path, prepared_csv_file_name))
    #print(data_prepared.head())
    #print(data_prepared.shape)

    #data_explore = DataExploration(data_prepared)
    #data_explore.dataExploration(data_prepared)

    feature_engineering = FeatureEngineering(data_directory_path,
                                             prepared_csv_file_name)
    #feature_engineering.featureEngineering(path,prepared_file_name)

    modelling = Modelling(data_directory_path, features_target_csv_file_name)
    #modelling.modelling(data_directory_path, features_target_csv_file_name)

    model_pipeline = ModelPipeline(data_read_and_merge, data_prepare,
                                   feature_engineering, modelling)
    model_pipeline.fit(data_directory_path, merge_csv_file_name,
                       prepared_csv_file_name, features_target_csv_file_name)

    print("Model Process ends", time.time() - start, "s")
Example #16
def main(data_directory_path):

    print("Model Process starts")

    start = time.time()

    data_read = DataRead(data_directory_path)

    data_prepare = DataPrepare()

    data_explore = DataExploration()

    feature_engineering = FeatureEngineering()

    modelling = Modelling()

    model_pipeline = ModelPipeline(data_read, data_explore, data_prepare,
                                   feature_engineering, modelling)

    model_pipeline.fit(data_directory_path)

    print("Model Process ends", time.time() - start, "s")
Example #17
from modelling import Modelling
import pandas as pd

# Set file to process, read it in, and create a Model for it
file_name = "docs/description.csv"
description = pd.read_csv(file_name)
model = Modelling(description)
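Example #5's test exercises topic modelling on the same kind of instance, so the model built above would plausibly be driven the same way (the three positional arguments are not documented in these snippets):

# mirrors TestModelling.test_run_lda in Example #5
model.run_lda(10, 100, 10)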

Example #18
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import utils.helper_models as helper_models
from modelling import Modelling
from xgboost import plot_tree

#path = "E:\PlusDental_Task\presentation\Churn_Modelling.csv"
data_directory_path = "E:\PlusDental_Task\presentation"
features_target_csv_file_name = "Churn_Modelling.csv"

#data_raw = pd.read_csv(path)
#print(data_raw.shape)

# Checking missing values Just for confirmation
# data_check = helper_models.missing_values_table(data_raw)
# print('Missing values in a column with the percentage', data_check)

modelling = Modelling(data_directory_path, features_target_csv_file_name)
modelling.modelling(data_directory_path, features_target_csv_file_name)

Example #19
    def setUp(self):
        self.description = pd.read_csv("docs/description.csv")
        self.description_1000 = pd.read_csv("docs/description_1000.csv")
        self.mdl = Modelling(self.description)