def __init__(self, data, forecastMessure, p, d, q):
    Modelling.__init__(self, data, forecastMessure)
    self.data = data
    self.p = p
    self.d = d
    self.q = q
    self.forecastMessure = forecastMessure
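# The (p, d, q) signature points to an ARIMA wrapper. A minimal sketch of what a
# fit/forecast method for this subclass could look like, assuming statsmodels as
# the backend -- the actual method body is not shown in this excerpt, and the
# method name and `steps` parameter are illustrative:
from statsmodels.tsa.arima.model import ARIMA

def fit_and_forecast(self, steps=1):
    # Fit ARIMA(p, d, q) on the stored series and forecast `steps` ahead.
    fitted = ARIMA(self.data, order=(self.p, self.d, self.q)).fit()
    return fitted.forecast(steps=steps)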
def getFlaks():
    # column values to allow for user selection
    model = Modelling()
    df, user_input_list = model.data_preparation()
    return render_template('main.html', selections=user_input_list)
def __init__(self, data, forecastMessure, seasonal_periods, WStRMSEOpt, WMAPEOpt, product):
    Modelling.__init__(self, data, forecastMessure)
    self.data = data
    self.forecastMessure = forecastMessure
    self.seasonal_periods = seasonal_periods
    self.WStRMSEOpt = WStRMSEOpt
    self.WMAPEOpt = WMAPEOpt
    self.product = product
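# The seasonal_periods parameter suggests a Holt-Winters style model. A minimal
# sketch of how this subclass might fit one, assuming statsmodels' exponential
# smoothing as the backend; the additive trend/seasonal settings and the method
# name are illustrative, not confirmed by the source:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

def fit_holt_winters(self):
    # Triple exponential smoothing over the stored series.
    model = ExponentialSmoothing(self.data,
                                 trend='add',
                                 seasonal='add',
                                 seasonal_periods=self.seasonal_periods)
    return model.fit()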
def fit(args):
    """Fit preprocessor and model."""
    data = load_data(DATA_NAME, samp_size=10000, all_=False)
    prep = ChrunPrep()
    X = prep.fit_transform(data)
    y = prep.create_labels(data)
    classifier = Modelling(model=args.model_type)
    classifier.fit(X, y)
class TestModelling(unittest.TestCase):
    """Test the Modelling class."""

    def setUp(self):
        self.description = pd.read_csv("docs/description.csv")
        self.description_1000 = pd.read_csv("docs/description_1000.csv")
        self.mdl = Modelling(self.description)

    def test_run_lda(self):
        self.mdl.run_lda(10, 100, 10)

    """
    def test_run_nmf(self):
def predict(args):
    """Predict on a given dataset."""
    if not args.private_file:
        data = load_data(DATA_NAME, samp_size=100000, all_=False)
    else:
        data = load_data(args.private_file)
    prep = ChrunPrep()
    X, index = prep.transform(data)
    classifier = Modelling(args.model_type)
    preds = classifier.predict(X)
    maybe_mkdir(args.out_path)
    out_path = os.path.join(args.out_path, "preds.csv")
    pd.Series(preds, index=index).to_csv(out_path, sep=";")
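# maybe_mkdir is referenced above but not defined in this excerpt. A minimal
# sketch of what such a helper typically does, assuming it only needs to create
# the output directory when it does not already exist:
import os

def maybe_mkdir(path):
    # exist_ok avoids raising if the directory is already present.
    os.makedirs(path, exist_ok=True)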
def main():
    with open('./data/fm_2000-2019.pkl', 'rb') as handle:
        df = pickle.load(handle)

    start_game = 30
    end_game = 82
    # vegas_years = ['2013-14', '2014-15', '2015-16', '2016-17', '2017-18', '2018-19']
    vegas_years = ['2018-19']
    first_feature = 'gp_all_0_a'
    model_type = 'bayes-normal'
    hp_dict = {'alpha': .05}
    feature_classes = 'all'  # ['e-off-rating', 'e-def-rating', 'e-pace']
    thresh = .6
    period = 2
    trace_samp = 5000
    burn_in = 2000
    post_samp = 1000
    chains = 4
    cores = 1

    my_model = Modelling(period=period, model_type=model_type, feature_classes=feature_classes,
                         remove_features=[], restrict_features=[], hp_dict=hp_dict,
                         normalize=False, trace_samp=trace_samp, burn_in=burn_in,
                         post_samp=post_samp, chains=chains, cores=cores)
    vals, mean, var = cross_validate(df, my_model, start_game, end_game, thresh,
                                     vegas_years, first_feature, normalize=False, ppc=True)
    print(mean, var)
def catboost_model(self, X_train, y_train, X_val, y_val, cv_type='gridsearch'):
    CatBoostClassifier_param = {
        "iterations": [100],            # [100, 1150, 200, 300]
        "learning_rate": [0.08, 0.09],  # [0.01, 0.03, 0.1, 0.3, 0.5, 1]
        "max_depth": [5],               # [3, 5, 8]
        "l2_leaf_reg": [5],             # [2, 10, 15]
    }
    model = Modelling().best_model_fit(X_train=X_train, y_train=y_train,
                                       X_val=X_val, y_val=y_val,
                                       clf=CatBoostClassifier(
                                           verbose=0,
                                           loss_function='Logloss',
                                           random_seed=RANDOM_SEED,
                                       ),
                                       param=CatBoostClassifier_param,
                                       cv_type=cv_type)
    return model
def lgb_model(self, X_train, y_train, X_val, y_val, cv_type='gridsearch'):
    LGBMClassifier_param = {
        "learning_rate": [0.2, 0.3],
        "num_leaves": [10],
        "max_depth": [7, 8],
        "feature_fraction": [0.4, 0.6],
        "lambda": [0.3, 0.4],
        "boosting": ['gbdt', 'dart'],
        "num_boost_round": [100, 120],
        # "min_gain_to_split": [],
        # "max_cat_group": [],
        # "bagging_fraction": [],
        # "min_data_in_leaf": [],
    }
    model = Modelling().best_model_fit(X_train=X_train, y_train=y_train,
                                       X_val=X_val, y_val=y_val,
                                       clf=LGBMClassifier(
                                           application='binary',
                                           metric='binary_logloss',
                                           save_binary=True),
                                       param=LGBMClassifier_param,
                                       cv_type=cv_type)
    return model
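# Modelling.best_model_fit itself is not shown in this excerpt. Given
# cv_type='gridsearch' and the param dictionaries above, a minimal sketch of
# what it plausibly wraps, assuming scikit-learn's GridSearchCV; the scoring,
# cv fold count, and fallback branch are illustrative assumptions:
from sklearn.model_selection import GridSearchCV

def best_model_fit(self, X_train, y_train, X_val, y_val, clf, param, cv_type='gridsearch'):
    if cv_type == 'gridsearch':
        # Exhaustive search over the supplied parameter grid.
        search = GridSearchCV(estimator=clf, param_grid=param, cv=3, n_jobs=-1)
        search.fit(X_train, y_train)
        best = search.best_estimator_
    else:
        # No search requested: fit the classifier as configured.
        best = clf.fit(X_train, y_train)
    # Report hold-out accuracy on the validation split.
    print("validation score:", best.score(X_val, y_val))
    return best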
def __init__(self, data, forecastMessure, hidden_layers=[20, 15],
             activation_functions=['relu', 'relu'], optimizer=SGD(),
             loss='mean_absolute_error'):
    Modelling.__init__(self, data, forecastMessure)
    self.data = data
    self.forecastMessure = forecastMessure
    self.hidden_layers = hidden_layers
    self.activation_functions = activation_functions
    self.optimizer = optimizer
    self.loss = loss
    if len(self.hidden_layers) != len(self.activation_functions):
        raise Exception(
            "hidden_layers size must match activation_functions size")
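# hidden_layers and activation_functions are parallel lists, so each layer gets
# its own activation; that is why the constructor enforces equal lengths. A
# minimal sketch of how the network could be assembled from them, assuming a
# Keras Sequential backend; the method name, input dimension, and single
# regression output are illustrative assumptions:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def build_network(self, input_dim):
    model = Sequential()
    # One Dense layer per (size, activation) pair.
    for size, activation in zip(self.hidden_layers, self.activation_functions):
        model.add(Dense(size, activation=activation))
    model.add(Dense(1))  # single regression output
    model.compile(optimizer=self.optimizer, loss=self.loss)
    model.build(input_shape=(None, input_dim))
    return model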
def postFlaks():
    # get user's inputs
    user_CRIM = request.form.get("CRIM", False)
    user_ZN = request.form.get("ZN", False)
    user_INDUS = request.form.get("INDUS", False)
    user_CHAS = request.form.get("CHAS", False)
    user_NOX = request.form.get("NOX", False)
    user_RM = request.form.get("RM", False)
    user_AGE = request.form.get("AGE", False)
    user_DIS = request.form.get("DIS", False)
    user_RAD = request.form.get("RAD", False)
    user_TAX = request.form.get("TAX", False)
    user_PTRATIO = request.form.get("PTRATIO", False)
    user_B = request.form.get("B", False)
    user_LSTAT = request.form.get("LSTAT", False)
    # get user's phone number
    user_phone_number = request.form.get("phone", False)
    # instantiate the model
    model = Modelling()
    prediction = model.predictUserInput(user_CRIM, user_ZN, user_INDUS, user_CHAS,
                                        user_NOX, user_RM, user_AGE, user_DIS,
                                        user_RAD, user_TAX, user_PTRATIO,
                                        user_B, user_LSTAT)
    # append a meaningful message to the prediction
    prediction = "Predicted Median value of owner-occupied homes in $1000's is: " + prediction
    # if the phone number is valid, then send a message
    snsService = Sns()
    # user_phone_number = str(user_phone_number)
    returnMessage = snsService.sendSMS(user_phone_number, prediction)
    # if the user did not provide a phone number, don't return any message from the SNS class
    if len(user_phone_number) == 0:
        returnMessage = ""
    # prediction = 'RESULT'
    return render_template('prediction.html', result=[prediction, returnMessage])
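# Sns is referenced above but not defined in this excerpt. A minimal sketch of
# what its sendSMS method plausibly does, assuming AWS SNS via boto3; the region
# default and the return message are illustrative assumptions:
import boto3

class Sns:
    def __init__(self, region_name='us-east-1'):
        self.client = boto3.client('sns', region_name=region_name)

    def sendSMS(self, phone_number, message):
        # Publish directly to a phone number rather than to a topic.
        self.client.publish(PhoneNumber=phone_number, Message=message)
        return "Message sent to " + phone_number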
def dt_model(self, X_train, y_train, X_val, y_val, cv_type='gridsearch'):
    DecisionTreeClassifier_param = {
        "criterion": ["gini", "entropy"],
        "max_depth": [8],
        "max_features": [0.7],
        "min_samples_leaf": [10],
        "min_samples_split": [3],
        "random_state": [RANDOM_SEED]
    }
    model = Modelling().best_model_fit(X_train=X_train, y_train=y_train,
                                       X_val=X_val, y_val=y_val,
                                       clf=DecisionTreeClassifier(),
                                       param=DecisionTreeClassifier_param,
                                       cv_type=cv_type)
    return model
def logit_model(self, X_train, y_train, X_val, y_val, cv_type='gridsearch'):
    LogisticRegression_param = {
        "C": [0.25],
        "max_iter": [135],
        "penalty": ["l2"],
        "random_state": [RANDOM_SEED]
    }
    model = Modelling().best_model_fit(X_train=X_train, y_train=y_train,
                                       X_val=X_val, y_val=y_val,
                                       clf=LogisticRegression(),
                                       param=LogisticRegression_param,
                                       cv_type=cv_type)
    return model
def etc_model(self, X_train, y_train, X_val, y_val, cv_type='gridsearch'):
    ExtraTreesClassifier_param = {
        "n_estimators": [175],
        "criterion": ['gini', 'entropy'],
        "max_depth": [21],
        "min_samples_split": [4],
        "min_samples_leaf": [8],
        "max_features": [0.5],
    }
    model = Modelling().best_model_fit(X_train=X_train, y_train=y_train,
                                       X_val=X_val, y_val=y_val,
                                       clf=ExtraTreesClassifier(
                                           random_state=RANDOM_SEED,
                                           bootstrap=True,
                                           n_jobs=-2,
                                           warm_start=True),
                                       param=ExtraTreesClassifier_param,
                                       cv_type=cv_type)
    return model
def main(data_directory_path, merge_csv_file_name, prepared_csv_file_name, features_target_csv_file_name):
    print("Model Process starts")
    # path = r"E:\PlusDental_Task\sample_data"
    # merge_file_name = "data_merged.csv"
    # prepared_file_name = "data_prepared.csv"
    # feature_target_file_name = "features_target.csv"
    start = time.time()
    data_read_and_merge = DataReadAndMerge(data_directory_path, merge_csv_file_name)
    # data_read_and_merge.readAndMerge(path, merge_file_name)
    data_prepare = DataPrepare(data_directory_path, merge_csv_file_name)
    # data_prepare.dataPrepare(path, merge_file_name)
    # data_prepared = pd.read_csv(os.path.join(data_directory_path, prepared_csv_file_name))
    # print(data_prepared.head())
    # print(data_prepared.shape)
    # data_explore = DataExploration(data_prepared)
    # data_explore.dataExploration(data_prepared)
    feature_engineering = FeatureEngineering(data_directory_path, prepared_csv_file_name)
    # feature_engineering.featureEngineering(path, prepared_file_name)
    modelling = Modelling(data_directory_path, features_target_csv_file_name)
    # modelling.modelling(data_directory_path, features_target_csv_file_name)
    model_pipeline = ModelPipeline(data_read_and_merge, data_prepare, feature_engineering, modelling)
    model_pipeline.fit(data_directory_path, merge_csv_file_name, prepared_csv_file_name,
                       features_target_csv_file_name)
    print("Model Process ends", time.time() - start, "s")
def main(data_directory_path):
    print("Model Process starts")
    start = time.time()
    data_read = DataRead(data_directory_path)
    data_prepare = DataPrepare()
    data_explore = DataExploration()
    feature_engineering = FeatureEngineering()
    modelling = Modelling()
    model_pipeline = ModelPipeline(data_read, data_explore, data_prepare,
                                   feature_engineering, modelling)
    model_pipeline.fit(data_directory_path)
    print("Model Process ends", time.time() - start, "s")
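# ModelPipeline is used in both main() variants above but not defined in this
# excerpt. A minimal sketch of how its fit method could chain the stages in
# order, assuming each stage exposes a uniform run method; the `run` name is a
# hypothetical stand-in, since the snippets elsewhere show per-stage method
# names such as dataPrepare and modelling:
class ModelPipeline:
    def __init__(self, data_read, data_explore, data_prepare, feature_engineering, modelling):
        self.data_read = data_read
        self.data_explore = data_explore
        self.data_prepare = data_prepare
        self.feature_engineering = feature_engineering
        self.modelling = modelling

    def fit(self, data_directory_path):
        # Run each stage in sequence, handing the directory path through.
        for stage in (self.data_read, self.data_explore, self.data_prepare,
                      self.feature_engineering, self.modelling):
            stage.run(data_directory_path)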
from modelling import Modelling
import pandas as pd

# Set file to process, read it in, and create a Model for it
file_name = "docs/description.csv"
description = pd.read_csv(file_name)
model = Modelling(description)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import utils.helper_models as helper_models
from modelling import Modelling
from xgboost import plot_tree

# path = r"E:\PlusDental_Task\presentation\Churn_Modelling.csv"
data_directory_path = r"E:\PlusDental_Task\presentation"  # raw string avoids invalid escape sequences
features_target_csv_file_name = "Churn_Modelling.csv"
# data_raw = pd.read_csv(path)
# print(data_raw.shape)

# Checking missing values, just for confirmation
# data_check = helper_models.missing_values_table(data_raw)
# print('Missing values in a column with the percentage', data_check)

modelling = Modelling(data_directory_path, features_target_csv_file_name)
modelling.modelling(data_directory_path, features_target_csv_file_name)