import os
import pickle
import shutil

# APP_logger is the project's logging helper, defined elsewhere in this repo.


class model_file:
    def __init__(self):
        self.file = "model//"
        self.log = APP_logger()
        self.pickle = pickle

    def model_save(self, model, filename):
        """Persist a fitted model to model//<filename>//<filename>.pickle."""
        try:
            path = self.file + filename
            if os.path.isdir(path):
                # drop any previously saved copy before writing the new one
                shutil.rmtree(path)
                os.makedirs(path)
            else:
                os.makedirs(path)
            with open(path + "//" + filename + ".pickle", "wb") as f:
                pickle.dump(model, f)
            message = "{a} model saved successfully".format(a=model)
            self.log.log("log_msg//model_load_pre.txt", message)
        except Exception as e:
            message = "there is some error in the model_save function, the error is {fun}".format(fun=e)
            self.log.log("log_msg//model_load_pre.txt", message)
            return e

    def load_model(self, filename):
        """Load a previously saved model from model//<filename>//<filename>.pickle."""
        try:
            path = self.file + filename
            with open(path + "//" + filename + ".pickle", "rb") as f:
                model = pickle.load(f)
            message = "successfully imported the {model} model".format(model=filename)
            self.log.log("log_msg//load_model.txt", message)
            return model
        except Exception as e:
            message = "there is some error in the load_model function, the error is {fun}".format(fun=e)
            self.log.log("log_msg//load_model.txt", message)
            return e

    def find_correct_model_file(self, num_clu):
        """Return the saved model folder whose name ends with the given cluster number."""
        try:
            list_file = os.listdir(self.file)
            for file in list_file:
                if file[-1] == str(num_clu):
                    return file
            message = "successfully searched the model directory"
            self.log.log("log_msg//model_best_pre.txt", message)
        except Exception as e:
            message = "there is some error in the find_correct_model_file function, the error is {fun}".format(fun=e)
            self.log.log("log_msg//model_best_pre.txt", message)
            return e
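# Minimal usage sketch (not part of the original pipeline): the dummy dict
# stands in for a fitted estimator, and the name "naive0" follows the
# <model_name><cluster_number> convention used by model_training.
if __name__ == "__main__":
    saver = model_file()
    saver.model_save({"dummy": "model"}, "naive0")   # writes model//naive0//naive0.pickle
    reloaded = saver.load_model("naive0")            # reads the pickle back
    print(saver.find_correct_model_file(0))          # folder whose name ends in "0"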
import pandas as pd
from sklearn.model_selection import train_test_split

# preprocessar, cluter, best_model_tuner, APP_logger and model_file are
# project-internal helpers defined elsewhere in this repo.


class model_training:
    def __init__(self, path):
        self.input_file = path
        self.preprocesser = preprocessar()
        self.cluster = cluter()
        self.best_model = best_model_tuner()
        self.log = APP_logger()
        self.model_save = model_file()

    def training_model(self):
        try:
            data = pd.read_csv(self.input_file)
            X, Y = self.preprocesser.splite_data(data, "default payment next month")
            n_cluster = self.cluster.num_cluster(X)
            x = self.cluster.K_cluster(X, n_cluster)
            self.model_save.model_save(self.cluster, "KMeans")
            x["label"] = Y
            list_of_cluster = x["cluster"].unique()
            for i in list_of_cluster:
                # train and persist one model per cluster
                cluster_data = x[x["cluster"] == i]
                cluster_feature = cluster_data.drop(columns=["cluster", "label"])
                cluster_label = cluster_data["label"]
                x_train, x_test, y_train, y_test = train_test_split(
                    cluster_feature, cluster_label, test_size=.30, random_state=42)
                model, model_name = self.best_model.best_model(
                    x_train, x_test, y_train, y_test)
                self.model_save.model_save(model, model_name + str(i))
            message = "successfully trained the models"
            self.log.log("log_msg//model_finaltrain.txt", message)
        except Exception as e:
            message = "there is some error in the training_model function, the error is {a}".format(a=e)
            self.log.log("log_msg//model_finaltrain.txt", message)
            return e
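# Illustrative entry point (a sketch, not the original run script): the CSV
# path is an assumption and should point at the exported training batch, e.g.
# the file written by db_vali.Export_DB_csv.
if __name__ == "__main__":
    trainer = model_training("FileFrom_DB//input.csv")
    trainer.training_model()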
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier

# APP_logger is the project's logging helper, defined elsewhere in this repo.


class best_model_tuner:
    def __init__(self):
        self.xgboost = XGBClassifier()
        self.naive = GaussianNB()
        self.log = APP_logger()

    def best_para_for_naive(self, x_train, y_train):
        """Grid-search var_smoothing for GaussianNB and return the refitted model."""
        try:
            self.para_grid = {
                "var_smoothing": [1e-9, .001, .05, .08, .1, .5, 1e-8, 1e-7, 1e-6]
            }
            grid = GridSearchCV(self.naive, self.para_grid, cv=5, verbose=3)
            grid.fit(x_train, y_train)
            best_var_smoothing = grid.best_params_["var_smoothing"]
            self.naive1 = GaussianNB(var_smoothing=best_var_smoothing)
            self.naive1.fit(x_train, y_train)
            message = "successfully found the best parameters for the naive bayes model"
            self.log.log("log_msg//naive_model.txt", message)
            return self.naive1
        except Exception as e:
            message = "there is some error in the best_para_for_naive function, the error is {a}".format(a=e)
            self.log.log("log_msg//naive_model.txt", message)
            return e

    def best_para_for_XGboost(self, x_train, y_train):
        """Grid-search n_estimators and max_depth for XGBClassifier and return the refitted model."""
        try:
            para = {"n_estimators": [100, 200, 250], "max_depth": [2, 6, 10]}
            grid = GridSearchCV(self.xgboost, param_grid=para, cv=5, verbose=3)
            grid.fit(x_train, y_train)
            self.n_estimators = grid.best_params_["n_estimators"]
            self.max_depth = grid.best_params_["max_depth"]
            self.xgboost = XGBClassifier(n_estimators=self.n_estimators,
                                         max_depth=self.max_depth)
            self.xgboost.fit(x_train, y_train)
            message = "successfully found the best parameters for the xgboost model"
            self.log.log("log_msg//xgboost_model.txt", message)
            return self.xgboost
        except Exception as e:
            message = "there is some error in the best_para_for_XGboost function, the error is {a}".format(a=e)
            self.log.log("log_msg//xgboost_model.txt", message)
            return e

    def best_model(self, x_train, x_test, y_train, y_test):
        """Tune both candidates and return (model, name) of the one with the higher test accuracy."""
        try:
            self.xgboost = self.best_para_for_XGboost(x_train, y_train)
            self.naive = self.best_para_for_naive(x_train, y_train)
            xg_pre = self.xgboost.predict(x_test)
            naive_pre = self.naive.predict(x_test)
            xg_score = accuracy_score(y_test, xg_pre)
            naive_score = accuracy_score(y_test, naive_pre)
            message = "successfully found the best model"
            self.log.log("log_msg//best_model.txt", message)
            if xg_score > naive_score:
                return self.xgboost, "xgboost"
            else:
                return self.naive, "naive"
        except Exception as e:
            message = "there is some error in the best_model function, the error is {a}".format(a=e)
            self.log.log("log_msg//best_model.txt", message)
            return e
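# Minimal sketch of how best_model is meant to be called; make_classification
# only supplies demo data here, the real pipeline feeds per-cluster splits.
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    X_demo, y_demo = make_classification(n_samples=300, n_features=10, random_state=42)
    x_tr, x_te, y_tr, y_te = train_test_split(X_demo, y_demo, test_size=.30, random_state=42)
    tuner = best_model_tuner()
    model, name = tuner.best_model(x_tr, x_te, y_tr, y_te)
    print("selected model:", name)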
import csv
import os
import sqlite3

# APP_logger is the project's logging helper, defined elsewhere in this repo.


class db_vali:
    def __init__(self):
        self.path = "Training_database//"
        self.log = APP_logger()
        self.good_file_path = "Training_raw_file//Good_data"
        self.bad_file_path = "Training_raw_file//bad_data"

    def DBconnection(self, DatabaseName):
        try:
            conn = sqlite3.connect(self.path + DatabaseName + ".db")
            message = "successfully opened the database connection"
            self.log.log("log_msg//database.txt", message)
        except Exception as e:
            message = "there is some error in the database connection function, the error is {a}".format(a=e)
            self.log.log("log_msg//database.txt", message)
            return e
        return conn

    def dbtablecreation(self, Databasename, column):
        try:
            conn = self.DBconnection(Databasename)
            c = conn.cursor()
            c.execute("SELECT count(name) FROM sqlite_master WHERE type = 'table' AND name = 'Good_Raw_Data'")
            if c.fetchone()[0] == 1:
                conn.close()
            else:
                for col in column.keys():
                    col_type = column[col]
                    try:
                        c.execute("ALTER TABLE Good_Raw_Data ADD COLUMN '{col_name}' {type}".format(col_name=col, type=col_type))
                    except:
                        c.execute("CREATE TABLE Good_Raw_Data ({col_name} {type})".format(col_name=col, type=col_type))
                message = "successfully created the Good_Raw_Data table"
                self.log.log("log_msg//database.txt", message)
        except Exception as e:
            message = "there is some error in the table creation function, the error is {a}".format(a=e)
            self.log.log("log_msg//database.txt", message)
            return e
        conn.close()

    def insert_table_into_db(self, Databasename):
        try:
            conn = self.DBconnection(Databasename)
            c = conn.cursor()
            list_files = os.listdir("training_raw_file//Good_data//")
            for i in list_files:
                with open("training_raw_file//Good_data//" + i, "r") as f:
                    next(f, 1)  # skip the header row
                    for j in enumerate(f):
                        # assumes purely numeric, comma-separated rows
                        c.execute("INSERT INTO Good_Raw_Data VALUES ({VALUE})".format(VALUE=j[1]))
            message = "successfully inserted the good raw files into the database"
            self.log.log("log_msg//database.txt", message)
            conn.commit()
            c.close()
            conn.close()
        except Exception as e:
            message = "there is some error in the insert_table_into_db function, the error is {a}".format(a=e)
            self.log.log("log_msg//database.txt", message)
            return e

    def Export_DB_csv(self, Databasename):
        try:
            input_file = "FileFrom_DB"
            conn = self.DBconnection(Databasename)
            c = conn.cursor()
            if not os.path.isdir(input_file):
                os.makedirs(input_file)
            c.execute("SELECT * FROM Good_Raw_Data")
            result = c.fetchall()
            header = [i[0] for i in c.description]
            with open("FileFrom_DB//input.csv", "w", newline="") as out:
                csv_file = csv.writer(out, delimiter=",")
                csv_file.writerow(header)
                csv_file.writerows(result)
            message = "successfully exported the database into a csv file"
            self.log.log("log_msg//database.txt", message)
        except Exception as e:
            message = "there is some error in the Export_DB_csv function, the error is {a}".format(a=e)
            self.log.log("log_msg//database.txt", message)
        c.close()
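# Sketch of the intended call order for the training database (a minimal
# example, not the original entry point): it assumes training_raw_file//Good_data
# has already been filled by the raw-file validation step, and the two-column
# schema below is only a trimmed illustration of the real schema_training.json.
if __name__ == "__main__":
    os.makedirs("Training_database", exist_ok=True)  # sqlite3.connect needs the folder to exist
    db = db_vali()
    columns = {"LIMIT_BAL": "INTEGER", "AGE": "INTEGER"}
    db.dbtablecreation("Training", columns)
    db.insert_table_into_db("Training")
    db.Export_DB_csv("Training")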
def __init__(self): self.path = "Training_database//" self.log = APP_logger() self.good_file_path = "Training_raw_file//Good_data" self.bad_file_path = "Training_raw_file//bad_data"
def __init__(self): self.path = "prediction_database//" self.log = APP_logger() self.good_file_path = "prediction_raw_file//Good_data" self.bad_file_path = "prediction_raw_file//bad_data"
import json
import os
import re
import shutil
from datetime import datetime

import pandas as pd

# APP_logger is the project's logging helper, defined elsewhere in this repo.


class prediction_val:
    def __init__(self, prediction_file_dir):
        self.Batch_dir = prediction_file_dir
        self.Log = APP_logger()
        self.schema = "schema_prediction.json"

    def schema_values(self):
        """Read the expected file-name and column layout from schema_prediction.json."""
        try:
            with open(self.schema, "r") as f:
                dic = json.load(f)
            SampleFileName = dic["SampleFileName"]
            LengthOfDateStampInFile = dic["LengthOfDateStampInFile"]
            LengthOfTimeStampInFile = dic["LengthOfTimeStampInFile"]
            NumberofColumns = dic["NumberofColumns"]
            ColName = dic["ColName"]
            message = "successfully read the schema values"
            self.Log.log("log_msg//schema_values_pre.txt", message)
            return SampleFileName, LengthOfDateStampInFile, LengthOfTimeStampInFile, NumberofColumns, ColName
        except Exception as e:
            message = "there is some error in the schema_values function, the error is {a}".format(a=e)
            self.Log.log("log_msg//schema_values_pre.txt", message)
            return e

    def create_good_bad_folder(self):
        try:
            path = os.path.join("prediction_raw_file//Good_data")
            if not os.path.isdir(path):
                os.makedirs(path)
            path = os.path.join("prediction_raw_file//bad_data")
            if not os.path.isdir(path):
                os.makedirs(path)
            message = "successfully created the good and bad raw data folders"
            self.Log.log("log_msg//create_good_bad_pre.txt", message)
        except Exception as e:
            message = "there is some error in the create_good_bad_folder function, the error is {a}".format(a=e)
            self.Log.log("log_msg//create_good_bad_pre.txt", message)
            return e

    def delete_good_data(self):
        try:
            path = "prediction_raw_file//Good_data"
            if os.path.isdir(path):
                shutil.rmtree(path)
            message = "successfully deleted the good raw data folder"
            self.Log.log("log_msg//delete_good_pre.txt", message)
        except Exception as e:
            message = "there is some error in the delete_good_data function, the error is {a}".format(a=e)
            self.Log.log("log_msg//delete_good_pre.txt", message)
            return e

    def delete_bad_data(self):
        try:
            path = "prediction_raw_file//bad_data"
            if os.path.isdir(path):
                shutil.rmtree(path)
            message = "successfully deleted the bad raw data folder"
            self.Log.log("log_msg//delete_bad_pre.txt", message)
        except Exception as e:
            message = "there is some error in the delete_bad_data function, the error is {a}".format(a=e)
            self.Log.log("log_msg//delete_bad_pre.txt", message)
            return e

    def move_bad_to_archivedbad(self):
        try:
            now = datetime.now()
            date = now.date()
            current_time = now.strftime("%H%M%S")  # take the time from `now`, not from the date object
            source = "prediction_raw_file//bad_data"
            if os.path.isdir(source):
                path = "prediction_Archived"
                if not os.path.isdir(path):
                    os.makedirs(path)
                source1 = path + "//bad_data_" + str(date) + "-" + str(current_time)
                if not os.path.isdir(source1):
                    os.makedirs(source1)
                list_files = os.listdir(source)
                for file in list_files:
                    if file not in os.listdir(source1):
                        shutil.move(source + "//" + file, source1)
            message = "successfully moved the bad files to the archived-bad folder"
            self.Log.log("log_msg//Archived_bad_pre.txt", message)
        except Exception as e:
            message = "there is some error in the move_bad_to_archivedbad function, the error is {a}".format(a=e)
            self.Log.log("log_msg//Archived_bad_pre.txt", message)
            return e

    def cheack_filename(self, len_date, lentime):
        """Copy correctly named batch files to Good_data and everything else to bad_data."""
        try:
            list_files = os.listdir("prediction_Batch_Files")
            for file in list_files:
                f_split = re.split(".csv", file)[0]
                s_split = re.split("_", f_split)
                if (s_split[0] == "creditCardFraud") and (len(s_split[1]) == len_date) and (len(s_split[2]) == lentime):
                    shutil.copy("prediction_Batch_Files//" + file, "prediction_raw_file//Good_data")
                else:
                    shutil.copy("prediction_Batch_Files//" + file, "prediction_raw_file//bad_data")
            message = "successfully checked all the file names and copied the files to their respective folders"
            self.Log.log("log_msg//check_filename.txt", message)
        except Exception as e:
            message = "there is some error in the cheack_filename function, the error is {a}".format(a=e)
            self.Log.log("log_msg//check_filename.txt", message)
            return e

    def cheack_col(self, numofcol):
        try:
            list_files = os.listdir("prediction_raw_file//Good_data")
            for file in list_files:
                data = pd.read_csv("prediction_raw_file//Good_data//" + file)
                if data.shape[1] == numofcol:
                    pass
                else:
                    shutil.move("prediction_raw_file//Good_data//" + file, "prediction_raw_file//bad_data")
            message = "successfully checked the column count and moved invalid files to the bad data folder"
            self.Log.log("log_msg//check_column_pre.txt", message)
        except Exception as e:
            message = "there is some error in the cheack_col function, the error is {a}".format(a=e)
            self.Log.log("log_msg//check_column_pre.txt", message)
            return e

    def cheack_mis_col(self):
        try:
            list_files = os.listdir("prediction_raw_file//Good_data")
            for file in list_files:
                data = pd.read_csv("prediction_raw_file//Good_data//" + file)
                for col in data.columns:
                    if len(data[col]) - data[col].count() == len(data[col]):
                        # every value in this column is missing, so the whole file is bad
                        shutil.move("prediction_raw_file//Good_data//" + file, "prediction_raw_file//bad_data")
                        break
            message = "successfully checked for fully-missing columns and moved invalid files to the bad data folder"
            self.Log.log("log_msg//check_missing_column_pre.txt", message)
        except Exception as e:
            message = "there is some error in the cheack_mis_col function, the error is {a}".format(a=e)
            self.Log.log("log_msg//check_missing_column_pre.txt", message)
            return e
# Constructor of the prediction-side validation wrapper (the rest of the class
# lives elsewhere in the repo): it wires the raw-file validator and the
# database helper together for a prediction batch.
def __init__(self, path):
    self.path = path
    self.prediction_val = prediction_val(self.path)
    self.db_vali = db_vali()
    self.log = APP_logger()
# Constructor of the training-side validation wrapper (the rest of the class
# lives elsewhere in the repo): same wiring as the prediction wrapper, but
# using Training_val for the training batch.
def __init__(self, path):
    self.path = path
    self.Training_val = Training_val(self.path)
    self.db_vali = db_vali()
    self.log = APP_logger()
# Constructor of Training_val, the training-batch counterpart of prediction_val
# (the rest of the class lives elsewhere in the repo).
def __init__(self, training_file_dir):
    self.Batch_dir = training_file_dir
    self.Log = APP_logger()
    self.schema = "schema_training.json"