Ejemplo n.º 1
0
 def __init__(self, path):
     """Store the input path and create the helper objects this class uses.

     ``path`` is kept unchanged on ``self.input_file``. Every other
     attribute is a fresh instance of a helper class that is defined
     outside this fragment.
     """
     self.input_file = path
     self.preprocesser = preprocessar()
     self.cluster = cluter()
     self.best_model = best_model_tuner()
     self.log = APP_logger()
     self.model_save = model_file()
class model_file:
    """Persist and retrieve pickled models.

    Each model is stored at ``<base>/<name>/<name>.pickle`` where ``<base>``
    is ``self.file`` (``"model//"`` by default).

    All public methods follow the same error contract: failures are logged
    through ``self.log`` and the exception object is *returned* rather than
    raised.
    """

    def __init__(self):
        # Base directory that holds one sub-directory per saved model.
        self.file = "model//"
        self.log = APP_logger()
        self.pickle = pickle

    def model_save(self, model, filename):
        """Pickle ``model`` into ``<base>/<filename>/<filename>.pickle``.

        An existing directory for ``filename`` is wiped first so that stale
        artefacts never survive a re-save.

        Returns:
            ``None`` on success, or the caught exception on failure.
        """
        try:
            path = self.file + filename
            if os.path.isdir(path):
                shutil.rmtree(path)
            os.makedirs(path)
            # The ``with`` block closes the file; no explicit close() needed.
            with open(path + "//" + filename + ".pickle", "wb") as f:
                pickle.dump(model, f)
            message = "{a} model saved succesfully".format(a=model)
            self.log.log("log_msg//model_load_pre.txt", message)

        except Exception as e:
            message = "there is some erroe in this funtion,the error is {fun}".format(
                fun=e)
            self.log.log("log_msg//model_load_pre.txt", message)
            return e

    def load_model(self, filename):
        """Load and return the model previously saved under ``filename``.

        The base directory is read from ``self.file`` but never modified, so
        the same instance can keep saving, loading and listing models.

        Returns:
            The unpickled object, or the caught exception on failure.
        """
        try:
            path = self.file + filename
            # SECURITY: pickle.load can execute arbitrary code; only load
            # files produced by model_save from a trusted location.
            with open(path + "//" + filename + ".pickle", "rb") as f:
                model = pickle.load(f)

            message = "successfully {model} imported ".format(model=filename)
            # NOTE(review): success is logged to "load_mdel.txt" while the
            # failure branch uses "load_model.txt" - confirm which is wanted.
            self.log.log("log_msg//load_mdel.txt", message)
            return model
        except Exception as e:
            message = "there is some erroe in this funtion,the error is {fun}".format(
                fun=e)
            self.log.log("log_msg//load_model.txt", message)
            return e

    def find_correct_model_file(self, num_clu):
        """Return the saved-model name whose trailing number is ``num_clu``.

        The whole run of trailing digits is compared, so cluster ``0`` does
        not match a name ending in ``10``.

        Returns:
            The matching entry of ``self.file``, or ``None`` when nothing
            matches (in which case a log line is written).
        """
        import re  # local import: only this method needs it

        wanted = str(num_clu)
        for name in os.listdir(self.file):
            trailing_number = re.search(r"(\d+)$", name)
            if trailing_number and trailing_number.group(1) == wanted:
                return name

        message = "successfully runed this programme"
        self.log.log("log_msg//model_best_pre..txt", message)
Ejemplo n.º 3
0
class model_training:
    """Train one classifier per cluster of the input data and save them."""

    def __init__(self, path):
        """Remember the CSV path and create the helper objects."""
        self.input_file = path
        self.preprocesser = preprocessar()
        self.cluster = cluter()
        self.best_model = best_model_tuner()
        self.log = APP_logger()
        self.model_save = model_file()

    def training_model(self):
        """Cluster the data, pick the best model per cluster and save each.

        Steps:
            1. Read ``self.input_file`` and split off the
               "default payment next month" column as the label.
            2. Cluster the features and save ``self.cluster`` as "KMeans".
            3. For every cluster id, split its rows 70/30, let
               ``self.best_model`` choose a model and save it as
               ``<model_name><cluster id>``.

        Exceptions are not caught here; they propagate to the caller.
        """
        data = pd.read_csv(self.input_file)
        X, Y = self.preprocesser.splite_data(data,
                                             "default payment next month")
        n_cluster = self.cluster.num_cluster(X)
        clustered = self.cluster.K_cluster(X, n_cluster)
        self.model_save.model_save(self.cluster, "KMeans")
        clustered["label"] = Y

        # Always filter the frame returned by K_cluster: it is the one that
        # is known to carry both the "cluster" and the "label" column. The
        # original feature frame only has them if K_cluster mutated it.
        for i in clustered["cluster"].unique():
            cluster_data = clustered[clustered["cluster"] == i]
            cluster_feature = cluster_data.drop(columns=["cluster", "label"])
            cluster_label = cluster_data["label"]
            x_train, x_test, y_train, y_test = train_test_split(
                cluster_feature, cluster_label, test_size=.30, random_state=42)

            model, model_name = self.best_model.best_model(
                x_train, x_test, y_train, y_test)

            self.model_save.model_save(model, model_name + str(i))

        message = "succesfully trainied model"
        self.log.log("log_msg//model_finaltrain.txt", message)
Ejemplo n.º 4
0
 def __init__(self):
     """Create the two base estimators and the logger held by this object."""
     self.xgboost = XGBClassifier()
     self.naive = GaussianNB()
     self.log = APP_logger()
Ejemplo n.º 5
0
class best_model_tuner:
    """Grid-search XGBoost and Gaussian naive Bayes and pick the better one.

    All public methods follow the same error contract: failures are logged
    through ``self.log`` and the exception object is *returned* rather than
    raised.
    """

    def __init__(self):
        self.xgboost = XGBClassifier()
        self.naive = GaussianNB()
        self.log = APP_logger()

    def best_para_for_naive(self, x_train, y_train):
        """Tune ``var_smoothing`` for GaussianNB and return the fitted model.

        The tuned model is also stored on ``self.naive1``.

        Returns:
            The fitted ``GaussianNB``, or the caught exception on failure.
        """
        try:
            self.para_gired = {
                "var_smoothing":
                [1e-9, .001, .05, .08, .1, .5, 1e-8, 1e-7, 1e-6]
            }
            search = GridSearchCV(self.naive, self.para_gired, cv=5, verbose=3)
            search.fit(x_train, y_train)
            best_var_smoothing = search.best_params_["var_smoothing"]
            self.naive1 = GaussianNB(var_smoothing=best_var_smoothing)
            self.naive1.fit(x_train, y_train)
            message = "succesfully naive model best parameter find"
            self.log.log("log_msg//naive_model.txt", message)
            return self.naive1
        except Exception as e:
            message = "there is some error in best_para_for_naive function ,the error is {a}".format(
                a=e)
            self.log.log("log_msg//naive_model.txt", message)
            return e

    def best_para_for_XGboost(self, x_train, y_train):
        """Tune ``n_estimators``/``max_depth`` for XGBoost and return the fit.

        On success ``self.xgboost`` is replaced by the tuned, fitted model
        and the chosen values are kept on ``self.n_estimater`` and
        ``self.max_depth``.

        Returns:
            The fitted ``XGBClassifier``, or the caught exception on failure.
        """
        try:
            para = {"n_estimators": [100, 200, 250], "max_depth": [2, 6, 10]}
            search = GridSearchCV(self.xgboost,
                                  param_grid=para,
                                  cv=5,
                                  verbose=3)
            search.fit(x_train, y_train)
            self.n_estimater = search.best_params_["n_estimators"]
            self.max_depth = search.best_params_["max_depth"]
            self.xgboost = XGBClassifier(n_estimators=self.n_estimater,
                                         max_depth=self.max_depth)
            self.xgboost.fit(x_train, y_train)
            message = "succesfully xgboost model best parameter find"
            self.log.log("log_msg//xgboost_model.txt", message)
            return self.xgboost
        except Exception as e:
            message = "there is some error in best_para_for_XGboost function ,the error is {a}".format(
                a=e)
            self.log.log("log_msg//xgboost_model.txt", message)
            return e

    def best_model(self, x_train, x_test, y_train, y_test):
        """Tune both models and return the one with the higher test accuracy.

        Returns:
            ``(model, name)`` with ``name`` being ``"xgboost"`` or
            ``"naive"`` (naive wins ties), or the caught exception on
            failure. When one of the tuners failed, the exception it
            returned is the one reported here.
        """
        try:
            tuned_xgb = self.best_para_for_XGboost(x_train, y_train)
            tuned_naive = self.best_para_for_naive(x_train, y_train)

            # The tuners report failure by returning the exception. Surface
            # it here instead of storing it as if it were an estimator, which
            # would break every later call on this instance.
            for candidate in (tuned_xgb, tuned_naive):
                if isinstance(candidate, Exception):
                    raise candidate

            self.xgboost = tuned_xgb
            self.naive = tuned_naive

            xg_score = accuracy_score(y_test, tuned_xgb.predict(x_test))
            naive_score = accuracy_score(y_test, tuned_naive.predict(x_test))
            message = "succesfully best model find"
            self.log.log("log_msg//best_model.txt", message)

            if xg_score > naive_score:
                return self.xgboost, "xgboost"
            return self.naive, "naive"

        except Exception as e:
            message = "there is some error in best_model function ,the error is {a}".format(
                a=e)
            self.log.log("log_msg//best_model.txt", message)
            return e
Ejemplo n.º 6
0
class db_vali:
    """Load validated training CSV files into SQLite and export them again.

    All rows live in a single table named ``Good_Raw_Data``. Failures are
    logged through ``self.log``; methods that report them do so by
    *returning* the exception object rather than raising it.
    """

    def __init__(self):
        self.path = "Training_database//"
        self.log = APP_logger()
        self.good_file_path = "Training_raw_file//Good_data"
        self.bad_file_path = "Training_raw_file//bad_data"

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` as a double-quoted SQL identifier."""
        return '"' + str(name).replace('"', '""') + '"'

    @staticmethod
    def _to_sql_value(token):
        """Map one CSV field to a bound value; the text NULL becomes SQL NULL."""
        text = token.strip()
        return None if text.upper() == "NULL" else text

    @staticmethod
    def _close(conn):
        """Close ``conn`` if it is a real connection (DBconnection may return an error)."""
        if isinstance(conn, sqlite3.Connection):
            conn.close()

    def DBconnection(self, DatabaseName):
        """Open ``<self.path><DatabaseName>.db``.

        Returns:
            The ``sqlite3.Connection``, or the caught exception on failure.
        """
        try:
            conn = sqlite3.connect(self.path + DatabaseName + ".db")
            message = "succesfully Database connection done"
            self.log.log("log_msg//database.txt", message)
        except Exception as e:
            message = "there is some error in database creation  function ,the error is {a}".format(a=e)
            self.log.log("log_msg//database.txt", message)
            return e
        return conn

    def dbtablecreation(self, Databasename, column):
        """Create ``Good_Raw_Data`` with the given columns if it is missing.

        Args:
            Databasename: database file name without the ``.db`` suffix.
            column: mapping of column name to SQL type, in column order.

        Returns:
            ``None`` on success (including when the table already exists),
            or the caught exception on failure.
        """
        conn = None
        try:
            conn = self.DBconnection(Databasename)
            c = conn.cursor()
            c.execute("SELECT count(name) FROM sqlite_master WHERE type = 'table' AND name = 'Good_Raw_Data'")
            if c.fetchone()[0] == 1:
                return None

            if column:
                # One CREATE with every column replaces the former
                # "ALTER, and CREATE if that fails" loop. Column types come
                # from the schema and cannot be bound as parameters.
                definitions = ", ".join(
                    "{name} {sql_type}".format(
                        name=self._quote_identifier(col), sql_type=sql_type)
                    for col, sql_type in column.items())
                c.execute("CREATE TABLE Good_Raw_Data ({cols})".format(cols=definitions))
                conn.commit()
            message = "succesfully Database table_creation connection done"
            self.log.log("log_msg//database.txt", message)

        except Exception as e:
            message = "there is some error in database creation  function ,the error is {a}".format(a=e)
            self.log.log("log_msg//database.txt", message)
            return e
        finally:
            self._close(conn)

    def insert_table_into_db(self, Databasename):
        """Insert every data row of every file in ``self.good_file_path``.

        The first line of each file is skipped as the header. Values are
        bound as parameters, so text fields no longer break the statement;
        numeric text is converted by SQLite according to the column type.
        Nothing is committed unless all files were inserted.

        Returns:
            ``None`` on success, or the caught exception on failure.
        """
        conn = None
        try:
            conn = self.DBconnection(Databasename)
            c = conn.cursor()
            for file_name in os.listdir(self.good_file_path):
                with open(os.path.join(self.good_file_path, file_name), "r", newline="") as f:
                    reader = csv.reader(f)
                    next(reader, None)  # skip the header row
                    for row in reader:
                        if not row:
                            continue  # ignore blank lines
                        placeholders = ",".join("?" * len(row))
                        c.execute(
                            "INSERT INTO Good_Raw_Data VALUES ({VALUE})".format(VALUE=placeholders),
                            [self._to_sql_value(field) for field in row])
            conn.commit()
            message = "succesfully Database connection done"
            self.log.log("log_msg//database.txt", message)
        except Exception as e:
            message = "there is some error in database creation  function ,the error is {a}".format(a=e)
            self.log.log("log_msg//database.txt", message)
            return e
        finally:
            self._close(conn)

    def Export_DB_csv(self, Databasename):
        """Write all of ``Good_Raw_Data`` to ``FileFrom_DB//input.csv``.

        The first output row holds the column names. Errors are logged and
        swallowed; the method always returns ``None``.
        """
        conn = None
        try:
            input_file = "FileFrom_DB"
            conn = self.DBconnection(Databasename)
            c = conn.cursor()
            os.makedirs(input_file, exist_ok=True)
            c.execute("SELECT * FROM Good_Raw_Data")
            result = c.fetchall()
            header = [i[0] for i in c.description]
            # ``with`` guarantees the file is flushed and closed before
            # anyone reads it back.
            with open("FileFrom_DB//input.csv", "w", newline="") as out:
                csv_file = csv.writer(out, delimiter=",")
                csv_file.writerow(header)
                csv_file.writerows(result)
            message = "succesfully Database Exported into csv file  connection done"
            self.log.log("log_msg//database.txt", message)
        except Exception as e:
            message = "there is some error in database creation  function ,the error is {a}".format(a=e)
            self.log.log("log_msg//database.txt", message)
        finally:
            self._close(conn)
Ejemplo n.º 7
0
 def __init__(self):
     """Set the training database/raw-file locations and create the logger."""
     self.path = "Training_database//"
     self.log = APP_logger()
     self.good_file_path = "Training_raw_file//Good_data"
     self.bad_file_path = "Training_raw_file//bad_data"
Ejemplo n.º 8
0
 def __init__(self):
     """Set the prediction database/raw-file locations and create the logger."""
     self.path = "prediction_database//"
     self.log = APP_logger()
     self.good_file_path = "prediction_raw_file//Good_data"
     self.bad_file_path = "prediction_raw_file//bad_data"
class prediction_val:
    """Validate raw prediction batch files and sort them into good/bad folders.

    Every method logs its outcome through ``self.Log``. On failure the
    exception object is *returned* rather than raised.
    """

    def __init__(self, prediction_file_dir):
        self.Batch_dir = prediction_file_dir
        self.Log = APP_logger()
        self.schema = "schema_prediction.json"

    def schema_values(self):
        """Read the schema file and return its five expected entries.

        Returns:
            ``(SampleFileName, LengthOfDateStampInFile,
            LengthOfTimeStampInFile, NumberofColumns, ColName)``, or the
            caught exception on failure.
        """
        try:
            with open(self.schema, "r") as f:
                dic = json.load(f)
            SampleFileName = dic["SampleFileName"]
            LengthOfDateStampInFile = dic["LengthOfDateStampInFile"]
            LengthOfTimeStampInFile = dic["LengthOfTimeStampInFile"]
            NumberofColumns = dic["NumberofColumns"]
            ColName = dic["ColName"]
            message = "succesfully schema_value created"
            self.Log.log("log_msg//schema_values_pre.txt", message)
            return SampleFileName, LengthOfDateStampInFile, LengthOfTimeStampInFile, NumberofColumns, ColName
        except Exception as e:
            message = "there is some error schema_values function ,the error is {a}".format(a=e)
            self.Log.log("log_msg//schema_values_pre.txt", message)
            return e

    def create_good_bad_folder(self):
        """Create the good and bad raw-data folders if they are missing."""
        try:
            for path in ("prediction_raw_file//Good_data",
                         "prediction_raw_file//bad_data"):
                if not os.path.isdir(path):
                    os.makedirs(path)
            message = "succesfully  created good and bad raw data folder"
            self.Log.log("log_msg//create_good_bad_pre.txt", message)

        except Exception as e:
            message = "there is some error in create good and bad data folder  function ,the error is {a}".format(a=e)
            self.Log.log("log_msg//create_good_bad_pre.txt", message)
            return e

    def delete_good_data(self):
        """Remove the good raw-data folder and its contents, if present."""
        try:
            path = "prediction_raw_file//Good_data"
            if os.path.isdir(path):
                shutil.rmtree(path)
            message = "succesfully   deleted good raw data folder"
            self.Log.log("log_msg//delete_good_pre.txt", message)
        except Exception as e:
            message = "there is some error in delete good data folder function ,the error is {a}".format(a=e)
            self.Log.log("log_msg//delete_good_pre.txt", message)
            return e

    def delete_bad_data(self):
        """Remove the bad raw-data folder and its contents, if present."""
        try:
            path = "prediction_raw_file//bad_data"
            if os.path.isdir(path):
                shutil.rmtree(path)
            message = "succesfully   deleted bad raw data folder"
            self.Log.log("log_msg//delete_bad_pre.txt", message)
        except Exception as e:
            message = "there is some error in delete bad data folder function ,the error is {a}".format(a=e)
            self.Log.log("log_msg//delete_bad_pre.txt", message)
            return e

    def move_bad_to_archivedbad(self):
        """Move every bad file into a timestamped archive folder.

        The target is ``prediction_Archived/bad_data_<YYYY-MM-DD>-<HHMMSS>``.
        Files already present in the target are left where they are.
        """
        try:
            now = datetime.now()
            date = now.date()
            # Format the time from the full datetime: a bare date has no
            # time part and would always yield "000000".
            current_time = now.strftime("%H%M%S")
            source = "prediction_raw_file//bad_data"
            if os.path.isdir(source):
                path = "prediction_Archived"
                # os.path.join keeps this a sub-folder on every platform; a
                # literal backslash is only a separator on Windows.
                source1 = os.path.join(
                    path, "bad_data_" + str(date) + "-" + str(current_time))
                if not os.path.isdir(source1):
                    os.makedirs(source1)  # also creates the archive root
                already_archived = set(os.listdir(source1))
                for file in os.listdir(source):
                    if file not in already_archived:
                        shutil.move(source + "//" + file, source1)
            message = "succesfully   moved bad_file to  Archivedbad folder raw data folder"
            self.Log.log("log_msg//Archived_bad_pre.txt", message)

        except Exception as e:
            message = "there is some error in move_bad_to_archivedbad function ,the error is {a}".format(a=e)
            self.Log.log("log_msg//Archived_bad_pre.txt", message)
            return e

    def cheack_filename(self, len_date, lentime):
        """Copy each batch file to the good or bad folder based on its name.

        A name is good when it ends in ``.csv`` and its stem, split on
        ``_``, starts with ``creditCardFraud`` followed by a part of length
        ``len_date`` and a part of length ``lentime``. Anything else,
        including names with too few parts, is copied to the bad folder
        instead of aborting the run.
        """
        try:
            for file in os.listdir("prediction_Batch_Files"):
                stem, extension = os.path.splitext(file)
                parts = stem.split("_")
                name_is_valid = (
                    extension == ".csv"
                    and len(parts) >= 3
                    and parts[0] == "creditCardFraud"
                    and len(parts[1]) == len_date
                    and len(parts[2]) == lentime
                )
                if name_is_valid:
                    shutil.copy("prediction_Batch_Files//" + file, "prediction_raw_file//Good_data")
                else:
                    shutil.copy("prediction_Batch_Files//" + file, "prediction_raw_file//bad_data")
            message = "succesfully   checked all the correct file and copyed to respect folders"
            # NOTE(review): success logs to "check_filename_.txt", failure to
            # "check_filename.txt" - confirm which file name is intended.
            self.Log.log("log_msg//check_filename_.txt", message)
        except Exception as e:
            message = "there is having some error in file checked  function ,the error is {a}".format(a=e)
            self.Log.log("log_msg//check_filename.txt", message)
            return e

    def cheack_col(self, numofcol):
        """Move good files whose column count differs from ``numofcol`` to bad."""
        try:
            for file in os.listdir("prediction_raw_file//Good_data"):
                data = pd.read_csv("prediction_raw_file//Good_data//" + file)
                if data.shape[1] != numofcol:
                    shutil.move("prediction_raw_file//Good_data//" + file, "prediction_raw_file//bad_data")
            message = "succesfully   checked all columns and copyed to the respect folders"
            self.Log.log("log_msg//check_column_pre.txt", message)
        except Exception as e:
            message = "there is having some error in column checked  function ,the error is {a}".format(a=e)
            self.Log.log("log_msg//check_column_pre.txt", message)
            return e

    def cheack_mis_col(self):
        """Move good files that contain a column with no values at all to bad."""
        try:
            for file in os.listdir("prediction_raw_file//Good_data"):
                data = pd.read_csv("prediction_raw_file//Good_data//" + file)
                # Move the file once, however many columns are empty; a
                # second move would fail because the source is gone.
                if any(data[col].count() == 0 for col in data.columns):
                    shutil.move("prediction_raw_file//Good_data//" + file, "prediction_raw_file//bad_data")
            message = "succesfully   checked all columns and copyed to the respect folders"
            self.Log.log("log_msg//check_missing_column_pre.txt", message)
        except Exception as e:
            message = "there is having some error in this   function ,the error is {a}".format(a=e)
            self.Log.log("log_msg//check_missing_column_pre.txt", message)
            return e
 def __init__(self,prediction_file_dir):
     """Store the batch directory, create the logger and set the schema file name."""
     self.Batch_dir=prediction_file_dir
     self.Log=APP_logger()
     # Schema file name, given relative to the working directory.
     self.schema="schema_prediction.json"
 def __init__(self, path):
     """Store ``path`` and create the prediction validator, DB helper and logger.

     ``path`` is forwarded unchanged to ``prediction_val``.
     """
     self.path = path
     self.prediction_val = prediction_val(self.path)
     self.db_vali = db_vali()
     self.log = APP_logger()
 def __init__(self, path):
     """Store ``path`` and create the training validator, DB helper and logger.

     ``path`` is forwarded unchanged to ``Training_val``.
     """
     self.path = path
     self.Training_val = Training_val(self.path)
     self.db_vali = db_vali()
     self.log = APP_logger()
 def __init__(self):
     """Set the model base directory and create the logger."""
     self.file = "model//"
     self.log = APP_logger()
     # Keeps a reference to the pickle module on the instance.
     self.pickle = pickle
 def __init__(self, training_file_dir):
     """Store the batch directory, create the logger and set the schema file name."""
     self.Batch_dir = training_file_dir
     self.Log = APP_logger()
     # Schema file name, given relative to the working directory.
     self.schema = "schema_training.json"