Example #1
def trainingTest():
    try:
        az_blb_mgt = AzureBlobManagement()
        execution_id = str(uuid.uuid4())
        path = 'training-batch-files'
        train_valObj = train_validation(path,
                                        execution_id)  # object initialization
        train_valObj.train_validation(
        )  # calling the training_validation function
        trainModelObj = trainModel(execution_id)  # object initialization
        trainModelObj.trainingModel(
        )  # training the model for the files in the table
        bad_data_archived = "lat-" + execution_id
        directory = [
            container_name.name for container_name in
            az_blb_mgt.blob_service_client.list_containers()
        ]
        for dir_name in directory:
            if re.search('^' + bad_data_archived, dir_name):
                bad_data_archived = dir_name

        file_names = az_blb_mgt.getAllFileNameFromDirectory(bad_data_archived)

        message = "Hi Team,\n\n We have listed file name which was failed to process due to validation"
        i = 0
        for file in file_names:
            i = i + 1
            message = message + "\n" + str(i) + ") " + file
        message = message + "\n Thanks & regards\n Avnish Yadav"
        emailSender = EmailSender()
        emailSender.sendEmail(message, "Trainning failed file")
        print("Traing Completed")
    except Exception as e:
        print(str(e))
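
A minimal driver sketch, assuming the module defining trainingTest above is in scope; the function takes no arguments and always reads from the "training-batch-files" directory:

trainingTest()  # runs validation, model training, and the failure-report email end to end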
 def __init__(self, execution_id):
     #self.goodDataPath = "Training_Raw_files_validated/Good_Raw" #code commented by avnish yadav
     self.goodDataPath = "good-raw-file-train-validated"
     self.execution_id = execution_id
     #self.logger = App_Logger()#code commented by avnish yadav
     self.logger_db_writer = App_LoggerDB(execution_id)
     self.az_blob_mgt = AzureBlobManagement()
 def __init__(self,log_database,log_collection,execution_id):
     self.log_database=log_database
     self.log_collection=log_collection
     self.prediction_directory="prediction-file-from-db"
     self.filename="InputFile.csv"
     self.log_db_writer=App_LoggerDB(execution_id=execution_id)
     self.az_blob_mgt=AzureBlobManagement()
Example #4
 def __init__(self, execution_id):
     self.execution_id = execution_id
     #self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
     self.goodDataPath = "good-raw-file-prediction-validated"
     #self.logger = App_Logger()
     self.log_db_writer = App_LoggerDB(execution_id=execution_id)
     self.log_database = "wafer_prediction_log"
     self.az_blob_mgt = AzureBlobManagement()
 def __init__(self, path, execution_id):
     self.Batch_Directory = path
     self.execution_id = execution_id
     #self.schema_path = 'schema_training.json'
     self.collection_name = "schema_training"  #code added by Avnish yadav
     self.database_name = "wafer_sys"  #code added by Avnish yadav
     #self.logger = App_Logger()
     self.logger_db_writer = App_LoggerDB(
         execution_id=execution_id)  #code added by Avnish yadav
     self.mongodb = MongoDBOperation()
     self.az_blob_mgt = AzureBlobManagement()
     self.good_directory_path = "good-raw-file-train-validated"
     self.bad_directory_path = "bad-raw-file-train-validated"
class App_LoggerDB:
    def __init__(self, execution_id):
        self.mongoDBObject = MongoDBOperation()
        self.azureBlobObject = AzureBlobManagement()
        self.execution_id = execution_id

    def log(self, database_name, collection_name, log_message):
        try:
            self.now = datetime.now()
            self.date = self.now.date()
            self.current_time = self.now.strftime("%H:%M:%S")
            log = {
                'Log_updated_date': self.now,
                'Log_update_time': self.current_time,
                'Log_message': log_message,
                'execution_id': self.execution_id
            }
            res = self.mongoDBObject.insertRecordInCollection(
                database_name, collection_name, log)
            if res > 0:
                return True
            else:
                log = {
                    'Log_updated_date': [self.now],
                    'Log_update_time': [self.current_time],
                    'Log_message': [log_message],
                    'execution_id': self.execution_id
                }
                self.azureBlobObject.saveDataFrameTocsv("db-fail-log",
                                                        "log_" +
                                                        self.execution_id,
                                                        pd.DataFrame(log),
                                                        mode="a+",
                                                        header=True)
            return True
        except Exception as e:
            log = {
                'Log_updated_date': [self.now],
                'Log_update_time': [self.current_time],
                'Log_message': [log_message],
                'execution_id': self.execution_id
            }
            log["Log_message"][0] = log["Log_message"][0] + str(e)
            self.azureBlobObject.saveDataFrameTocsv("db-fail-log",
                                                    "log_" + self.execution_id,
                                                    pd.DataFrame(log),
                                                    mode="a+",
                                                    header=True)
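
A minimal usage sketch for App_LoggerDB above, assuming MongoDBOperation and AzureBlobManagement are importable from the project; on a failed MongoDB insert the record is instead appended to log_<execution_id> under the "db-fail-log" blob directory:

import uuid

db_logger = App_LoggerDB(execution_id=str(uuid.uuid4()))
db_logger.log("wafer_training_log", "general_log", "Pipeline started")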
class Data_Getter:
    """
    This class shall be used for obtaining the data from the source for training.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None

    """
    """
    def __init__(self, file_object, logger_object):
        self.training_file='Training_FileFromDB/InputFile.csv'
        self.file_object=file_object
        self.logger_object=logger_object
    """
    def __init__(self, log_database, log_collection, execution_id):
        self.log_database = log_database
        self.log_collection = log_collection
        self.training_directory = "training-file-from-db"
        self.filename = "InputFile.csv"
        self.log_db_writer = App_LoggerDB(execution_id=execution_id)
        self.az_blob_mgt = AzureBlobManagement()

    def get_data(self):
        """
        Method Name: get_data
        Description: This method reads the data from source.
        Output: A pandas DataFrame.
        On Failure: Raise Exception

         Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        #self.logger_object.log(self.file_object,'Entered the get_data method of the Data_Getter class')
        self.log_db_writer.log(
            self.log_database, self.log_collection,
            "Entered the get_data method of the Data_Getter class")
        try:

            #self.data= pd.read_csv(self.training_file) # reading the data file
            self.data = self.az_blob_mgt.readCsvFileFromDirectory(
                self.training_directory, self.filename)
            #self.logger_object.log(self.file_object,'Data Load Successful.Exited the get_data method of the Data_Getter class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                "Data Load Successful.Exited the get_data method of the Data_Getter class"
            )
            return self.data
        except Exception as e:
            #self.logger_object.log(self.file_object,'Exception occured in get_data method of the Data_Getter class. Exception message: '+str(e))
            #self.logger_object.log(self.file_object,
            #                     'Data Load Unsuccessful.Exited the get_data method of the Data_Getter class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                "Exception occurred in get_data method of the Data_Getter class. Exception message: "
                + str(e))
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                "Data Load Unsuccessful. Exited the get_data method of the Data_Getter class"
            )
            raise Exception()
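
A short usage sketch for Data_Getter, assuming the "training-file-from-db" blob directory already holds InputFile.csv (produced by the DB-export step shown in the DbOperationMongoDB example below); the execution id is a hypothetical placeholder:

data_getter = Data_Getter("wafer_training_log", "general_log",
                          execution_id="demo-run-001")
data = data_getter.get_data()  # pandas DataFrame read from blob storage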
 def __init__(self, log_database, log_collection, execution_id):
     self.execution_id = execution_id
     self.log_db_writer = App_LoggerDB(execution_id=execution_id)
     self.log_database = log_database
     self.log_collection = log_collection
     self.az_blob_mgt = AzureBlobManagement()
     self.mongoDB = MongoDBOperation()
     self.clf = RandomForestClassifier()
     self.xgb = XGBClassifier(objective='binary:logistic')
Example #9
def predictionTest(path=None):
    try:
        az_blb_mgt = AzureBlobManagement()
        execution_id = str(uuid.uuid4())
        if path is None:
            path = 'prediction-batch-files'
        pred_val = pred_validation(path, execution_id)  # object initialization

        pred_val.prediction_validation(
        )  # calling the prediction_validation function

        pred = prediction(path, execution_id)  # object initialization

        # predicting for dataset present in database
        path, json_predictions = pred.predictionFromModel()
        prediction_location = "prediction-output-file"
        file_list = "prediction-output-file"
        #selecting all failed file name
        bad_data_archived = "lap-" + execution_id
        directory = [
            container_name.name for container_name in
            az_blb_mgt.blob_service_client.list_containers()
        ]
        for dir_name in directory:
            if re.search('^' + bad_data_archived, dir_name):
                bad_data_archived = dir_name

        file_names = az_blb_mgt.getAllFileNameFromDirectory(bad_data_archived)

        message = "Hi Team,\n\n We have listed file name which was failed to process due to validation"
        i = 0
        for file in file_names:
            i = i + 1
            message = message + "\n" + str(i) + ") " + file
        message = message + "\n Thanks & regards\n Avnish Yadav"
        emailSender = EmailSender()
        emailSender.sendEmail(message, "Prediction failed file")
        print(path, json_predictions)
    except Exception as e:
        print(str(e))
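
Both driver entry points can be called directly; predictionTest accepts an optional source directory and falls back to "prediction-batch-files" when none is given (the Event Grid handler further down calls it with "received-prediction"):

predictionTest()                       # default batch container
predictionTest("received-prediction")  # files staged by the blob-created event handler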
 def __init__(self, path, execution_id):
     self.raw_data = Raw_Data_validation(path, execution_id)
     self.dataTransform = dataTransform(execution_id)
     #self.dBOperation = dBOperation() # code commented by avnish yadav
     self.dBOperationMongoDB = DbOperationMongoDB(
         execution_id)  # code added by avnish yadav
     #self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+') # code commented by avnish yadav
     self.log_database = "wafer_training_log"  #code added by Avnish Yadav
     self.log_collection = "training_main_log"  #code added by Avnish Yadav
     #self.log_writer = logger.App_Logger() # code commented by Avnish Yadav
     self.execution_id = execution_id  #code added by Avnish Yadav
     self.logDB_write = App_LoggerDB(
         execution_id=execution_id)  #code Added by Avnish Yadav
     self.az_blob_mgt = AzureBlobManagement()  #code Added by Avnish Yadav
class Raw_Data_validation:
    """
             This class shall be used for handling all the validation done on the Raw Training Data.

             Written By: iNeuron Intelligence
             Version: 1.0
             Revisions: None

             """
    def __init__(self, path, execution_id):
        self.Batch_Directory = path
        self.execution_id = execution_id
        #self.schema_path = 'schema_training.json'
        self.collection_name = "schema_training"  #code added by Avnish yadav
        self.database_name = "wafer_sys"  #code added by Avnish yadav
        #self.logger = App_Logger()
        self.logger_db_writer = App_LoggerDB(
            execution_id=execution_id)  #code added by Avnish yadav
        self.mongodb = MongoDBOperation()
        self.az_blob_mgt = AzureBlobManagement()
        self.good_directory_path = "good-raw-file-train-validated"
        self.bad_directory_path = "bad-raw-file-train-validated"

    def valuesFromSchema(self):
        """
                        Method Name: valuesFromSchema
                        Description: This method extracts all the relevant information from the pre-defined "Schema" file.
                        Output: LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, Number of Columns
                        On Failure: Raise ValueError,KeyError,Exception

                         Written By: iNeuron Intelligence
                        Version: 1.0
                        Revisions: None

                                """
        try:
            """code commented by Avnish Yadav
            with open(self.schema_path, 'r') as f:
                dic = json.load(f)
                f.close()
            """
            #code started by Avnish Yadav
            log_database = "wafer_training_log"
            log_collection = "values_from_schema_validation"

            df_schema_training = self.mongodb.getDataFrameofCollection(
                self.database_name, self.collection_name)
            dic = {
                col: df_schema_training.loc[0, col]
                for col in df_schema_training.columns
            }
            del df_schema_training
            #code ended by Avnish Yadav

            pattern = dic['SampleFileName']
            LengthOfDateStampInFile = dic['LengthOfDateStampInFile']
            LengthOfTimeStampInFile = dic['LengthOfTimeStampInFile']
            column_names = dic['ColName']
            NumberofColumns = dic['NumberofColumns']

            #file = open("Training_Logs/valuesfromSchemaValidationLog.txt", 'a+')
            message = "LengthOfDateStampInFile:: %s" % LengthOfDateStampInFile + "\t" + "LengthOfTimeStampInFile:: %s" % LengthOfTimeStampInFile + "\t " + "NumberofColumns:: %s" % NumberofColumns + "\n"
            #self.logger.log(file,message) code commented by Avnish Yadav
            self.logger_db_writer.log(log_database, log_collection, message)

            #file.close()

        except ValueError:
            #file = open("Training_Logs/valuesfromSchemaValidationLog.txt", 'a+')
            #self.logger.log(file,"ValueError:Value not found inside schema_training.json")
            #file.close()
            self.logger_db_writer.log(
                log_database, log_collection,
                "Error occurred in class:Raw_Data_validation method:valuesFromSchema ValueError: Value not found inside collection schema_training"
            )
            raise ValueError

        except KeyError:
            #file = open("Training_Logs/valuesfromSchemaValidationLog.txt", 'a+')
            #self.logger.log(file, "KeyError:Key value error incorrect key passed")
            #file.close()
            self.logger_db_writer.log(
                log_database, log_collection,
                "Error occurred in class:Raw_Data_validation method:valuesFromSchema KeyError: incorrect key passed"
            )

            raise KeyError

        except Exception as e:
            #file = open("Training_Logs/valuesfromSchemaValidationLog.txt", 'a+')
            #self.logger.log(file, str(e))
            #file.close()
            self.logger_db_writer.log(
                log_database, log_collection,
                "Error occurred in class:Raw_Data_validation method:valuesFromSchema error:"
                + str(e))
            raise e

        return LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, NumberofColumns

    def manualRegexCreation(self):
        """
                                Method Name: manualRegexCreation
                                Description: This method contains a manually defined regex based on the "FileName" given in "Schema" file.
                                            This Regex is used to validate the filename of the training data.
                                Output: Regex pattern
                                On Failure: None

                                 Written By: iNeuron Intelligence
                                Version: 1.0
                                Revisions: None

                                        """
        regex = "['wafer']+['\_'']+[\d_]+[\d]+\.csv"
        return regex
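
    # A quick standalone check of this pattern (the bracketed groups act as
    # character classes, i.e. sets of the listed characters, which still
    # accepts the intended wafer_<datestamp>_<timestamp>.csv shape):
    #   re.match(regex, "wafer_08012020_120000.csv")  -> match
    #   re.match(regex, "wafer.csv")                  -> None (no date/time stamp)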

    def createDirectoryForGoodBadRawData(self):
        """
                                      Method Name: createDirectoryForGoodBadRawData
                                      Description: This method creates directories to store the Good Data and Bad Data
                                                    after validating the training data.

                                      Output: None
                                      On Failure: OSError

                                       Written By: iNeuron Intelligence
                                      Version: 1.0
                                      Revisions: None

                                              """
        """
        try:
            path = os.path.join("Training_Raw_files_validated/", "Good_Raw/")
            if not os.path.isdir(path):
                os.makedirs(path)
            path = os.path.join("Training_Raw_files_validated/", "Bad_Raw/")
            if not os.path.isdir(path):
                os.makedirs(path)

        except OSError as ex:
            file = open("Training_Logs/GeneralLog.txt", 'a+')
            self.logger.log(file,"Error while creating Directory %s:" % ex)
            file.close()
            raise OSError
        """
        try:
            log_database = "wafer_training_log"
            log_collection = "general_log"
            self.az_blob_mgt.createDirectory(self.good_directory_path,
                                             is_replace=True)
            self.az_blob_mgt.createDirectory(self.bad_directory_path,
                                             is_replace=True)
            msg = self.good_directory_path + " and " + self.bad_directory_path + " created successfully."
            self.logger_db_writer.log(log_database, log_collection, msg)
        except Exception as e:
            msg = "Error Occured in class Raw_Data_validation method:createDirectoryForGoodBadRawData error: Failed to create directory " + self.good_directory_path + " and " + self.bad_directory_path
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise e

    def deleteExistingGoodDataTrainingFolder(self):
        """
                                            Method Name: deleteExistingGoodDataTrainingFolder
                                            Description: This method deletes the directory made to store the Good Data
                                                         after loading the data in the table. Once the good files are
                                                         loaded in the DB, deleting the directory ensures space optimization.
                                            Output: None
                                            On Failure: OSError

                                             Written By: iNeuron Intelligence
                                            Version: 1.0
                                            Revisions: None

                                                    """

        try:
            """
            path = 'Training_Raw_files_validated/'
            # if os.path.isdir("ids/" + userName):
            # if os.path.isdir(path + 'Bad_Raw/'):
            #     shutil.rmtree(path + 'Bad_Raw/')
            if os.path.isdir(path + 'Good_Raw/'):
                shutil.rmtree(path + 'Good_Raw/')
                file = open("Training_Logs/GeneralLog.txt", 'a+')
                self.logger.log(file,"GoodRaw directory deleted successfully!!!")
                file.close()
            """
            log_database = "wafer_training_log"
            log_collection = "general_log"
            self.az_blob_mgt.deleteDirectory(self.good_directory_path)
            self.logger_db_writer.log(
                log_database, log_collection,
                self.good_directory_path + " deleted successfully!!")
            """
        except OSError as s:
            file = open("Training_Logs/GeneralLog.txt", 'a+')
            self.logger.log(file,"Error while Deleting Directory : %s" %s)
            file.close()
            raise OSError
            """
        except Exception as e:
            msg = "Error Occured in class Raw_Data_validation method:deleteExistingGoodDataTrainingFolder Error occured while deleting :" + self.good_directory_path
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise e

    def deleteExistingBadDataTrainingFolder(self):
        """
                                            Method Name: deleteExistingBadDataTrainingFolder
                                            Description: This method deletes the directory made to store the bad Data.
                                            Output: None
                                            On Failure: OSError

                                             Written By: iNeuron Intelligence
                                            Version: 1.0
                                            Revisions: None

                                                    """

        try:
            """
            path = 'Training_Raw_files_validated/'
            if os.path.isdir(path + 'Bad_Raw/'):
                shutil.rmtree(path + 'Bad_Raw/')
                file = open("Training_Logs/GeneralLog.txt", 'a+')
                self.logger.log(file,"BadRaw directory deleted before starting validation!!!")
                file.close()
        except OSError as s:
            file = open("Training_Logs/GeneralLog.txt", 'a+')
            self.logger.log(file,"Error while Deleting Directory : %s" %s)
            file.close()
            raise OSError"""
            log_database = "wafer_training_log"
            log_collection = "general_log"
            self.az_blob_mgt.deleteDirectory(self.bad_directory_path)
            self.logger_db_writer.log(
                log_database, log_collection,
                self.bad_directory_path + " deleted successfully!!")

        except Exception as e:
            msg = "Error Occured in class Raw_Data_validation method:deleteExistingGoodDataTrainingFolder Error occured while deleting :" + self.good_directory_path
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise e

    def moveBadFilesToArchiveBad(self):
        """
                                            Method Name: moveBadFilesToArchiveBad
                                            Description: This method deletes the directory made to store the Bad Data
                                                          after moving the data in an archive folder. We archive the bad
                                                          files to send them back to the client for invalid data issue.
                                            Output: None
                                            On Failure: OSError

                                             Written By: iNeuron Intelligence
                                            Version: 1.0
                                            Revisions: None

                                                    """
        now = datetime.now()
        date = now.date()
        time = now.strftime("%H%M%S")
        try:
            log_database = "wafer_training_log"
            log_collection = "general_log"

            #source = 'Training_Raw_files_validated/Bad_Raw/'
            source = self.bad_directory_path
            destination = "lat-" + self.exexcution_id
            self.logger_db_writer.log(log_database, log_collection,
                                      "Started moving bad raw data..")
            for file in self.az_blob_mgt.getAllFileNameFromDirectory(source):

                self.az_blob_mgt.moveFileInDirectory(source, destination, file)
                self.logger_db_writer.log(
                    log_database, log_collection, "File:" + file +
                    " moved to directory:" + destination + " successfully.")

            self.logger_db_writer.log(
                log_database, log_collection,
                "All bad raw file moved to directory:" + destination)

            self.az_blob_mgt.deleteDirectory(source)
            self.logger_db_writer.log(log_database, log_collection,
                                      "Deleting bad raw directory:" + source)
            """
            if os.path.isdir(source):

                path = "TrainingArchiveBadData"

                if not os.path.isdir(path):
                    os.makedirs(path)
                dest = 'TrainingArchiveBadData/BadData_' + str(date)+"_"+str(time)
                if not os.path.isdir(dest):
                    os.makedirs(dest)
                files = os.listdir(source)
                for f in files:
                    if f not in os.listdir(dest):
                        shutil.move(source + f, dest)
                file = open("Training_Logs/GeneralLog.txt", 'a+')
                self.logger.log(file,"Bad files moved to archive")
                path = 'Training_Raw_files_validated/'
                if os.path.isdir(path + 'Bad_Raw/'):
                    shutil.rmtree(path + 'Bad_Raw/')
                self.logger.log(file,"Bad Raw Data Folder Deleted successfully!!")
                file.close()
                """
        except Exception as e:
            """
            file = open("Training_Logs/GeneralLog.txt", 'a+')
            self.logger.log(file, "Error while moving bad files to archive:: %s" % e)
            file.close()
            """
            self.logger_db_writer.log(
                log_database, log_collection,
                "class Raw_Data_validation method:moveBadFilesToArchiveBad Error while moving bad files to archive:"
                + str(e))
            raise e

    def validationFileNameRaw(self, regex, LengthOfDateStampInFile,
                              LengthOfTimeStampInFile):
        """
                    Method Name: validationFileNameRaw
                    Description: This function validates the names of the training csv files against the name given in the schema.
                                 A regex pattern is used for the validation. If the name format does not match, the file is moved
                                 to the Bad Raw Data folder; otherwise it goes to the Good Raw Data folder.
                    Output: None
                    On Failure: Exception

                     Written By: iNeuron Intelligence
                    Version: 1.0
                    Revisions: None

                """

        #pattern = "['Wafer']+['\_'']+[\d_]+[\d]+\.csv"
        # delete the directories for good and bad data in case last run was unsuccessful and folders were not deleted.
        """
        self.deleteExistingBadDataTrainingFolder()
        self.deleteExistingGoodDataTrainingFolder()
        #create new directories
        """
        self.createDirectoryForGoodBadRawData()
        #onlyfiles = [f for f in listdir(self.Batch_Directory)]
        onlyfiles = self.az_blob_mgt.getAllFileNameFromDirectory(
            self.Batch_Directory)
        try:
            log_database = "wafer_training_log"
            log_collection = "name_validation_log"
            #f = open("Training_Logs/nameValidationLog.txt", 'a+')
            for filename in onlyfiles:
                if (re.match(regex, filename)):
                    splitAtDot = re.split('.csv', filename)
                    splitAtDot = (re.split('_', splitAtDot[0]))
                    if len(splitAtDot[1]) == LengthOfDateStampInFile:
                        if len(splitAtDot[2]) == LengthOfTimeStampInFile:
                            """
                            shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Good_Raw")
                            self.logger.log(f,"Valid File name!! File moved to GoodRaw Folder :: %s" % filename)
"""
                            self.az_blob_mgt.copyFileInDirectory(
                                self.Batch_Directory, self.good_directory_path,
                                filename)
                            self.logger_db_writer.log(
                                log_database, log_collection,
                                "Valid File name!! File moved to " +
                                self.good_directory_path + " file:" + filename)

                        else:
                            """
                            shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Bad_Raw")
                            self.logger.log(f,"Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
                            """
                            self.az_blob_mgt.copyFileInDirectory(
                                self.Batch_Directory, self.bad_directory_path,
                                filename)
                            msg = "Invalid File Name !! File moved to " + self.bad_directory_path + filename
                            self.logger_db_writer.log(log_database,
                                                      log_collection, msg)
                    else:
                        """
                        shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Bad_Raw")
                        self.logger.log(f,"Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
                        """
                        self.az_blob_mgt.copyFileInDirectory(
                            self.Batch_Directory, self.bad_directory_path,
                            filename)
                        msg = "Invalid File Name !! File moved to " + self.bad_directory_path + filename
                        self.logger_db_writer.log(log_database, log_collection,
                                                  msg)

                else:
                    """
                    shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Bad_Raw")
                    self.logger.log(f, "Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
                    """
                    self.az_blob_mgt.copyFileInDirectory(
                        self.Batch_Directory, self.bad_directory_path,
                        filename)
                    msg = "Invalid File Name !! File moved to " + self.bad_directory_path + filename
                    self.logger_db_writer.log(log_database, log_collection,
                                              msg)

            #f.close()

        except Exception as e:
            """
            f = open("Training_Logs/nameValidationLog.txt", 'a+')
            self.logger.log(f, "Error occured while validating FileName %s" % e)
            f.close()
            """
            msg = "Error occured while validating FileName " + str(e)
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise e

    def validateColumnLength(self, NumberofColumns):
        """
                          Method Name: validateColumnLength
                          Description: This function validates the number of columns in the csv files.
                                       It should be the same as given in the schema file.
                                       If not, the file is not suitable for processing and is moved to the Bad Raw Data folder.
                                       If the column number matches, the file is kept in Good Raw Data for processing.
                                       The csv file is missing the first column name, so this function changes the missing name to "Wafer".
                          Output: None
                          On Failure: Exception

                           Written By: iNeuron Intelligence
                          Version: 1.0
                          Revisions: None

                      """
        try:
            log_collection = "column_validation_log"
            log_database = "wafer_training_log"
            #f = open("Training_Logs/columnValidationLog.txt", 'a+')
            #self.logger.log(f,"Column Length Validation Started!!")
            self.logger_db_writer.log(log_database, log_collection,
                                      "Column Length Validation Started!!")
            #for file in listdir('Training_Raw_files_validated/Good_Raw/'):
            for file in self.az_blob_mgt.getAllFileNameFromDirectory(
                    self.good_directory_path):
                #csv = pd.read_csv("Training_Raw_files_validated/Good_Raw/" + file)
                csv = self.az_blob_mgt.readCsvFileFromDirectory(
                    self.good_directory_path, file)
                if csv.shape[1] != NumberofColumns:
                    """
                    shutil.move("Training_Raw_files_validated/Good_Raw/" + file, "Training_Raw_files_validated/Bad_Raw")
                    self.logger.log(f, "Invalid Column Length for the file!! File moved to Bad Raw Folder :: %s" % file)
            """
                    self.az_blob_mgt.moveFileInDirectory(
                        self.good_directory_path, self.bad_directory_path,
                        file)
                    msg = "Invalid Column Length for the file!! File moved to " + self.bad_directory_path + "file:" + file
                    self.logger_db_writer.log(log_database, log_collection,
                                              msg)
            #self.logger.log(f, "Column Length Validation Completed!!")
            self.logger_db_writer.log(log_database, log_collection,
                                      "Column Length Validation Completed!!")
            """ except OSError:
            f = open("Training_Logs/columnValidationLog.txt", 'a+')
            self.logger.log(f, "Error Occured while moving the file :: %s" % OSError)
            f.close()
            raise OSError
            """
        except Exception as e:
            """
            f = open("Training_Logs/columnValidationLog.txt", 'a+')
            self.logger.log(f, "Error Occured:: %s" % e)
            f.close()
            """
            self.logger_db_writer.log(
                log_database, log_collection,
                "Error occurred in class Raw_Data_validation method:validateColumnLength error:"
                + str(e))

            raise e
        #f.close()

    def validateMissingValuesInWholeColumn(self):
        """
                                  Method Name: validateMissingValuesInWholeColumn
                                  Description: This function validates if any column in the csv file has all values missing.
                                               If all the values are missing, the file is not suitable for processing.
                                               Such files are moved to bad raw data.
                                  Output: None
                                  On Failure: Exception

                                   Written By: iNeuron Intelligence
                                  Version: 1.0
                                  Revisions: None

                              """
        try:
            log_database = "wafer_training_log"
            log_collection = "missing_values_in_column"
            #f = open("Training_Logs/missingValuesInColumn.txt", 'a+')
            #self.logger.log(f,"Missing Values Validation Started!!")
            self.logger_db_writer.log(log_database, log_collection,
                                      "Missing Values Validation Started!!")

            #for file in listdir('Training_Raw_files_validated/Good_Raw/'):
            for file in self.az_blob_mgt.getAllFileNameFromDirectory(
                    self.good_directory_path):
                """
                csv = pd.read_csv("Training_Raw_files_validated/Good_Raw/" + file)
                count = 0
                """
                csv = self.az_blob_mgt.readCsvFileFromDirectory(
                    self.good_directory_path,
                    file,
                )
                count = 0
                for columns in csv:
                    if (len(csv[columns]) - csv[columns].count()) == len(
                            csv[columns]):
                        count += 1
                        """
                        shutil.move("Training_Raw_files_validated/Good_Raw/" + file,
                                    "Training_Raw_files_validated/Bad_Raw")
                        self.logger.log(f,"Invalid Column Length for the file!! File moved to Bad Raw Folder :: %s" % file)
                        """

                        self.az_blob_mgt.moveFileInDirectory(
                            self.good_directory_path, self.bad_directory_path,
                            file)
                        msg = "Invalid Column Length for the file!! File moved to " + self.bad_directory_path + ":: %s" % file
                        self.logger_db_writer.log(log_database, log_collection,
                                                  msg)
                        break
                if count == 0:
                    csv.rename(columns={"Unnamed: 1": "Wafer"}, inplace=True)
                    self.az_blob_mgt.saveDataFrameTocsv(
                        self.good_directory_path,
                        file,
                        csv,
                        index=None,
                        header=True)

                    #csv.to_csv("Training_Raw_files_validated/Good_Raw/" + file, index=None, header=True)
                """
        except OSError:
            f = open("Training_Logs/missingValuesInColumn.txt", 'a+')
            self.logger.log(f, "Error Occured while moving the file :: %s" % OSError)
            f.close()
            raise OSError
            """
        except Exception as e:
            #f = open("Training_Logs/missingValuesInColumn.txt", 'a+')
            #self.logger.log(f, "Error Occured:: %s" % e)
            #f.close()
            self.logger_db_writer.log(
                log_database, log_collection,
                "Error occurred in class:Raw_Data_validation method:validateMissingValuesInWholeColumn error:"
                + str(e))
            raise e
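
The core rejection rule above, shown standalone on a hypothetical two-column frame (not project data): a file is moved to the bad directory as soon as any one column has zero non-null values.

import numpy as np
import pandas as pd

csv = pd.DataFrame({"Wafer": ["w1", "w2"], "Sensor-1": [np.nan, np.nan]})
for column in csv:
    if (len(csv[column]) - csv[column].count()) == len(csv[column]):
        print(column, "has all values missing")  # prints for Sensor-1 only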
 def __init__(self, execution_id):
     self.mongodb = MongoDBOperation()
     self.az_blob_mgt = AzureBlobManagement()
     self.logger_db_writer = App_LoggerDB(execution_id=execution_id)
     self.good_file_path = "good-raw-file-prediction-validated"
     self.bad_file_path = "bad-raw-file-prediction-validated"
class DbOperationMongoDB:
    def __init__(self, execution_id):
        self.mongodb = MongoDBOperation()
        self.az_blob_mgt = AzureBlobManagement()
        self.logger_db_writer = App_LoggerDB(execution_id=execution_id)
        self.good_file_path = "good-raw-file-prediction-validated"
        self.bad_file_path = "bad-raw-file-prediction-validated"

    def insertIntoTableGoodData(self, column_names):
        """
        Description: Load all csv files from the good-raw directory into MongoDB database "prediction_database", collection "Good_Raw_Data".


        :return:
        """
        try:
            prediction_database = "prediction_database"
            prediction_collection = "Good_Raw_Data"
            database_name = "wafer_prediction_log"
            collection_name = "db_insert_log"
            self.mongodb.dropCollection(prediction_database,
                                        prediction_collection)
            self.logger_db_writer.log(
                database_name, collection_name,
                "Dropping collection:" + prediction_collection +
                " from database:" + prediction_database)
            self.logger_db_writer.log(
                database_name, collection_name,
                "Starting loading of good files into database:" +
                prediction_database + " and collection:" + prediction_collection
            )
            files = self.az_blob_mgt.getAllFileNameFromDirectory(
                self.good_file_path)
            self.logger_db_writer.log(
                database_name, collection_name,
                "No of files found in " + self.good_file_path + ": " +
                str(len(files)))
            for file in files:
                try:
                    self.logger_db_writer.log(
                        database_name, collection_name,
                        "Insertion of file " + file + " started...")
                    df = self.az_blob_mgt.readCsvFileFromDirectory(
                        self.good_file_path, file)
                    df.columns = column_names
                    self.mongodb.insertDataFrame(prediction_database,
                                                 prediction_collection, df)
                    self.logger_db_writer.log(
                        database_name, collection_name,
                        "File: {0} loaded successfully".format(file))
                except Exception as e:
                    self.logger_db_writer.log(database_name, collection_name,
                                              str(e))
                    self.az_blob_mgt.moveFileInDirectory(
                        self.good_file_path, self.bad_file_path, file)
                    self.logger_db_writer.log(
                        database_name, collection_name, "File: " + file +
                        " was not loaded successfully hence moved to dir:" +
                        self.bad_file_path)

        except Exception as e:
            error_message = "Error occured in class:DbOperationMongoDB method:insertIntoTableGoodData error:" + str(
                e)
            self.logger_db_writer.log(database_name, collection_name,
                                      error_message)

    def selectingDatafromtableintocsv(self):
        """

        :return:
        """
        try:
            directory_name = "prediction-file-from-db"
            file_name = "InputFile.csv"
            database_name = "wafer_prediction_log"
            collection_name = "export_to_csv"
            prediction_database = "prediction_database"
            prediction_collection = "Good_Raw_Data"
            msg = "starting of loading of database:" + prediction_database + ",collection:" + prediction_collection + " records into file:" + file_name
            self.logger_db_writer.log(database_name, collection_name, msg)
            df = self.mongodb.getDataFrameofCollection(prediction_database,
                                                       prediction_collection)
            msg = "Good_Raw_data has been loaded into pandas dataframe"
            self.logger_db_writer.log(database_name, collection_name, msg)
            self.az_blob_mgt.saveDataFrameTocsv(directory_name, file_name, df)
            msg = "InputFile.csv created successfully in directory" + directory_name
            self.logger_db_writer.log(database_name, collection_name, msg)
        except Exception as e:
            msg = "Error occured in class:DbOperationMongoDB method:insertIntoTableGoodData error:" + str(
                e)
            self.logger_db_writer.log(database_name, collection_name, msg)
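
A minimal sketch of the prediction DB round trip using the class above; column_names is hypothetical here and normally comes from the schema's ColName mapping (see valuesFromSchema):

db_ops = DbOperationMongoDB(execution_id="demo-run-001")  # hypothetical id
column_names = ["Wafer", "Sensor-1", "Sensor-2"]          # hypothetical; normally the schema's ColName
db_ops.insertIntoTableGoodData(column_names)              # good files -> MongoDB
db_ops.selectingDatafromtableintocsv()                    # MongoDB -> InputFile.csv in blob storage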
Example #14
def getEventAndSubject(data):
    event_type = None
    container = None
    if 'eventType' in data:
        event_type = data['eventType']
    if 'subject' in data:
        start_index = data['subject'].index('containers') + len('containers') + 1
        stop_index = data['subject'].index('/blobs/', start_index)
        container = data['subject'][start_index:stop_index]
    if container == 'avnish-yadav':
        if event_type == 'Microsoft.Storage.BlobCreated':
            azm = AzureBlobManagement(connection_string)
            azm_processing_dir = AzureBlobManagement()
            file_names = azm.getAllFileNameFromDirectory(directory_name=container)
            file_names = list(filter(lambda filename: filename.split(".")[-1] == 'csv', file_names))
            if len(file_names) > 0:
                is_created = azm_processing_dir.createDirectory("received-prediction", is_replace=True)
                if is_created:
                    for file in file_names:
                        df = azm.readCsvFileFromDirectory(container, file)
                        azm_processing_dir.saveDataFrameTocsv("received-prediction", file, df)
                    for file in file_names:
                        azm.moveFileInDirectory(container, "recycle-bin", file)
                    testing.predictionTest("received-prediction")
                    print(event_type, container)
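
A hedged illustration of the payload shape this handler expects: for blob events the subject has the form /blobServices/default/containers/<container>/blobs/<blob>, and the slice below recovers the container name exactly as getEventAndSubject does.

sample_event = {
    "eventType": "Microsoft.Storage.BlobCreated",
    "subject": "/blobServices/default/containers/avnish-yadav/blobs/wafer_08012020_120000.csv",
}
subject = sample_event["subject"]
start_index = subject.index('containers') + len('containers') + 1
stop_index = subject.index('/blobs/', start_index)
print(subject[start_index:stop_index])  # avnish-yadav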
 def __init__(self, execution_id):
     self.mongoDBObject = MongoDBOperation()
     self.azureBlobObject = AzureBlobManagement()
     self.execution_id = execution_id
 def __init__(self, log_database, log_collection, execution_id):
     self.log_database = log_database
     self.log_collection = log_collection
     self.execution_id = execution_id
     self.log_db_writer = App_LoggerDB(execution_id=execution_id)
     self.az_blob_mgt = AzureBlobManagement()
Example #17
class dataTransformPredict:
    """
                  This class shall be used for transforming the Good Raw Prediction Data before loading it in the Database.

                  Written By: iNeuron Intelligence
                  Version: 1.0
                  Revisions: None

                  """
    def __init__(self, execution_id):
        self.execution_id = execution_id
        #self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.goodDataPath = "good-raw-file-prediction-validated"
        #self.logger = App_Logger()
        self.log_db_writer = App_LoggerDB(execution_id=execution_id)
        self.log_database = "wafer_prediction_log"
        self.az_blob_mgt = AzureBlobManagement()

    def replaceMissingWithNull(self):
        """
                                  Method Name: replaceMissingWithNull
                                  Description: This method replaces the missing values in columns with "NULL" to
                                               store in the table. We use a substring of the first column to
                                               keep only "Integer" data, to ease the loading.
                                               This column is anyway going to be removed during prediction.

                                   Written By: iNeuron Intelligence
                                  Version: 1.0
                                  Revisions: None

                                          """

        try:
            log_collection = "data_transform_log"
            #log_file = open("Prediction_Logs/dataTransformLog.txt", 'a+')
            #onlyfiles = [f for f in listdir(self.goodDataPath)]
            onlyfiles = self.az_blob_mgt.getAllFileNameFromDirectory(
                self.goodDataPath)
            for file in onlyfiles:
                #csv = pandas.read_csv(self.goodDataPath+"/" + file)
                csv = self.az_blob_mgt.readCsvFileFromDirectory(
                    self.goodDataPath, file)
                csv.fillna('NULL', inplace=True)
                # #csv.update("'"+ csv['Wafer'] +"'")
                # csv.update(csv['Wafer'].astype(str))
                csv['Wafer'] = csv['Wafer'].str[6:]
                #csv.to_csv(self.goodDataPath+ "/" + file, index=None, header=True)
                self.az_blob_mgt.saveDataFrameTocsv(self.goodDataPath,
                                                    file,
                                                    csv,
                                                    index=None,
                                                    header=True)
                #self.logger.log(log_file," %s: File Transformed successfully!!" % file)
                self.log_db_writer.log(
                    self.log_database, log_collection,
                    "File {0} transformed successfully".format(file))
            #log_file.write("Current Date :: %s" %date +"\t" + "Current time:: %s" % current_time + "\t \t" +  + "\n")

        except Exception as e:
            #self.logger.log(log_file, "Data Transformation failed because:: %s" % e)
            #log_file.write("Current Date :: %s" %date +"\t" +"Current time:: %s" % current_time + "\t \t" + "Data Transformation failed because:: %s" % e + "\n")
            #log_file.close()
            self.log_db_writer.log(
                self.log_database, log_collection,
                'Data Transformation failed because:' + str(e))
            raise e
Example #18
def initiateTransfer(your_connection_string):
    """

    :param your_connection_string: pass azure storage account connection string
    :return:
    """
    try:
        azm_source = AzureBlobManagement()
        azm_destination = AzureBlobManagement(your_connection_string)
        prediction_dir = "prediction-batch-files"
        training_dir = "training-batch-files"

        training_files = azm_source.getAllFileNameFromDirectory(training_dir)
        prediction_files = azm_source.getAllFileNameFromDirectory(
            prediction_dir)

        for training_file in training_files:
            df = azm_source.readCsvFileFromDirectory(training_dir,
                                                     training_file)
            df.rename(columns={"Unnamed: 1": ""}, inplace=True)
            azm_destination.saveDataFrameTocsv(training_dir, training_file, df)
            print(
                "File:{} transfered successfully to dir {} of account name {}".
                format(training_file, training_dir,
                       azm_destination.blob_service_client.account_name))
        for prediction_file in prediction_files:
            df = azm_source.readCsvFileFromDirectory(prediction_dir,
                                                     prediction_file)
            df.rename(columns={"Unnamed: 0.1": ""}, inplace=True)
            azm_destination.saveDataFrameTocsv(prediction_dir, prediction_file,
                                               df)
            print(
                "File:{} transfered successfully to dir {} of account name {}".
                format(prediction_file, prediction_dir,
                       azm_destination.blob_service_client.account_name))
    except Exception as e:
        print(str(e))
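
A minimal call sketch for initiateTransfer; the argument is the destination storage account's connection string from the Azure portal (the value below is a placeholder, not a real key):

initiateTransfer(
    "DefaultEndpointsProtocol=https;AccountName=<account>;"
    "AccountKey=<key>;EndpointSuffix=core.windows.net")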
class Preprocessor:
    """
        This class shall be used to clean and transform the data before training.

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
    """
    def __init__(self, file_object, logger_object):
        self.file_object = file_object
        self.logger_object = logger_object
    """
    def __init__(self, log_database, log_collection, execution_id):
        self.log_database = log_database
        self.log_collection = log_collection
        self.execution_id = execution_id
        self.log_db_writer = App_LoggerDB(execution_id=execution_id)
        self.az_blob_mgt = AzureBlobManagement()

    def remove_columns(self, data, columns):
        """
                Method Name: remove_columns
                Description: This method removes the given columns from a pandas dataframe.
                Output: A pandas DataFrame after removing the specified columns.
                On Failure: Raise Exception

                Written By: iNeuron Intelligence
                Version: 1.0
                Revisions: None

        """
        #self.logger_object.log(self.file_object, 'Entered the remove_columns method of the Preprocessor class')
        self.log_db_writer.log(
            self.log_database, self.log_collection,
            "Entered the remove_columns method of the Preprocessor class")
        self.data = data
        self.columns = columns
        try:

            self.useful_data = self.data.drop(
                labels=self.columns,
                axis=1)  # drop the labels specified in the columns
            #self.logger_object.log(self.file_object,
            #                      'Column removal Successful.Exited the remove_columns method of the Preprocessor class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                "Column removal Successful.Exited the remove_columns method of the Preprocessor class"
            )
            return self.useful_data
        except Exception as e:
            #self.logger_object.log(self.file_object,'Exception occured in remove_columns method of the Preprocessor class. Exception message:  '+str(e))
            #self.logger_object.log(self.file_object,
            #                      'Column removal Unsuccessful. Exited the remove_columns method of the Preprocessor class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                "Exception occured in remove_columns method of the Preprocessor class. Exception message:  "
                + str(e))
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Column removal Unsuccessful. Exited the remove_columns method of the Preprocessor class'
            )
            raise Exception()

    def separate_label_feature(self, data, label_column_name):
        """
                        Method Name: separate_label_feature
                        Description: This method separates the features and the Label columns.
                        Output: Returns two separate Dataframes, one containing features and the other containing Labels .
                        On Failure: Raise Exception

                        Written By: iNeuron Intelligence
                        Version: 1.0
                        Revisions: None

                """
        #self.logger_object.log(self.file_object, 'Entered the separate_label_feature method of the Preprocessor class')
        self.log_db_writer.log(
            self.log_database, self.log_collection,
            'Entered the separate_label_feature method of the Preprocessor class'
        )
        try:
            self.X = data.drop(
                labels=label_column_name, axis=1
            )  # drop the columns specified and separate the feature columns
            self.Y = data[label_column_name]  # Filter the Label columns
            #self.logger_object.log(self.file_object,
            #                       'Label Separation Successful. Exited the separate_label_feature method of the Preprocessor class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Label Separation Successful. Exited the separate_label_feature method of the Preprocessor class'
            )
            return self.X, self.Y
        except Exception as e:
            #self.logger_object.log(self.file_object,'Exception occured in separate_label_feature method of the Preprocessor class. Exception message:  ' + str(e))
            #self.logger_object.log(self.file_object, 'Label Separation Unsuccessful. Exited the separate_label_feature method of the Preprocessor class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Exception occurred in separate_label_feature method of the Preprocessor class. Exception message: '
                + str(e))
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Label Separation Unsuccessful. Exited the separate_label_feature method of the Preprocessor class'
            )
            raise Exception()

    def is_null_present(self, data):
        """
                                Method Name: is_null_present
                                Description: This method checks whether there are null values present in the pandas Dataframe or not.
                                Output: Returns a Boolean Value. True if null values are present in the DataFrame, False if they are not present.
                                On Failure: Raise Exception

                                Written By: iNeuron Intelligence
                                Version: 1.0
                                Revisions: None

                        """
        #self.logger_object.log(self.file_object, 'Entered the is_null_present method of the Preprocessor class')
        self.log_db_writer.log(
            self.log_database, self.log_collection,
            'Entered the is_null_present method of the Preprocessor class')
        self.null_present = False
        try:
            self.null_counts = data.isna().sum()  # count of null values per column
            for i in self.null_counts:
                if i > 0:
                    self.null_present = True
                    break
            if self.null_present:  # write the logs to see which columns have null values
                dataframe_with_null = pd.DataFrame()
                dataframe_with_null['columns'] = data.columns
                dataframe_with_null['missing values count'] = np.asarray(data.isna().sum())
                #dataframe_with_null.to_csv('preprocessing_data/null_values.csv') # storing the null column information to file
                self.az_blob_mgt.saveDataFrameTocsv(
                    "preprocessing-data",
                    "null_values.csv",
                    data_frame=dataframe_with_null)
            #self.logger_object.log(self.file_object,'Finding missing values is a success.Data written to the null values file. Exited the is_null_present method of the Preprocessor class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Finding missing values is a success. Data written to the null values file in the [preprocessing-data] container. Exited the is_null_present method of the Preprocessor class'
            )
            return self.null_present
        except Exception as e:
            #self.logger_object.log(self.file_object,'Exception occured in is_null_present method of the Preprocessor class. Exception message:  ' + str(e))
            #self.logger_object.log(self.file_object,'Finding missing values failed. Exited the is_null_present method of the Preprocessor class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Exception occurred in is_null_present method of the Preprocessor class. Exception message: '
                + str(e))
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Finding missing values failed. Exited the is_null_present method of the Preprocessor class'
            )

            raise Exception()
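
A minimal sketch of the same null check on a toy DataFrame, building the per-column report in memory instead of saving it to Azure Blob Storage; the column names here are assumptions for illustration only.

import numpy as np
import pandas as pd

data = pd.DataFrame({'Sensor-1': [0.1, np.nan, 0.3],
                     'Sensor-2': [1.5, 1.7, 1.9]})
null_present = bool(data.isna().sum().gt(0).any())  # True if any column has nulls
report = pd.DataFrame({'columns': data.columns,
                       'missing values count': data.isna().sum().values})
print(null_present)  # prints: True
print(report)        # one row per column with its missing-value count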

    def impute_missing_values(self, data):
        """
                                        Method Name: impute_missing_values
                                        Description: This method replaces all the missing values in the Dataframe using KNN Imputer.
                                        Output: A Dataframe which has all the missing values imputed.
                                        On Failure: Raise Exception

                                        Written By: iNeuron Intelligence
                                        Version: 1.0
                                        Revisions: None
                     """
        #self.logger_object.log(self.file_object, 'Entered the impute_missing_values method of the Preprocessor class')
        self.log_db_writer.log(
            self.log_database, self.log_collection,
            'Entered the impute_missing_values method of the Preprocessor class'
        )
        self.data = data
        try:
            imputer = KNNImputer(n_neighbors=3,
                                 weights='uniform',
                                 missing_values=np.nan)
            self.new_array = imputer.fit_transform(self.data)  # impute the missing values
            # convert the nd-array returned in the step above to a Dataframe
            self.new_data = pd.DataFrame(data=self.new_array,
                                         columns=self.data.columns)
            #self.logger_object.log(self.file_object, 'Imputing missing values Successful. Exited the impute_missing_values method of the Preprocessor class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Imputing missing values Successful. Exited the impute_missing_values method of the Preprocessor class'
            )
            return self.new_data
        except Exception as e:
            #self.logger_object.log(self.file_object,'Exception occured in impute_missing_values method of the Preprocessor class. Exception message:  ' + str(e))
            #self.logger_object.log(self.file_object,'Imputing missing values failed. Exited the impute_missing_values method of the Preprocessor class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Exception occurred in impute_missing_values method of the Preprocessor class. Exception message: '
                + str(e))
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Imputing missing values failed. Exited the impute_missing_values method of the Preprocessor class'
            )

            raise Exception()
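
A standalone sketch of the same KNN imputation step, using the identical KNNImputer configuration on a toy DataFrame containing a few NaNs; only the column names and values are invented for illustration.

import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer

data = pd.DataFrame({'Sensor-1': [0.1, np.nan, 0.3, 0.2],
                     'Sensor-2': [1.5, 1.7, np.nan, 1.6]})
# same configuration as the method above: 3 neighbours, uniform weights
imputer = KNNImputer(n_neighbors=3, weights='uniform', missing_values=np.nan)
imputed = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)
print(imputed.isna().sum().sum())  # prints: 0 -- every NaN has been imputed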

    def get_columns_with_zero_std_deviation(self, data):
        """
                                                Method Name: get_columns_with_zero_std_deviation
                                                Description: This method finds out the columns which have a standard deviation of zero.
                                                Output: List of the columns with standard deviation of zero
                                                On Failure: Raise Exception

                                                Written By: iNeuron Intelligence
                                                Version: 1.0
                                                Revisions: None
                             """
        #self.logger_object.log(self.file_object, 'Entered the get_columns_with_zero_std_deviation method of the Preprocessor class')
        self.log_db_writer.log(
            self.log_database, self.log_collection,
            'Entered the get_columns_with_zero_std_deviation method of the Preprocessor class'
        )

        self.columns = data.columns
        self.data_n = data.describe()
        self.col_to_drop = []
        try:
            for x in self.columns:
                if self.data_n[x]['std'] == 0:  # check if the standard deviation is zero
                    self.col_to_drop.append(x)  # collect the columns whose standard deviation is zero
            #self.logger_object.log(self.file_object, 'Column search for Standard Deviation of Zero Successful. Exited the get_columns_with_zero_std_deviation method of the Preprocessor class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Column search for Standard Deviation of Zero Successful. Exited the get_columns_with_zero_std_deviation method of the Preprocessor class'
            )

            return self.col_to_drop

        except Exception as e:
            #self.logger_object.log(self.file_object,'Exception occured in get_columns_with_zero_std_deviation method of the Preprocessor class. Exception message:  ' + str(e))
            #self.logger_object.log(self.file_object, 'Column search for Standard Deviation of Zero Failed. Exited the get_columns_with_zero_std_deviation method of the Preprocessor class')
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Exception occurred in get_columns_with_zero_std_deviation method of the Preprocessor class. Exception message: '
                + str(e))
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Column search for Standard Deviation of Zero Failed. Exited the get_columns_with_zero_std_deviation method of the Preprocessor class'
            )

            raise Exception()
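
A minimal sketch of the same zero-variance check, reading the 'std' row of pandas describe() exactly as the method does; the constant column below is hypothetical and included only to show a positive hit.

import pandas as pd

data = pd.DataFrame({'Sensor-1': [0.1, 0.2, 0.3],
                     'Constant': [5.0, 5.0, 5.0]})
stats = data.describe()  # summary statistics, including the 'std' row
cols_to_drop = [c for c in data.columns if stats[c]['std'] == 0]
print(cols_to_drop)  # prints: ['Constant']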