Exemple #1
0
def _container_from_subject(subject):
    """Return the container name embedded in a blob event subject, or None.

    The name is the text that follows the ``containers`` marker (plus one
    separator character) up to the next ``/blobs/``. None is returned when
    either marker is missing, instead of raising ``ValueError``.
    """
    marker = 'containers'
    marker_at = subject.find(marker)
    if marker_at == -1:
        return None
    # Skip the marker itself plus the single separator that follows it.
    start_index = marker_at + len(marker) + 1
    stop_index = subject.find('/blobs/', start_index)
    if stop_index == -1:
        return None
    return subject[start_index:stop_index]


def getEventAndSubject(data):
    """React to a blob-created event for the ``avnish-yadav`` container.

    Reads ``eventType`` and ``subject`` from ``data``. When the subject
    names the ``avnish-yadav`` container and the event type is
    ``Microsoft.Storage.BlobCreated``, every ``csv`` file of that container
    is copied into ``received-prediction``, the originals are moved to
    ``recycle-bin`` and ``testing.predictionTest`` is started.

    Any other event, including one whose subject does not contain the
    ``containers`` / ``/blobs/`` markers, is ignored.

    Args:
        data: Mapping describing the event (presumably an Azure Event Grid
            payload - confirm against the caller).

    Returns:
        None in every case.
    """
    event_type = data.get('eventType')
    container = None
    if 'subject' in data:
        container = _container_from_subject(data['subject'])

    # Guard clauses: only one container / event type combination is handled.
    if container != 'avnish-yadav':
        return
    if event_type != 'Microsoft.Storage.BlobCreated':
        return

    azm = AzureBlobManagement(connection_string)
    azm_processing_dir = AzureBlobManagement()
    file_names = [
        name
        for name in azm.getAllFileNameFromDirectory(directory_name=container)
        if name.split(".")[-1] == 'csv'
    ]
    if not file_names:
        return

    is_created = azm_processing_dir.createDirectory("received-prediction",
                                                    is_replace=True)
    # The return type of createDirectory is not visible here, so the exact
    # comparison is kept rather than a plain truthiness test.
    if is_created == True:  # noqa: E712
        # Copy everything first, then move the originals away.
        for file in file_names:
            df = azm.readCsvFileFromDirectory(container, file)
            azm_processing_dir.saveDataFrameTocsv("received-prediction", file,
                                                  df)
        for file in file_names:
            azm.moveFileInDirectory(container, "recycle-bin", file)
        testing.predictionTest("received-prediction")
        print(event_type, container)
Exemple #2
0
class Prediction_Data_validation:
    """Validate the raw prediction batch files.

    Files are checked by name, by column count and for columns that hold no
    values at all. Files that pass stay in the "good" directory, the others
    end up in the "bad" directory. Storage access goes through
    ``AzureBlobManagement``, the schema is read through
    ``MongodbOperation`` and progress is written through ``App_LoggerDB``.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None
    """

    # Log database used by every method of this class.
    _LOG_DATABASE = "strength_prediction_log"

    def __init__(self, path, execution_id):
        """Store the batch location and build the helper objects.

        Args:
            path: Directory name of the incoming batch files.
            execution_id: Identifier passed to the logger and used to name
                the archive directory for bad files.
        """
        self.Batch_Directory = path
        self.execution_id = execution_id
        self.collection_name = "strength_schema_prediction"  # code added by Avnish yadav
        self.database_name = "Wafer-sys"  # code added by Avnish yadav
        self.logger_db_writer = App_LoggerDB(
            execution_id=execution_id)  # code added by Avnish yadav
        self.mongdb = MongodbOperation()
        self.az_blob_mgt = AzureBlobManagement()
        self.good_directory_path = "good-raw-file-prediction-validated"
        self.bad_directory_path = "bad-raw-file-prediction-validated"

    def valuesFromSchema(self):
        """Read the validation parameters from the schema collection.

        Returns:
            Tuple ``(LengthOfDateStampInFile, LengthOfTimeStampInFile,
            column_names, NumberofColumns)`` taken from row 0 of the schema
            collection.

        Raises:
            ValueError, KeyError, Exception: logged, then re-raised with the
            original message and traceback.
        """
        log_database = self._LOG_DATABASE
        log_collection = "values_from_schema_validation"
        try:
            df_schema = self.mongdb.getDataFrameofCollection(
                self.database_name, self.collection_name)
            # Only row 0 of the schema collection is read.
            dic = {
                column: df_schema.loc[0, column]
                for column in df_schema.columns
            }
            del df_schema
            # Not returned, but still looked up so that a schema without
            # this key keeps failing with KeyError.
            _sample_file_name = dic['SampleFileName']
            LengthOfDateStampInFile = dic['LengthOfDateStampInFile']
            LengthOfTimeStampInFile = dic['LengthOfTimeStampInFile']
            column_names = dic['ColName']
            NumberofColumns = dic['NumberofColumns']

            message = ("LengthOfDateStampInFile:: %s\t"
                       "LengthOfTimeStampInFile:: %s\t "
                       "NumberofColumns:: %s\n" %
                       (LengthOfDateStampInFile, LengthOfTimeStampInFile,
                        NumberofColumns))
            self.logger_db_writer.log(log_database, log_collection, message)

        except ValueError:
            self.logger_db_writer.log(
                log_database, log_collection,
                "ValueError:Value not found inside schema collection")
            raise

        except KeyError:
            self.logger_db_writer.log(
                log_database, log_collection,
                "KeyError:Key value error incorrect key passed")
            raise
        except Exception as e:
            self.logger_db_writer.log(log_database, log_collection, str(e))
            raise

        return LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, NumberofColumns

    def manualRegexCreation(self):
        """Return the regex used to validate prediction file names.

        The pattern accepts ``cement_strength_<digits>_<digits>.csv`` and
        nothing else; the two digit groups are the parts whose lengths
        ``validationFileNameRaw`` compares with the schema values.

        Returns:
            The regex pattern as a string.
        """
        # Literal prefix instead of character classes, raw string so the
        # backslashes are not treated as (invalid) string escapes.
        regex = r"cement_strength_\d+_\d+\.csv$"
        return regex

    def createDirectoryForGoodBadRawData(self):
        """Create (or replace) the good and bad directories.

        Raises:
            Exception: whatever ``createDir`` raised, after it is logged.
        """
        log_database = self._LOG_DATABASE
        log_collection = "general_log"
        try:
            for directory in (self.good_directory_path,
                              self.bad_directory_path):
                self.az_blob_mgt.createDir(directory, is_replace=True)
            msg = self.good_directory_path + " and " + self.bad_directory_path + " created successfully."
            self.logger_db_writer.log(log_database, log_collection, msg)
        except Exception:
            msg = "Error Occured in class Prediction_Data_validation method:createDirectoryForGoodBadRawData error: Failed to create directory " + self.good_directory_path + " and " + self.bad_directory_path
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise

    def _delete_directory(self, directory, method_name):
        """Delete ``directory`` and log the outcome on behalf of ``method_name``."""
        log_database = self._LOG_DATABASE
        log_collection = "general_log"
        try:
            self.az_blob_mgt.deleteDir(directory)
            self.logger_db_writer.log(log_database, log_collection,
                                      directory + " deleted successfully!!")
        except Exception:
            msg = ("Error Occured in class Prediction_Data_validation method:" +
                   method_name + " Error occured while deleting :" + directory)
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise

    def deleteExistingGoodDataTrainingFolder(self):
        """Delete the good directory.

        Raises:
            Exception: whatever ``deleteDir`` raised, after it is logged.
        """
        self._delete_directory(self.good_directory_path,
                               "deleteExistingGoodDataTrainingFolder")

    def deleteExistingBadDataTrainingFolder(self):
        """Delete the bad directory.

        Raises:
            Exception: whatever ``deleteDir`` raised, after it is logged.
        """
        self._delete_directory(self.bad_directory_path,
                               "deleteExistingBadDataTrainingFolder")

    def moveBadFilesToArchiveBad(self):
        """Move every bad file to ``lap-<execution_id>`` and delete the bad directory.

        Raises:
            Exception: any failure while listing, moving or deleting, after
            it is logged.
        """
        log_database = self._LOG_DATABASE
        log_collection = "general_log"

        try:
            source = self.bad_directory_path
            destination = "lap-" + self.execution_id
            self.logger_db_writer.log(log_database, log_collection,
                                      "Started moving bad raw data..")
            for file in self.az_blob_mgt.getAllFileNameFromDirectory(source):
                self.az_blob_mgt.moveFileinDir(source, destination, file)
                self.logger_db_writer.log(
                    log_database, log_collection, "File:" + file +
                    " moved to directory:" + destination + " successfully.")

            self.logger_db_writer.log(
                log_database, log_collection,
                "All bad raw file moved to directory:" + destination)

            self.az_blob_mgt.deleteDir(source)
            self.logger_db_writer.log(log_database, log_collection,
                                      "Deleting bad raw directory:" + source)

        except Exception as e:
            self.logger_db_writer.log(
                log_database, log_collection,
                "class Prediction_Data_validation method:moveBadFilesToArchiveBad Error while moving bad files to archive:"
                + str(e))
            raise

    @staticmethod
    def _has_valid_name(filename, regex, date_length, time_length):
        """Tell whether ``filename`` matches ``regex`` and has stamps of the given lengths."""
        if not re.match(regex, filename):
            return False
        stem = re.split(r'\.csv', filename)[0]
        parts = stem.split('_')
        # A caller-supplied regex may accept names with fewer segments;
        # treat those as invalid instead of failing with IndexError.
        if len(parts) < 4:
            return False
        return len(parts[2]) == date_length and len(parts[3]) == time_length

    def validationFileNameRaw(self, regex, LengthOfDateStampInFile,
                              LengthOfTimeStampInFile):
        """Sort the batch files into the good and bad directories by name.

        A name is valid when it matches ``regex`` and its third and fourth
        ``_``-separated parts have the given lengths. Valid files are copied
        to the good directory, all others to the bad directory.

        Args:
            regex: Pattern the file name has to match (``re.match``).
            LengthOfDateStampInFile: Required length of the third name part.
            LengthOfTimeStampInFile: Required length of the fourth name part.

        Raises:
            Exception: any failure while copying or logging, after it is
            logged.
        """
        # Start from fresh good/bad directories in case a previous run left
        # them behind.
        self.createDirectoryForGoodBadRawData()
        onlyfiles = self.az_blob_mgt.getAllFileNameFromDirectory(
            self.Batch_Directory)
        log_database = self._LOG_DATABASE
        log_collection = "name_validation_log"
        try:
            for filename in onlyfiles:
                if self._has_valid_name(filename, regex,
                                        LengthOfDateStampInFile,
                                        LengthOfTimeStampInFile):
                    destination = self.good_directory_path
                    msg = "Valid File name!! File moved to " + destination + filename
                else:
                    destination = self.bad_directory_path
                    msg = "Invalid File Name !! File moved to " + destination + filename
                self.az_blob_mgt.CopyFileinDir(self.Batch_Directory,
                                               destination, filename)
                self.logger_db_writer.log(log_database, log_collection, msg)
        except Exception as e:
            msg = "Error occured while validating FileName " + str(e)
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise

    def validateColumnLength(self, NumberofColumns):
        """Check the column count of every file in the good directory.

        Files whose column count equals ``NumberofColumns`` are written back
        to the good directory; all others are moved to the bad directory.

        Args:
            NumberofColumns: Expected number of columns.

        Raises:
            Exception: any failure while reading, saving or moving, after it
            is logged.
        """
        log_database = self._LOG_DATABASE
        log_collection = "column_validation_log"
        try:
            self.logger_db_writer.log(log_database, log_collection,
                                      "Column length validation Started!!")
            for file in self.az_blob_mgt.getAllFileNameFromDirectory(
                    self.good_directory_path):
                csv = self.az_blob_mgt.readCSVFilefromDir(
                    self.good_directory_path, file)
                if csv.shape[1] == NumberofColumns:
                    self.az_blob_mgt.saveDataFrametoCSV(
                        self.good_directory_path,
                        file,
                        csv,
                        index=None,
                        header=True)
                else:
                    self.az_blob_mgt.moveFileinDir(self.good_directory_path,
                                                   self.bad_directory_path,
                                                   file)
                    self.logger_db_writer.log(
                        log_database, log_collection,
                        "Invalid Column Length for the file!! "
                        "File moved to Bad Raw Folder :: %s" % file)
            self.logger_db_writer.log(log_database, log_collection,
                                      "Column Length Validation Completed!!")

        except Exception as e:
            self.logger_db_writer.log(log_database, log_collection,
                                      'Error Occured::' + str(e))
            raise

    def deletePredictionFile(self):
        """Delete ``Prediction.csv`` from ``prediction-file`` when both exist.

        Raises:
            Exception: any failure while listing or deleting, after it is
            logged.
        """
        log_database = self._LOG_DATABASE
        log_collection = "general_log"
        directory = "prediction-file"
        filename = "Prediction.csv"
        try:
            if directory in self.az_blob_mgt.dir_list:
                filenames = self.az_blob_mgt.getAllFileNameFromDirectory(
                    directory_name=directory)
                if filename in filenames:
                    self.az_blob_mgt.deleteFilefromDir(
                        directory_name=directory, filename=filename)
                    self.logger_db_writer.log(
                        log_database, log_collection, filename +
                        " is deleted from dir:" + directory + " successfully")
        except Exception as e:
            self.logger_db_writer.log(
                log_database, log_collection,
                "Error occure while deleting prediction file from prediction-file directory"
                + str(e))
            raise

    def validateMissingValuesInWholeColumn(self):
        """Reject files that contain a column without a single value.

        Every file in the good directory is read. A file with at least one
        column whose non-null count is zero is moved to the bad directory;
        any other file gets its ``"Unnamed: 0"`` column (if present) renamed
        to ``"Wafer"`` and is written back to the good directory.

        Raises:
            Exception: any failure while reading, saving or moving, after it
            is logged.
        """
        log_database = self._LOG_DATABASE
        log_collection = "missing_values_in_column"
        try:
            self.logger_db_writer.log(log_database, log_collection,
                                      "Missing Values Validation Started!!")

            for file in self.az_blob_mgt.getAllFileNameFromDirectory(
                    self.good_directory_path):
                csv = self.az_blob_mgt.readCSVFilefromDir(
                    self.good_directory_path, file)
                # count() ignores missing values, so 0 means the whole
                # column is empty.
                has_empty_column = any(csv[column].count() == 0
                                       for column in csv)
                if has_empty_column:
                    self.az_blob_mgt.moveFileinDir(self.good_directory_path,
                                                   self.bad_directory_path,
                                                   file)
                    self.logger_db_writer.log(
                        log_database, log_collection,
                        "Column with all values missing found!! File moved to Bad Raw Folder :: %s"
                        % file)
                else:
                    # The unnamed column may not be present; rename is a
                    # no-op in that case.
                    csv.rename(columns={"Unnamed: 0": "Wafer"}, inplace=True)
                    self.az_blob_mgt.saveDataFrametoCSV(
                        self.good_directory_path,
                        file,
                        csv,
                        index=None,
                        header=True)
        except Exception as e:
            self.logger_db_writer.log(log_database, log_collection,
                                      "Error occured:" + str(e))
            raise
class File_Operation:
    """Save a trained model and load a saved model for prediction.

    Models are stored through ``AzureBlobManagement`` in a directory named
    ``model-<filename>`` as ``<filename>.sav``.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None
    """

    def __init__(self, log_database, log_collection, execution_id):
        """Keep the log target and build the logger and storage helpers.

        Args:
            log_database: Database name passed to every log call.
            log_collection: Collection name passed to every log call.
            execution_id: Identifier passed to ``App_LoggerDB``.
        """
        self.log_database = log_database
        self.log_collection = log_collection
        self.execution_id = execution_id
        self.log_db_writer = App_LoggerDB(execution_id=self.execution_id)
        self.model_directory = 'model'
        self.az_blob_mgt = AzureBlobManagement()

    def _log(self, message):
        """Write ``message`` to this instance's log database and collection."""
        self.log_db_writer.log(self.log_database, self.log_collection,
                               message)

    def save_model(self, model, filename):
        """Save ``model`` as ``<filename>.sav`` in directory ``model-<filename>``.

        The directory is created, or replaced when it already exists.

        Args:
            model: Object to store.
            filename: Name used for both the directory suffix and the file.

        Returns:
            The string ``'success'``.

        Raises:
            Exception: when creating the directory or saving fails; the
            original error is attached as the cause.
        """
        self._log('Entered the save_model method of the File_Operation class')
        directory_name = self.model_directory + '-' + filename
        try:
            self.az_blob_mgt.createDir(
                directory_name, is_replace=True)  # create or replace directory
            self.az_blob_mgt.saveObject(directory_name=directory_name,
                                        filename=filename + '.sav',
                                        object_name=model)
            self._log(
                'Model File ' + filename +
                ' saved. Exited the save_model method of the File_Operation class'
            )
            return 'success'
        except Exception as e:
            self._log('Exception occured in save_model method of the'
                      ' File_Operation class. Exception message:  ' + str(e))
            self._log(
                'Model File ' + filename +
                ' could not be saved. Exited the save_model method of the File_Operation class'
            )
            # Same exception type as before, now carrying the cause.
            raise Exception(str(e)) from e

    def load_model(self, filename):
        """Load ``<filename>.sav`` from directory ``model-<filename>``.

        Args:
            filename: Model name without the ``.sav`` extension.

        Returns:
            The object returned by ``loadObject``.

        Raises:
            Exception: when loading fails; the original error is attached as
            the cause.
        """
        self._log('Entered the load_model method of the File_Operation class')
        try:
            directory = self.model_directory + '-' + filename
            blob_name = filename + '.sav'
            object_model = self.az_blob_mgt.loadObject(directory, blob_name)
            self._log('Model File ' + blob_name + ' loaded. Exited '
                      'the load_model method of the File_Operation class')
            return object_model
        except Exception as e:
            self._log(
                'Exception occured in load_model method of the File_Operation class. Exception message:  '
                + str(e))
            self._log(
                'Model File ' + str(filename) +
                ' could not be loaded. Exited the load_model method of the File_Operation class'
            )
            raise Exception(str(e)) from e

    def find_correct_model_file(self, cluster_number):
        """Return the name of the saved model whose file name contains the cluster number.

        Every directory in ``dir_list`` named like ``model-<letters><digit>``
        is listed; a file matches when ``str(cluster_number)`` occurs in its
        name. When several files match, the last one found wins.

        NOTE(review): the match is a plain substring test, so cluster ``1``
        also matches a file for cluster ``10`` - confirm the naming scheme
        before tightening it.

        Args:
            cluster_number: Cluster identifier to look for.

        Returns:
            The matching file name up to its first ``.``.

        Raises:
            Exception: when no file matches or the lookup fails; the
            original error is attached as the cause.
        """
        self._log(
            'Entered the find_correct_model_file method of the File_Operation class'
        )
        try:
            # These attributes are kept because the original code exposed
            # them on the instance.
            self.cluster_number = cluster_number
            self.folder_name = self.model_directory
            self.list_of_model_files = []
            self.required_files = self.az_blob_mgt.dir_list

            # selecting model directory only
            model_dir_pattern = re.compile(r"^model[-][a-zA-Z]{2,17}[0-9]")
            self.list_of_files = [
                directory for directory in self.required_files
                if model_dir_pattern.search(directory)
            ]

            cluster_token = str(cluster_number)
            # Local result, so a name found by an earlier call can never be
            # returned by mistake.
            matched_name = None
            for directory in self.list_of_files:
                self.file = directory
                try:
                    models = list(
                        self.az_blob_mgt.getAllFileNameFromDirectory(
                            directory))
                except Exception as listing_error:
                    # Best effort: a directory that cannot be listed is
                    # skipped, the others are still searched.
                    self._log('Could not list model directory ' + directory +
                              ': ' + str(listing_error))
                    continue
                for candidate in models:
                    if cluster_token in candidate:
                        matched_name = candidate

            if matched_name is None:
                raise ValueError('No model file found for cluster ' +
                                 cluster_token)

            self.model_name = matched_name.split('.')[0]
            self._log(
                'Exited the find_correct_model_file method of the File_Operation class.'
            )
            return self.model_name
        except Exception as e:
            self._log(
                'Exception occured in find_correct_model_file method of the File_Operation class. Exception message:  '
                + str(e))
            self._log(
                'Exited the find_correct_model_file method of the File_Operation class with Failure'
            )
            raise Exception(str(e)) from e
Exemple #4
0
class Raw_Data_validation:
    """
             This class shall be used for handling all the validation done on the Raw Training Data!!.

             Written By: iNeuron Intelligence
             Version: 1.0
             Revisions: None

             """
    def __init__(self, path, execution_id):
        """
        Prepare the validator for one run over a batch of raw training files.

        path: directory name of the raw training batch; stored as Batch_Directory.
        execution_id: run identifier; stored on the instance and passed to the
                      database logger.
        """
        self.Batch_Directory, self.execution_id = path, execution_id

        # Database / collection names stored for the schema lookup.
        self.database_name = "Wafer-sys"
        self.collection_name = "schema-training"

        # Directory names used for validated ("good") and rejected ("bad") files.
        self.good_directory_path = "good-raw-file-train-validated"
        self.bad_directory_path = "bad-raw-file-train-validated"

        # Helpers are constructed in the same order as before:
        # logger, MongoDB helper, blob-storage helper.
        self.logger_db_writer = App_LoggerDB(execution_id=execution_id)
        self.mongdb = MongodbOperation()
        self.az_blob_mgt = AzureBlobManagement()

    def valuesFromSchema(self):
        """
                        Method Name: valuesFromSchema
                        Description: This method extracts all the relevant information from the pre-defined "Schema" file.
                        Output: LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, Number of Columns
                        On Failure: Raise ValueError,KeyError,Exception

                         Written By: iNeuron Intelligence
                        Version: 1.0
                        Revisions: None

                                """
        log_database = "wafer_training_log"
        log_collection = "values_from_schema_validation"
        try:

            #with open(self.schema_path, 'r') as f:
            #    dic = json.load(f)
            #    f.close()

            log_database = "wafer_training_log"
            log_collection = "values_from_schema_validation"
            df_schema_training = self.mongdb.getDataFrameofCollection(
                self.database_name, self.collection_name)
            dic = {}
            for i in df_schema_training.columns:
                dic.update({i: df_schema_training.loc[0, i]})
            #[dic.update({i: df_schema_training.loc[0, i]}) for i in df_schema_training.columns]
            print(dic)
            del df_schema_training

            pattern = dic['SampleFileName']
            LengthOfDateStampInFile = dic['LengthOfDateStampInFile']
            LengthOfTimeStampInFile = dic['LengthOfTimeStampInFile']
            column_names = dic['ColName']
            NumberofColumns = dic['NumberofColumns']

            #file = open("Training_Logs/valuesfromSchemaValidationLog.txt", 'a+')
            message = "LengthOfDateStampInFile:: %s" % LengthOfDateStampInFile + "\t" + "LengthOfTimeStampInFile:: %s" % LengthOfTimeStampInFile + "\t " + "NumberofColumns:: %s" % NumberofColumns + "\n"
            self.logger_db_writer.log(log_database, log_collection, message)

            #file.close()

        except ValueError:
            file = open("Training_Logs/valuesfromSchemaValidationLog.txt",
                        'a+')
            self.logger_db_writer.log(
                log_database, log_collection,
                "ValueError:Value not found inside schema_training.json")
            file.close()
            raise ValueError

        except KeyError:
            file = open("Training_Logs/valuesfromSchemaValidationLog.txt",
                        'a+')
            self.logger_db_writer.log(
                log_database, log_collection,
                "KeyError:Key value error incorrect key passed")
            file.close()
            raise KeyError

        except Exception as e:
            file = open("Training_Logs/valuesfromSchemaValidationLog.txt",
                        'a+')
            self.logger_db_writer.log(log_database, log_collection, str(e))
            file.close()
            raise e

        return LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, NumberofColumns

    def manualRegexCreation(self):
        """
                                Method Name: manualRegexCreation
                                Description: This method contains a manually defined regex based on the "FileName" given in "Schema" file.
                                            This Regex is used to validate the filename of the training data.
                                Output: Regex pattern
                                On Failure: None

                                 Written By: iNeuron Intelligence
                                Version: 1.0
                                Revisions: None

                                        """
        regex = "['wafer']+['\_'']+[\d_]+[\d]+\.csv"
        return regex

    def createDirectoryForGoodBadRawData(self):
        """
                                      Method Name: createDirectoryForGoodBadRawData
                                      Description: This method creates directories to store the Good Data and Bad Data
                                                    after validating the training data.

                                      Output: None
                                      On Failure: OSError

                                       Written By: iNeuron Intelligence
                                      Version: 1.0
                                      Revisions: None

                                              """
        """try:
            path = os.path.join("Training_Raw_files_validated/", "Good_Raw/")
            if not os.path.isdir(path):
                os.makedirs(path)
            path = os.path.join("Training_Raw_files_validated/", "Bad_Raw/")
            if not os.path.isdir(path):
                os.makedirs(path)"""
        log_database = "wafer_training_log"
        log_collection = "general_log"
        try:
            log_database = "wafer_training_log"
            log_collection = "general_log"
            self.az_blob_mgt.createDir(self.good_directory_path,
                                       is_replace=True)
            self.az_blob_mgt.createDir(self.bad_directory_path,
                                       is_replace=True)
            msg = self.good_directory_path + " and " + self.bad_directory_path + " created successfully."
            print(msg)
            self.logger_db_writer.log(log_database, log_collection, msg)
        except Exception as e:
            msg = "Error Occured in class Raw_Data_validation method:createDirectoryForGoodBadRawData error: Failed to create directory " + self.good_directory_path + " and " + self.bad_directory_path
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise e

    def deleteExistingGoodDataTrainingFolder(self):
        """
                                            Method Name: deleteExistingGoodDataTrainingFolder
                                            Description: This method deletes the directory made  to store the Good Data
                                                          after loading the data in the table. Once the good files are
                                                          loaded in the DB,deleting the directory ensures space optimization.
                                            Output: None
                                            On Failure: OSError

                                             Written By: iNeuron Intelligence
                                            Version: 1.0
                                            Revisions: None

                                                    """

        #try:
        #path = 'Training_Raw_files_validated/'
        ### if os.path.isdir("ids/" + userName):
        ## if os.path.isdir(path + 'Bad_Raw/'):
        ##     shutil.rmtree(path + 'Bad_Raw/')
        #if os.path.isdir(path + 'Good_Raw/'):
        #    shutil.rmtree(path + 'Good_Raw/')
        #    file = open("Training_Logs/GeneralLog.txt", 'a+')
        #    self.logger.log(file,"GoodRaw directory deleted successfully!!!")
        #    file.close()
        log_database = "wafer_training_log"
        log_collection = "general_log"
        try:
            log_database = "wafer_training_log"
            log_collection = "general_log"
            self.az_blob_mgt.deleteDir(self.good_directory_path)
            self.logger_db_writer.log(
                log_database, log_collection,
                self.good_directory_path + " deleted successfully!!")

        except Exception as e:
            msg = "Error Occured in class Raw_Data_validation method:deleteExistingGoodDataTrainingFolder Error occured while deleting :" + self.good_directory_path
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise e

    def deleteExistingBadDataTrainingFolder(self):
        """
                                            Method Name: deleteExistingBadDataTrainingFolder
                                            Description: This method deletes the directory made to store the bad Data.
                                            Output: None
                                            On Failure: OSError

                                             Written By: iNeuron Intelligence
                                            Version: 1.0
                                            Revisions: None                                                   """
        #try:
        #  #  path = 'Training_Raw_files_validated/'
        #    if os.path.isdir(path + 'Bad_Raw/'):
        #        shutil.rmtree(path + 'Bad_Raw/')
        #        file = open("Training_Logs/GeneralLog.txt", 'a+')
        #        self.logger.log(file,"BadRaw directory deleted before starting validation!!!")
        #        file.close()
        #except OSError as s:
        #    file = open("Training_Logs/GeneralLog.txt", 'a+')
        #    self.logger.log(file,"Error while Deleting Directory : %s" %s)
        #    file.close()
        #    raise OSError
        log_database = "wafer_training_log"
        log_collection = "general_log"
        try:
            log_database = "wafer_training_log"
            log_collection = "general_log"
            self.az_blob_mgt.deleteDir(self.bad_directory_path)
            self.logger_db_writer.log(
                log_database, log_collection,
                self.bad_directory_path + " deleted successfully!!")

        except Exception as e:
            msg = "Error Occured in class Raw_Data_validation method:deleteExistingGoodDataTrainingFolder Error occured while deleting :" + self.good_directory_path
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise e

    def moveBadFilesToArchiveBad(self):
        """
                                            Method Name: moveBadFilesToArchiveBad
                                            Description: This method deletes the directory made  to store the Bad Data
                                                          after moving the data in an archive folder. We archive the bad
                                                          files to send them back to the client for invalid data issue.
                                            Output: None
                                            On Failure: OSError

                                             Written By: iNeuron Intelligence
                                            Version: 1.0
                                            Revisions: None

                                                    """
        now = datetime.now()
        date = now.date()
        time = now.strftime("%H%M%S")
        #try:
        #source = 'Training_Raw_files_validated/Bad_Raw/'
        #if os.path.isdir(source):
        #    path = "TrainingArchiveBadData"
        #    if not os.path.isdir(path):
        #        os.makedirs(path)
        #    dest = 'TrainingArchiveBadData/BadData_' + str(date)+"_"+str(time)
        #    if not os.path.isdir(dest):
        #        os.makedirs(dest)
        #    files = os.listdir(source)
        #    for f in files:
        #        if f not in os.listdir(dest):
        #            shutil.move(source + f, dest)
        #    file = open("Training_Logs/GeneralLog.txt", 'a+')
        #    self.logger.log(file,"Bad files moved to archive")
        #    path = 'Training_Raw_files_validated/'
        #    if os.path.isdir(path + 'Bad_Raw/'):
        #        shutil.rmtree(path + 'Bad_Raw/')
        #    self.logger.log(file,"Bad Raw Data Folder Deleted successfully!!")
        #    file.close() #
        log_database = "wafer_training_log"
        log_collection = "general_log"
        try:

            log_database = "wafer_training_log"
            log_collection = "general_log"

            # source = 'Training_Raw_files_validated/Bad_Raw/'
            source = self.bad_directory_path
            destination = "lat-" + self.execution_id
            self.logger_db_writer.log(log_database, log_collection,
                                      "Started moving bad raw data..")
            for file in self.az_blob_mgt.getAllFileNameFromDirectory(source):

                self.az_blob_mgt.moveFileinDir(source, destination, file)
                self.logger_db_writer.log(
                    log_database, log_collection, "File:" + file +
                    " moved to directory:" + destination + " successfully.")

            self.logger_db_writer.log(
                log_database, log_collection,
                "All bad raw file moved to directory:" + destination)

            self.az_blob_mgt.deleteDir(source)
            self.logger_db_writer.log(log_database, log_collection,
                                      "Deleting bad raw directory:" + source)

        except Exception as e:
            self.logger_db_writer.log(
                log_database, log_collection,
                "class Raw_Data_validation method:moveBadFilesToArchiveBad "
                "Error while moving bad files to archive:" + str(e))
            raise e

    def validationFileNameRaw(self, regex, LengthOfDateStampInFile,
                              LengthOfTimeStampInFile):
        """
                    Method Name: validationFileNameRaw
                    Description: This function validates the name of the training csv files as per given name in the schema!
                                 Regex pattern is used to do the validation.If name format do not match the file is moved
                                 to Bad Raw Data folder else in Good raw data.
                    Output: None
                    On Failure: Exception

                     Written By: iNeuron Intelligence
                    Version: 1.0
                    Revisions: None

                """

        #pattern = "['Wafer']+['\_'']+[\d_]+[\d]+\.csv"
        # delete the directories for good and bad data in case last run was unsuccessful and folders were not deleted.
        #self.deleteExistingBadDataTrainingFolder()
        #self.deleteExistingGoodDataTrainingFolder()
        #create new directories
        self.createDirectoryForGoodBadRawData()
        onlyfiles = self.az_blob_mgt.getAllFileNameFromDirectory(
            self.Batch_Directory)
        #onlyfiles = [f for f in listdir(self.Batch_Directory)]
        log_database = "wafer_training_log"
        log_collection = "name_validation_log"
        try:
            log_database = "wafer_training_log"
            log_collection = "name_validation_log"
            #f = open("Training_Logs/nameValidationLog.txt", 'a+')
            for filename in onlyfiles:
                if (re.match(regex, filename)):
                    splitAtDot = re.split('.csv', filename)
                    splitAtDot = (re.split('_', splitAtDot[0]))
                    if len(splitAtDot[1]) == LengthOfDateStampInFile:
                        if len(splitAtDot[2]) == LengthOfTimeStampInFile:
                            #shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Good_Raw")
                            #self.logger.log(f,"Valid File name!! File moved to GoodRaw Folder :: %s" % filename)
                            self.az_blob_mgt.CopyFileinDir(
                                self.Batch_Directory, self.good_directory_path,
                                filename)
                            self.logger_db_writer.log(
                                log_database, log_collection,
                                "Valid File name!! File moved to " +
                                self.good_directory_path + filename)

                        else:
                            #shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Bad_Raw")
                            #self.logger.log(f,"Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
                            self.az_blob_mgt.CopyFileinDir(
                                self.Batch_Directory, self.bad_directory_path,
                                filename)
                            msg = "Invalid File Name !! File moved to " + self.bad_directory_path + filename
                            self.logger_db_writer.log(log_database,
                                                      log_collection, msg)
                    else:
                        #shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Bad_Raw")
                        #self.logger.log(f,"Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
                        self.az_blob_mgt.CopyFileinDir(self.Batch_Directory,
                                                       self.bad_directory_path,
                                                       filename)
                        msg = "Invalid File Name !! File moved to " + self.bad_directory_path + filename
                        self.logger_db_writer.log(log_database, log_collection,
                                                  msg)
                else:
                    #shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Bad_Raw")
                    #self.logger.log(f, "Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
                    self.az_blob_mgt.CopyFileinDir(self.Batch_Directory,
                                                   self.bad_directory_path,
                                                   filename)
                    msg = "Invalid File Name !! File moved to " + self.bad_directory_path + filename
                    self.logger_db_writer.log(log_database, log_collection,
                                              msg)

        # f.close()

        except Exception as e:

            msg = "Error occured while validating FileName " + str(e)
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise e
        #f = open("Training_Logs/nameValidationLog.txt", 'a+')
        #self.logger.log(f, "Error occured while validating FileName %s" % e)
        #f.close()
        #raise e

    def validateColumnLength(self, NumberofColumns):
        """
                          Method Name: validateColumnLength
                          Description: This function validates the number of columns in the csv files.
                                       It is should be same as given in the schema file.
                                       If not same file is not suitable for processing and thus is moved to Bad Raw Data folder.
                                       If the column number matches, file is kept in Good Raw Data for processing.
                                      The csv file is missing the first column name, this function changes the missing name to "Wafer".
                          Output: None
                          On Failure: Exception

                           Written By: iNeuron Intelligence
                          Version: 1.0
                          Revisions: None

                      """
        log_collection = "column_validation_log"
        log_database = "wafer_training_log"
        try:
            log_collection = "column_validation_log"
            log_database = "wafer_training_log"
            #f = open("Training_Logs/columnValidationLog.txt", 'a+')
            #self.logger.log(f,"Column Length Validation Started!!")
            self.logger_db_writer.log(log_database, log_collection,
                                      "Column Length Validation Started!!")
            print("column lenght validation started")

            for file in self.az_blob_mgt.getAllFileNameFromDirectory(
                    self.good_directory_path):
                #csv = pd.read_csv("Training_Raw_files_validated/Good_Raw/" + file)
                print(file)

                csv = self.az_blob_mgt.readCSVFilefromDir(
                    self.good_directory_path, file)
                print(csv.shape)

                if csv.shape[1] == NumberofColumns:
                    csv.rename(columns={"Unnamed: 0": "Wafer"}, inplace=True)
                    print("pass statement ")
                    pass
            #for file in listdir('Training_Raw_files_validated/Good_Raw/'):
            #    csv = pd.read_csv("Training_Raw_files_validated/Good_Raw/" + file)
            #    if csv.shape[1] == NumberofColumns:
            #  #      pass
                else:
                    self.az_blob_mgt.moveFileinDir(self.good_directory_path,
                                                   self.bad_directory_path,
                                                   file)
                    msg = "Invalid Column Length for the file!! File moved to " + self.bad_directory_path + "file:" + file
                    self.logger_db_writer.log(log_database, log_collection,
                                              msg)
                self.logger_db_writer.log(
                    log_database, log_collection,
                    "Column Length Validation Completed!!")

        except Exception as e:
            self.logger_db_writer.log(
                log_database, log_collection,
                "Error Occured in class Raw_Data_validation method: validateColumnLength error:"
                + str(e))
            raise e

        #        shutil.move("Training_Raw_files_validated/Good_Raw/" + file, "Training_Raw_files_validated/Bad_Raw")
        #  #      self.logger.log(f, "Invalid Column Length for the file!! File moved to Bad Raw Folder :: %s" % file)
        #self.logger.log(f, "Column Length Validation Completed!!")
        #except OSError:
        #    f = open("Training_Logs/columnValidationLog.txt", 'a+')
        #    self.logger.log(f, "Error Occured while moving the file :: %s" % OSError)
        #    f.close()
        #    raise OSError
        #except Exception as e:
        #    f = open("Training_Logs/columnValidationLog.txt", 'a+')
        #    self.logger.log(f, "Error Occured:: %s" % e)
        #    f.close()
        #   # raise e
        #f.close()

    def validateMissingValuesInWholeColumn(self):
        """
                                  Method Name: validateMissingValuesInWholeColumn
                                  Description: This function validates if any column in the csv file has all values missing.
                                               If all the values are missing, the file is not suitable for processing.
                                               SUch files are moved to bad raw data.
                                  Output: None
                                  On Failure: Exception

                                   Written By: iNeuron Intelligence
                                  Version: 1.0
                                  Revisions: None

                              """
        log_database = "wafer_training_log"
        log_collection = "missing_values_in_column"
        try:

            log_database = "wafer_training_log"
            log_collection = "missing_values_in_column"
            self.logger_db_writer.log(log_database, log_collection,
                                      "Missing Values Validation Started!!")

            #f = open("Training_Logs/missingValuesInColumn.txt", 'a+')
            #self.logger.log(f,"Missing Values Validation Started!!")

            #for file in listdir('Training_Raw_files_validated/Good_Raw/'):
            #    csv = pd.read_csv("Training_Raw_files_validated/Good_Raw/" + file)
            for file in self.az_blob_mgt.getAllFileNameFromDirectory(
                    self.good_directory_path):
                csv = self.az_blob_mgt.readCSVFilefromDir(
                    self.good_directory_path, file)
                print(csv)
                count = 0
                for columns in csv:
                    print(columns)
                    if (len(csv[columns]) - csv[columns].count()) == len(
                            csv[columns]):
                        count += 1
                        #shutil.move("Training_Raw_files_validated/Good_Raw/" + file,
                        #            "Training_Raw_files_validated/Bad_Raw")
                        #self.logger.log(f,"Invalid Column Length for the file!! File moved to Bad Raw Folder :: %s" % file)
                        self.az_blob_mgt.moveFileinDir(
                            self.good_directory_path, self.bad_directory_path,
                            file)
                        msg = "Invalid Column Length for the file!! File moved to " + self.bad_directory_path + ":: %s" % file
                        self.logger_db_writer.log(log_database, log_collection,
                                                  msg)
                        break
                if count == 0:
                    print("entering rename")
                    csv.rename(columns={"Unnamed: 0": "Wafer"}, inplace=True)
                    self.az_blob_mgt.saveDataFrametoCSV(
                        self.good_directory_path,
                        file,
                        csv,
                        index=None,
                        header=True)
                    #csv.to_csv("Training_Raw_files_validated/Good_Raw/" + file, index=None, header=True)
        #except OSError:
        #    f = open("Training_Logs/missingValuesInColumn.txt", 'a+')
        #    self.logger.log(f, "Error Occured while moving the file :: %s" % OSError)
        #    f.close()
        #    raise OSError
        except Exception as e:

            #    f = open("Training_Logs/missingValuesInColumn.txt", 'a+')
            #  #  self.logger.log(f, "Error Occured:: %s" % e)
            #    f.close()
            #    raise e
            #f.close()
            self.logger_db_writer.log(
                log_database, log_collection,
                "Error Occured class:Raw_Data_validation method:validateMissingValuesInWholeColumn error:"
                + str(e))
            raise e


#path = 'training-batch-files'
#noofcolumns=592
#a=Raw_Data_validation
#res=a.validateMissingValuesInWholeColumn(a(path,333))
#print(res)
class DbOperationMongoDB:
    """
          This class shall be used for handling all the mongodb operations
          of the prediction flow: loading the validated csv files into
          MongoDB and exporting the loaded records to a single csv file.

          Written By: iNeuron Intelligence
          Version: 1.0
          Revisions: None

          """

    def __init__(self, execution_id):
        """
        execution_id: run identifier passed to the database logger.
        """
        self.mongodb = MongodbOperation()
        self.az_blob_mgt = AzureBlobManagement()
        self.logger_db_writer = App_LoggerDB(execution_id=execution_id)
        self.good_file_path = "good-raw-file-prediction-validated"
        self.bad_file_path = "bad-raw-file-prediction-validated"

    def insertIntoTableGoodData(self, column_names):
        """
        Description: Drops the collection "Good_Raw_Data" of the mongo db
        database "prediction_database" and loads every csv file listed in the
        good-file directory into it. `column_names` is assigned as the column
        labels of each file before insertion. A file that fails to load is
        moved to the bad-file directory and processing continues with the
        next file.

        Any other error is written to the database log and NOT re-raised
        (best-effort behaviour kept from the original).

        :return: None
        """
        # Bound before the try block so the except handler can always log.
        prediction_database = "prediction_database"  # mongodb name
        prediction_collection = "Good_Raw_Data"  # mongodb name
        database_name = "wafer_prediction_log"  # logger name
        collection_name = "db_insert_log"  # logger name
        try:
            self.mongodb.dropCollection(prediction_database,
                                        prediction_collection)
            self.logger_db_writer.log(
                database_name, collection_name,
                "Droping collection:" + prediction_collection +
                " from database:" + prediction_database)
            # Messages name the prediction database/directory actually used;
            # they previously carried the training names.
            self.logger_db_writer.log(
                database_name, collection_name,
                "Starting loading of good files in database:" +
                prediction_database + " and collection: " +
                prediction_collection)
            files = self.az_blob_mgt.getAllFileNameFromDirectory(
                self.good_file_path)
            self.logger_db_writer.log(
                database_name, collection_name, "No of file found in " +
                self.good_file_path + " " + str(len(files)))
            for file in files:
                try:
                    self.logger_db_writer.log(
                        database_name, collection_name,
                        "Insertion of file " + file + " started...")
                    df = self.az_blob_mgt.readCSVFilefromDir(
                        self.good_file_path, file)
                    df.columns = column_names
                    self.mongodb.insertDataFrame(prediction_database,
                                                 prediction_collection, df)
                    self.logger_db_writer.log(
                        database_name, collection_name,
                        "File: {0} loaded successfully".format(file))
                except Exception as e:
                    self.logger_db_writer.log(database_name, collection_name,
                                              str(e))
                    self.az_blob_mgt.moveFileinDir(self.good_file_path,
                                                   self.bad_file_path, file)
                    self.logger_db_writer.log(
                        database_name, collection_name, "File: " + file +
                        " was not loaded successfully hence moved to dir:" +
                        self.bad_file_path)

        except Exception as e:
            error_message = "Error occured in class:DbOperationMongoDB method:insertIntoTableGoodData error:" + str(e)
            self.logger_db_writer.log(database_name, collection_name,
                                      error_message)

    def selectingDatafromtableintocsv(self):
        """
        Description: Reads the collection "Good_Raw_Data" of the mongo db
        database "prediction_database" into a dataframe and saves it as
        "InputFile.csv" in the directory "prediction-file-from-db".

        Errors are written to the database log and NOT re-raised
        (best-effort behaviour kept from the original).

        :return: None
        """
        # Bound before the try block so the except handler can always log.
        directory_name = "prediction-file-from-db"  # azure storage name
        file_name = "InputFile.csv"  # azure storage name
        database_name = "wafer_prediction_log"  # logger name
        collection_name = "export_to_csv"  # logger name
        prediction_database = "prediction_database"  # mongodb name
        prediction_collection = "Good_Raw_Data"  # mongodb name
        try:
            msg = "starting of loading of database:" + prediction_database + ",collection:" + prediction_collection + " records into file:" + file_name
            self.logger_db_writer.log(database_name, collection_name, msg)
            df = self.mongodb.getDataFrameofCollection(prediction_database,
                                                       prediction_collection)
            msg = "Good_Raw_data has been loaded into pandas dataframe"
            print(msg)
            self.logger_db_writer.log(database_name, collection_name, msg)
            self.az_blob_mgt.saveDataFrametoCSV(directory_name,
                                                file_name,
                                                df,
                                                index=None,
                                                header=True)
            msg = "InputFile.csv created successfully in directory " + directory_name
            print(msg)
            self.logger_db_writer.log(database_name, collection_name, msg)
        except Exception as e:
            # Names this method (the message used to point at
            # insertIntoTableGoodData).
            msg = "Error occured in class:DbOperationMongoDB method:selectingDatafromtableintocsv error:" + str(e)
            self.logger_db_writer.log(database_name, collection_name, msg)
Exemple #6
0
class DbOperationMongoDB:
    """
      This class shall be used for handling all the MongoDB operations of the
      training flow: loading the validated "good" csv files into collection
      "Good_Raw_Data" of "training_database" and exporting that collection
      back to a csv file in blob storage.

      Written By: iNeuron Intelligence
      Version: 1.0
      Revisions: None

      """
    def __init__(self, execution_id):
        """
        :param execution_id: identifier handed to App_LoggerDB for the log
                             records written by this instance.
        """
        self.mongodb = MongodbOperation()
        self.az_blob_mgt = AzureBlobManagement()
        self.logger_db_writer = App_LoggerDB(execution_id=execution_id)
        # Blob directories holding files that passed / failed validation.
        self.good_file_path = "good-raw-file-train-validated"
        self.bad_file_path = "bad-raw-file-train-validated"

    def insertIntoTableGoodData(self, column_name):
        """
        Description: Load all csv file into mongo db database "training_database" ,collection:"Good_Raw_Data"

        The existing collection is dropped first. Every file found in
        self.good_file_path is read, its columns are replaced by column_name
        and it is inserted into the collection. A file that fails is logged
        and moved to self.bad_file_path, and the loop continues with the next
        file. Any other failure is logged and NOT re-raised.

        :param column_name: value assigned to ``df.columns`` of every file
                            (presumably the list of expected column labels -
                            verify against the caller).
        :return: None
        """
        database_name = "wafer_training_log"  # logger name
        collection_name = "db_insert_log"  # logger name

        try:
            self.logger_db_writer.log(
                database_name, collection_name,
                "Droping existing collection if present in database training_database"
            )
            self.mongodb.dropCollection("training_database", "Good_Raw_Data")

            self.logger_db_writer.log(
                database_name, collection_name,
                "Starting loading of good files in database:training_database and collection: Good_Raw_Data"
            )
            files = self.az_blob_mgt.getAllFileNameFromDirectory(
                self.good_file_path)
            self.logger_db_writer.log(
                database_name, collection_name,
                "No of file found in good-raw-file-train-validated " +
                str(len(files)))
            for file in files:
                try:
                    self.logger_db_writer.log(
                        database_name, collection_name,
                        "Insertion of file " + file + " started...")
                    df = self.az_blob_mgt.readCSVFilefromDir(
                        self.good_file_path, file)
                    df.columns = column_name
                    self.mongodb.insertDataFrame("training_database",
                                                 "Good_Raw_Data", df)
                    self.logger_db_writer.log(
                        database_name, collection_name,
                        "File: {0} loaded successfully".format(file))
                except Exception as e:
                    # Per-file failure: record it, quarantine the file and
                    # keep going with the remaining files.
                    self.logger_db_writer.log(database_name, collection_name,
                                              str(e))
                    self.az_blob_mgt.moveFileinDir(self.good_file_path,
                                                   self.bad_file_path, file)
                    self.logger_db_writer.log(
                        database_name, collection_name, "File " + file +
                        " was not loaded successfully hence moved tp dir:" +
                        self.bad_file_path)

        except Exception as e:
            error_message = "Error occured in class:DbOperationMongoDB method:insertIntoTableGoodData error:" + str(
                e)
            self.logger_db_writer.log(database_name, collection_name,
                                      error_message)

    def selectingDatafromtableintocsv(self, ):
        """
        Description: Reads collection "Good_Raw_Data" of "training_database"
                     and passes the result to
                     self.az_blob_mgt.saveDataFrametoCSV for directory
                     "training-file-from-db". Failures are logged and NOT
                     re-raised.

        :return: None
        """
        database_name = "wafer_training_log"  # logger name
        collection_name = "export_to_csv"  # logger name
        try:
            directory_name = "training-file-from-db"
            # NOTE(review): no ".csv" suffix here although the log messages say
            # "InputFile.csv" - presumably the blob helper appends it; confirm.
            file_name = "InputFile"

            training_database = "training_database"
            training_collection = "Good_Raw_Data"
            msg = "starting of loading of database:training_database,collection:Good_Raw_Data records into InputFile.csv"
            print(msg)
            self.logger_db_writer.log(database_name, collection_name, msg)
            df = self.mongodb.getDataFrameofCollection(training_database,
                                                       training_collection)
            print(df)
            msg = "Good_Raw_data has been loaded into pandas dataframe"
            self.logger_db_writer.log(database_name, collection_name, msg)
            self.az_blob_mgt.saveDataFrametoCSV(directory_name, file_name, df)
            msg = "InputFile.csv created successfully in directory " + directory_name
            self.logger_db_writer.log(database_name, collection_name, msg)
        except Exception as e:
            # Name the method that actually failed; the message previously
            # pointed at insertIntoTableGoodData (copy-paste slip).
            msg = "Error occured in class:DbOperationMongoDB method:selectingDatafromtableintocsv error:" + str(
                e)
            self.logger_db_writer.log(database_name, collection_name, msg)
class dataTransformPredict:
    """
    This class shall be used for transforming the validated good raw
    prediction files kept in blob directory
    "good-raw-file-prediction-validated".

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None

    """
    def __init__(self, execution_id):
        """
        :param execution_id: identifier stored on the instance and handed to
                             App_LoggerDB.
        """
        self.execution_id = execution_id
        self.goodDataPath = "good-raw-file-prediction-validated"
        self.log_db_writer = App_LoggerDB(execution_id=execution_id)
        self.log_database = "wafer_prediction_log"
        self.az_blob_mgt = AzureBlobManagement()

    def replaceMissingWithNull(self):
        """
        Method Name: replaceMissingWithNull
        Description: For every file in self.goodDataPath, missing values are
                     filled with the string 'NULL', the first six characters
                     of each "Wafer" value are dropped, and the file is saved
                     back under the same name. One log record is written per
                     transformed file.

        Raises: any exception raised while listing, reading, transforming or
                saving is logged and then re-raised.

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        log_collection = "data_transform_log"
        try:
            file_names = self.az_blob_mgt.getAllFileNameFromDirectory(
                self.goodDataPath)
            print(file_names)
            for file_name in file_names:
                frame = self.az_blob_mgt.readCSVFilefromDir(
                    self.goodDataPath, file_name)
                frame.fillna('NULL', inplace=True)

                # Keep only what follows the six-character prefix.
                wafer_ids = frame['Wafer'].str[6:]
                frame['Wafer'] = wafer_ids

                print("before updating index")
                self.az_blob_mgt.saveDataFrametoCSV(
                    self.goodDataPath, file_name, frame,
                    index=None, header=True)
                print("after updating index")

                success_message = f"File {file_name} transformed successfully"
                self.log_db_writer.log(self.log_database, log_collection,
                                       success_message)
                print('File transformed replace missing with null succccc')

        except Exception as e:
            failure_message = 'Data Transformation failed because:' + str(e)
            self.log_db_writer.log(self.log_database, log_collection,
                                   failure_message)
            raise e
Exemple #8
0
class dataTransform:
    """
    This class shall be used for transforming the Good Raw Training Data
    (blob directory "good-raw-file-train-validated") before loading it in
    Database!!.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None

    """
    def __init__(self, execution_id):
        """
        :param execution_id: identifier stored on the instance and handed to
                             App_LoggerDB.
        """
        self.goodDataPath = "good-raw-file-train-validated"
        self.execution_id = execution_id
        self.logger_db_writer = App_LoggerDB(execution_id)
        self.az_blob_mgt = AzureBlobManagement()

    def replaceMissingWithNull(self):
        """
        Method Name: replaceMissingWithNull
        Description: For every file in self.goodDataPath, missing values are
                     filled with the string 'NULL', the first six characters
                     of each "Wafer" value are dropped, and the file is saved
                     back under the same name. One log record is written per
                     transformed file.

        Raises: any exception raised while listing, reading, transforming or
                saving is logged and then re-raised.

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        log_collection = "data_transform_log"
        log_database = "wafer_training_log"

        try:
            file_names = self.az_blob_mgt.getAllFileNameFromDirectory(
                self.goodDataPath)
            for file_name in file_names:
                frame = self.az_blob_mgt.readCSVFilefromDir(
                    self.goodDataPath, filename=file_name)
                frame.fillna('NULL', inplace=True)

                # Keep only what follows the six-character prefix.
                wafer_ids = frame["Wafer"].str[6:]
                frame["Wafer"] = wafer_ids

                self.az_blob_mgt.saveDataFrametoCSV(
                    self.goodDataPath, file_name, frame,
                    index=None, header=True)

                success_message = f"File {file_name} Transformed successfully!!"
                self.logger_db_writer.log(log_database, log_collection,
                                          success_message)
        except Exception as e:
            failure_prefix = "Error occured in class:dataTransform method:replaceMissingWithNull error:Data Transformation failed because:"
            msg = failure_prefix + str(e)
            self.logger_db_writer.log(log_database, log_collection, msg)
            raise e