class train_validation:
    """Validate raw training files and load the accepted ones into a table.

    The constructor builds the collaborators used by the pipeline and opens
    the main training log in append mode. ``train_validation()`` always
    closes that log before returning or raising, so an instance is meant to
    run the pipeline once.
    """

    def __init__(self, path):
        """Create the pipeline collaborators and open the main log file.

        Args:
            path: Forwarded unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run validation, transformation, table load and CSV export in order.

        Returns:
            None.

        Raises:
            Exception: Any error raised by a step propagates unchanged. The
                log file is closed first, so a failed run no longer leaks
                the open handle.
        """
        try:
            self.log_writer.log(self.file_object, 'Start of Validation on files!!')

            # Extract the values from the schema. Unnecessary columns
            # ('url', 'address', 'name', 'dish_liked', 'phone',
            # 'reviews_list') were removed from the schema itself so that no
            # DB columns get created for them.
            (date_stamp_length, time_stamp_length,
             column_names, column_count) = self.raw_data.valuesFromSchema()

            # Regex used to validate the raw file names.
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.validationFileNameRaw(
                regex, date_stamp_length, time_stamp_length)
            # Column count must match the schema.
            self.raw_data.validateColumnLength(column_count)
            # Check for columns whose values are all missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")

            self.log_writer.log(self.file_object, "Starting Data Transforamtion!!")
            # Adds quotes to the '?' values in some columns.
            self.dataTransform.addQuotesToStringValuesInColumn()
            self.log_writer.log(self.file_object, "DataTransformation Completed!!!")

            self.log_writer.log(
                self.file_object,
                "Creating Training_Database and tables on the basis of given schema!!!")
            # Create the database with the given name (or open the existing
            # connection) and create the table from the schema columns.
            self.dBOperation.createTableDb('Training', column_names)
            self.log_writer.log(self.file_object, "Table creation Completed!!")

            self.log_writer.log(self.file_object, "Insertion of Data into Table started!!!!")
            # Insert the csv files into the table.
            self.dBOperation.insertIntoTableGoodData('Training')
            self.log_writer.log(self.file_object, "Insertion in Table completed!!!")

            self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!")
            # The good-data folder is no longer needed once the table is loaded.
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")

            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")

            self.log_writer.log(self.file_object, "Extracting csv file from table")
            # Export the table contents to a csv file.
            self.dBOperation.selectingDatafromtableintocsv('Training')
        finally:
            # Close the log on success *and* on failure; the original only
            # closed it when every step succeeded.
            self.file_object.close()
class train_validation:
    """Validate raw training files and load the accepted ones into a table.

    The constructor builds the collaborators used by the pipeline and opens
    the main training log in append mode. ``train_validation()`` always
    closes that log before returning or raising, so an instance is meant to
    run the pipeline once.
    """

    def __init__(self, path):
        """Create the pipeline collaborators and open the main log file.

        Args:
            path: Forwarded unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run validation, transformation, table load and CSV export in order.

        Returns:
            None.

        Raises:
            Exception: Any error raised by a step propagates unchanged. The
                log file is closed first, so a failed run no longer leaks
                the open handle.
        """
        try:
            self.log_writer.log(
                self.file_object, 'Start of Validation on files for training!!')

            # Extract the values from the training schema.
            (date_stamp_length, time_stamp_length,
             column_names, column_count) = self.raw_data.valuesFromSchema()

            # Regex used to validate the raw file names.
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.validationFileNameRaw(
                regex, date_stamp_length, time_stamp_length)
            # Column count must match the schema.
            self.raw_data.validateColumnLength(column_count)
            # Check for columns whose values are all missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")

            self.log_writer.log(self.file_object, "Starting Data Transforamtion!!")
            # Replace blanks in the csv files with "Null" so they can be
            # inserted into the table, then quote the string values.
            self.dataTransform.replaceMissingWithNull()
            self.dataTransform.addQuotesToStringValuesInColumn()
            self.log_writer.log(self.file_object, "DataTransformation Completed!!!")

            self.log_writer.log(
                self.file_object,
                "Creating Training_Database and tables on the basis of given schema!!!")
            # Create the database with the given name (or open the existing
            # connection) and create the table from the schema columns.
            self.dBOperation.createTableDb('Training', column_names)
            self.log_writer.log(self.file_object, "Table creation Completed!!")

            self.log_writer.log(self.file_object, "Insertion of Data into Table started!!!!")
            # Insert the csv files into the table.
            self.dBOperation.insertIntoTableGoodData('Training')
            self.log_writer.log(self.file_object, "Insertion in Table completed!!!")

            self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!")
            # The good-data folder is no longer needed once the table is loaded.
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")

            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")

            self.log_writer.log(self.file_object, "Extracting csv file from table")
            # Export the table contents to a csv file.
            self.dBOperation.selectingDatafromtableintocsv('Training')
        finally:
            # Close the log on success *and* on failure; the original only
            # closed it when every step succeeded.
            self.file_object.close()
class train_validation:
    """Validate raw files, combine the accepted ones and archive the rest.

    In this variant the table creation, insertion and CSV export calls on
    ``dBOperation`` are disabled; the accepted files are passed through
    ``raw_data.combinefiles()`` before the good-data folder is deleted.

    The constructor opens the main training log in append mode.
    ``train_validation()`` always closes that log before returning or
    raising, so an instance is meant to run the pipeline once.
    """

    def __init__(self, path):
        """Create the pipeline collaborators and open the main log file.

        Args:
            path: Forwarded unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run the validation steps, combine good files and archive bad ones.

        Returns:
            None.

        Raises:
            Exception: Any error raised by a step is written to the log and
                re-raised. The log file is closed afterwards, so a failed
                run no longer leaks the open handle.
        """
        try:
            self.log_writer.log(
                self.file_object, 'Start of Validation on files for prediction!!')

            # Extract the values from the schema. The column names are only
            # needed by the disabled table-creation step.
            _column_names, column_count = self.raw_data.valuesFromSchema()

            # Column count must match the schema.
            self.raw_data.validateColumnLength(column_count)
            # Check for columns whose values are all missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")

            self.log_writer.log(
                self.file_object,
                "Creating Training_Database and tables on the basis of given schema!!!")
            # NOTE: createTableDb('Training', ...), insertIntoTableGoodData('Training')
            # and their progress log lines are disabled in this variant.

            self.raw_data.combinefiles()
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")

            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")

            self.log_writer.log(self.file_object, "Extracting csv file from table")
            # NOTE: selectingDatafromtableintocsv('Training') is disabled here.
        except Exception as e:
            # Record the failure while the log is still open, then propagate.
            self.log_writer.log(self.file_object, "{}".format(e))
            raise
        finally:
            # Runs after the error has been logged; the original left the
            # file open whenever a step failed.
            self.file_object.close()
class train_validation:
    """Validate raw training files and load them through the MongoDB helper.

    Progress messages go to the ``training_main_log`` collection of the
    ``wafer_training_log`` database via ``App_LoggerDB``; no local log file
    is opened. ``az_blob_mgt`` is created here but is not used by
    ``train_validation()`` itself.
    """

    def __init__(self, path, execution_id):
        """Store the logging targets and build the pipeline collaborators.

        Args:
            path: Forwarded to ``Raw_Data_validation``.
            execution_id: Forwarded to every collaborator that accepts it
                and kept on the instance.
        """
        self.execution_id = execution_id
        self.log_database = "wafer_training_log"
        self.log_collection = "training_main_log"

        self.raw_data = Raw_Data_validation(path, execution_id)
        self.dataTransform = dataTransform(execution_id)
        self.dBOperationMongoDB = DbOperationMongoDB(execution_id)
        self.logDB_write = App_LoggerDB(execution_id=execution_id)
        self.az_blob_mgt = AzureBlobManagement()

    def train_validation(self):
        """Run validation, transformation, DB insertion and CSV export.

        Returns:
            None.

        Raises:
            Exception: Any error raised by a step propagates to the caller.
        """

        def emit(message):
            # Every progress line goes to the same database/collection pair.
            self.logDB_write.log(self.log_database, self.log_collection, message)

        emit('Start of Validation on files!!')

        # Values extracted from the schema drive the checks below.
        stamp_date_len, stamp_time_len, columns, column_total = (
            self.raw_data.valuesFromSchema())
        filename_pattern = self.raw_data.manualRegexCreation()

        # File name, column count, then whole-column-missing checks.
        self.raw_data.validationFileNameRaw(
            filename_pattern, stamp_date_len, stamp_time_len)
        self.raw_data.validateColumnLength(column_total)
        self.raw_data.validateMissingValuesInWholeColumn()
        emit("Raw Data Validation Complete!!")

        emit("Starting Data Transforamtion!!")
        # Blanks in the csv files become "Null" so they can be inserted.
        self.dataTransform.replaceMissingWithNull()
        print("Missing value with NULL completed")
        emit("DataTransformation Completed!!!")

        emit("Creating database and collection if not exist then insert record")
        # Insert the validated csv files through the MongoDB helper.
        self.dBOperationMongoDB.insertIntoTableGoodData(columns)
        emit("Insertion in Table completed!!!")

        # NOTE(review): the good-data folder deletion call is disabled in
        # this variant, yet both related messages are still logged.
        emit("Deleting Good Data Folder!!!")
        emit("Good_Data folder deleted!!!")

        emit("Moving bad files to Archive and deleting Bad_Data folder!!!")
        self.raw_data.moveBadFilesToArchiveBad()
        emit("Bad files moved to archive!! Bad folder Deleted!!")
        emit("Validation Operation completed!!")

        emit("Extracting csv file from table")
        # Export the stored data to a csv file.
        self.dBOperationMongoDB.selectingDatafromtableintocsv()
class train_validation:
    """Validate raw training files and load the accepted ones into a table.

    The constructor builds the collaborators used by the pipeline and opens
    the main training log in append mode. ``train_validation()`` always
    closes that log before returning or raising, so an instance is meant to
    run the pipeline once.
    """

    def __init__(self, path):
        """Create the pipeline collaborators and open the main log file.

        Args:
            path: Forwarded unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run validation, transformation, table load and CSV export in order.

        Returns:
            None.

        Raises:
            Exception: Any error raised by a step propagates unchanged. The
                log file is closed first, so a failed run no longer leaks
                the open handle.
        """
        try:
            self.log_writer.log(self.file_object, 'Start of Validation on files!!')

            # Schema values drive the file-name and column-count checks.
            (date_stamp_length, time_stamp_length,
             column_names, column_count) = self.raw_data.valuesFromSchema()
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.validationFileNameRaw(
                regex, date_stamp_length, time_stamp_length)
            self.raw_data.validateColumnLength(column_count)
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")

            self.log_writer.log(self.file_object, "Starting Data Transforamtion!!")
            self.dataTransform.replaceMissingWithNull()
            self.log_writer.log(self.file_object, "DataTransformation Completed!!!")

            self.log_writer.log(
                self.file_object,
                "Creating Training_Database and tables on the basis of given schema!!!")
            self.dBOperation.createTableDb('Training', column_names)
            self.log_writer.log(self.file_object, "Table creation Completed!!")

            self.log_writer.log(self.file_object, "Insertion of Data into Table started!!!!")
            self.dBOperation.insertIntoTableGoodData('Training')
            self.log_writer.log(self.file_object, "Insertion in Table completed!!!")

            self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!")
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")

            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")

            self.log_writer.log(self.file_object, "Extracting csv file from table")
            self.dBOperation.selectingDatafromtableintocsv('Training')
        finally:
            # Close the log on success *and* on failure; the original only
            # closed it when every step succeeded.
            self.file_object.close()
class train_validation:
    """Validate raw training files, load them and e-mail a summary.

    ``file_object`` is the plain string ``'Training_Main_Log'`` handed to
    the logger on every call; this variant does not open a local file.
    After the load and export steps a notification listing the files found
    under ``'Training_Bad_Raw_Files_Validated'`` is sent.
    """

    def __init__(self, path):
        """Build the pipeline collaborators.

        Args:
            path: Forwarded unchanged to ``Raw_Data_validation``.
        """
        self.file_object = 'Training_Main_Log'

        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        self.log_writer = logger.App_Logger()
        self.emailObj = email()
        self.awsObj = AwsStorageManagement()

    def train_validation(self):
        """Run validation, transformation, load, export and notification.

        Returns:
            None.

        Raises:
            Exception: Any error raised by a step propagates to the caller.
        """

        def emit(message):
            # All progress lines share the same log target.
            self.log_writer.log(self.file_object, message)

        emit('Start of Validation on files for Training!!')

        # Values extracted from the schema; the column names are unpacked
        # but not used further in this variant.
        stamp_date_len, stamp_time_len, column_names, column_total = (
            self.raw_data.valuesFromSchema())
        filename_pattern = self.raw_data.manualRegexCreation()

        # File name, column count, then whole-column-missing checks.
        self.raw_data.validationFileNameRaw(
            filename_pattern, stamp_date_len, stamp_time_len)
        self.raw_data.validateColumnLength(column_total)
        self.raw_data.validateMissingValuesInWholeColumn()
        emit("Raw Data Validation Complete!!")

        emit("Starting Data Transforamtion!!")
        # Adds quotes to the '?' values in some columns.
        self.dataTransform.addQuotesToStringValuesInColumn()
        emit("DataTransformation Completed!!!")

        emit("Creating Training_Database and tables on the basis of given schema!!!")
        # NOTE(review): no table-creation call is made here, yet the
        # completion message below is still logged.
        emit("Table creation Completed!!")

        emit("Insertion of Data into Table started!!!!")
        self.dBOperation.insertIntoTableGoodData('mushroomClassifierDB')
        emit("Insertion in Table completed!!!")

        emit("Deleting Good Data Folder!!!")
        self.raw_data.deleteExistingGoodDataTrainingFolder()
        emit("Good_Data folder deleted!!!")

        emit("Moving bad files to Archive and deleting Bad_Data folder!!!")
        self.raw_data.moveBadFilesToArchiveBad()
        emit("Bad files moved to archive!! Bad folder Deleted!!")
        emit("Validation Operation completed!!")

        emit("Extracting csv file from table")
        self.dBOperation.selectingDatafromtableintocsv('mushroomClassifierDB')

        # Build and send the notification e-mail.
        message = MIMEMultipart()
        message['Subject'] = 'MushroomTypeClassifier - Train Validation | ' + str(
            datetime.now())

        bad_files = self.awsObj.listDirFiles('Training_Bad_Raw_Files_Validated')
        bad_files_text = ','.join(bad_files) if len(bad_files) >= 1 else 'No Bad Files'

        html_body = (
            'Model Train Validation Done Successfully... <br><br> Fault File List: <br>'
            '{}<br><br>Thanks and Regards, <br> Rahul Garg'
        ).format(bad_files_text)
        message.attach(MIMEText(html_body, 'html'))

        recipients = ['*****@*****.**']
        self.emailObj.trigger_mail(recipients, [], message)