def __init__(self, path):
    """Set up the validation, transformation, DB and logging helpers.

    Args:
        path: Directory containing the raw training batch files.
    """
    self.raw_data = Raw_Data_validation(path)
    self.dataTransform = dataTransform()
    self.dBOperation = dBOperation()
    self.cwd = os.getcwd()
    # BUG FIX: the original used self.cwd + 'Training_Main_Log.txt', which
    # concatenates without a path separator (e.g. '/homeTraining_Main_Log.txt').
    # Join the components properly so the log lands inside the cwd.
    self.file_object = open(os.path.join(self.cwd, 'Training_Main_Log.txt'), 'a+')
    self.log_writer = logger.App_Logger()
def __init__(self, path):
    """Wire up the collaborators used by the training validation run.

    Args:
        path: Directory holding the raw batch files to validate.
    """
    # Logging target is a named log collection, not a local file handle.
    self.file_object = 'Training_Main_Log'
    self.log_writer = logger.App_Logger()
    # Core pipeline helpers: raw validation, transformation, DB access.
    self.raw_data = Raw_Data_validation(path)
    self.dataTransform = dataTransform()
    self.dBOperation = dBOperation()
    # Notification and cloud-storage helpers.
    self.emailObj = email()
    self.awsObj = AwsStorageManagement()
def __init__(self, path):
    """Initialise validation, transformation, DB and logging helpers.

    Args:
        path: Directory containing the raw training batch files.
    """
    self.raw_data = Raw_Data_validation(path)
    self.dataTransform = dataTransform()
    self.dBOperation = dBOperation()
    # Append to the shared training log. (A commented-out absolute-path
    # variant of this open() was removed as dead code.)
    self.file_object = open('Training_Logs/Training_Main_Log.txt', 'a+')
    self.log_writer = logger.App_Logger()
def __init__(self, path, execution_id):
    """Set up MongoDB-backed logging and Azure-backed validation helpers.

    Args:
        path: Directory containing the raw training batch files.
        execution_id: Unique id tagging this pipeline run in the logs.
    """
    self.raw_data = Raw_Data_validation(path, execution_id)
    self.dataTransform = dataTransform(execution_id)
    self.dBOperationMongoDB = DbOperationMongoDB(execution_id)
    # Logs are written to a MongoDB database/collection instead of a file;
    # the commented-out file-based logger lines were removed as dead code.
    self.log_database = "wafer_training_log"
    self.log_collection = "training_main_log"
    self.execution_id = execution_id
    self.logDB_write = App_LoggerDB(execution_id=execution_id)
    self.az_blob_mgt = AzureBlobManagement()
class train_validation:
    """Runs the raw-data validation and transformation steps for training.

    This variant logs to the 'wafer_log' collection and does not load the
    data into a database (the DB steps were disabled; their commented-out
    remnants have been removed as dead code).
    """

    def __init__(self):
        self.raw_data = Raw_Data_validation()
        self.dataTransform = dataTransform()
        # Kept for parity with the DB-backed pipelines; may be unused here.
        self.dBOperation = dBOperation()
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Validate the raw files against the schema, then transform them.

        Raises:
            Exception: Re-raised from any failing validation/transform step.
        """
        try:
            self.log_writer.log('wafer_log', 'Start of Validation on files!!')
            # Extract expected file-name stamp lengths and the column layout
            # from the training schema.
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # Regex used to validate the raw file names.
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            self.raw_data.validateColumnLength(noofcolumns)
            # Reject files where an entire column is missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log('wafer_log', "Raw Data Validation Complete!!")
            self.log_writer.log('wafer_log', "Starting Data Transforamtion!!")
            # Replace blanks with "Null" so values can later be inserted into a table.
            self.dataTransform.replaceMissingWithNull()
            self.log_writer.log('wafer_log', "DataTransformation Completed!!!")
            self.log_writer.log('wafer_log', "Validation Operation completed!!")
        except Exception as e:
            # Record the failure before surfacing it to the caller
            # (the original re-raised silently).
            self.log_writer.log('wafer_log', "Error during train validation: " + str(e))
            raise e
class train_validation:
    """Coordinates raw-file validation, transformation and DB loading for training."""

    def __init__(self, path):
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = DataTransform()
        self.DBOperation = DBOperation()
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        # NOTE(review): lowercase 'App_logger' — presumably intentional in this
        # project's logger module; confirm against its definition.
        self.log_writer = logger.App_logger()

    def train_validation(self):
        """Run the full validate -> transform -> load -> export sequence.

        Raises:
            Exception: Re-raised from any failing step.
        """
        try:
            self.log_writer.log(self.file_object, 'Start of Validation on Files')
            # Expected name-stamp lengths and column layout from the schema.
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_name, noofcolumns = self.raw_data.ValuesFromSchema()
            # Regex used to validate the raw file names.
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.ValidationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            self.raw_data.ValidateColumnLength(noofcolumns)
            self.raw_data.ValidateMissingValuesInAllColumns()
            self.log_writer.log(self.file_object,
                                'Raw Validation of the FIle Completed!!!')
            self.dataTransform.replaceMissingValuesWithNULL()
            self.log_writer.log(self.file_object, 'Data Transformation Completed!')
            self.log_writer.log(
                self.file_object,
                'Creation of DB and tables on the basis of given Schema Started!!')
            self.DBOperation.CreateTableDB('Training', column_name)
            self.DBOperation.InsertIntoTableGoodData('Training')
            self.log_writer.log(
                self.file_object,
                'Insertion into the Good Raw Data Table Completed!!!!!')
            self.log_writer.log(
                self.file_object,
                'Moving Bad Files to Archives and Deleting Them!!')
            # Move bad files to the archive folder.
            self.raw_data.moveToArchive()
            self.DBOperation.selectingDataFromTableIntocsv('Training')
        except Exception as e:
            raise e
        finally:
            # BUG FIX: the original closed the log file only on the success
            # path, leaking the handle whenever any step raised.
            self.file_object.close()
class train_validation:
    """Wafer training validation pipeline backed by Azure storage and MongoDB."""

    def __init__(self, path, execution_id):
        self.raw_data = Raw_Data_validation(path, execution_id)
        self.dataTransform = dataTransform(execution_id)
        self.dBOperationMongoDB = DbOperationMongoDB(execution_id)
        # Logs are written to a MongoDB database/collection instead of a file.
        self.log_database = "wafer_training_log"
        self.log_collection = "training_main_log"
        self.execution_id = execution_id
        self.logDB_write = App_LoggerDB(execution_id=execution_id)
        self.az_blob_mgt = AzureBlobManagement()

    def train_validation(self):
        """Validate, transform and load the training batch into MongoDB.

        Raises:
            Exception: Re-raised from any failing step.
        """
        try:
            self.logDB_write.log(self.log_database, self.log_collection,
                                 'Start of Validation on files!!')
            # Expected name-stamp lengths and column layout from the schema.
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # Regex used to validate the raw file names.
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            self.raw_data.validateColumnLength(noofcolumns)
            # Reject files where an entire column is missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Raw Data Validation Complete!!")
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Starting Data Transforamtion!!")
            # Replace blanks with "Null" so values can be inserted into the table.
            self.dataTransform.replaceMissingWithNull()
            # BUG FIX: a stray debug print() went to stdout; route the message
            # through the pipeline logger instead.
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Missing value with NULL completed")
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "DataTransformation Completed!!!")
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Creating database and collection if not exist then insert record")
            # Insert the validated csv files (held in Azure storage) into MongoDB.
            self.dBOperationMongoDB.insertIntoTableGoodData(column_names)
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Insertion in Table completed!!!")
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Deleting Good Data Folder!!!")
            # NOTE(review): the actual deleteExistingGoodDataTrainingFolder()
            # call was disabled upstream; only the log entries remain.
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Good_Data folder deleted!!!")
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # Move the bad files to the archive folder.
            self.raw_data.moveBadFilesToArchiveBad()
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Validation Operation completed!!")
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Extracting csv file from table")
            # Export the table contents from MongoDB back to a csv file.
            self.dBOperationMongoDB.selectingDatafromtableintocsv()
        except Exception as e:
            raise e
class train_validation:
    """Validates raw training files, loads good data to the DB, exports csv."""

    def __init__(self, path):
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run the full validate -> transform -> load -> export sequence.

        Raises:
            Exception: Re-raised from any failing step.
        """
        try:
            self.log_writer.log(self.file_object, 'Start of Validation on files!!')
            # Expected name-stamp lengths and column layout from the schema.
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # Regex used to validate the raw file names.
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            self.raw_data.validateColumnLength(noofcolumns)
            # Reject files where an entire column is missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")
            self.log_writer.log(self.file_object, "Starting Data Transforamtion!!")
            # Replace blanks with "Null" so values can be inserted into the table.
            self.dataTransform.replaceMissingWithNull()
            self.log_writer.log(self.file_object, "DataTransformation Completed!!!")
            self.log_writer.log(
                self.file_object,
                "Creating Training_Database and tables on the basis of given schema!!!")
            self.dBOperation.createTableDb('Training', column_names)
            self.log_writer.log(self.file_object, "Table creation Completed!!")
            self.log_writer.log(self.file_object,
                                "Insertion of Data into Table started!!!!")
            self.dBOperation.insertIntoTableGoodData('Training')
            self.log_writer.log(self.file_object, "Insertion in Table completed!!!")
            self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!")
            # Good data is already in the table; drop the staging folder.
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")
            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")
            self.log_writer.log(self.file_object, "Extracting csv file from table")
            self.dBOperation.selectingDatafromtableintocsv('Training')
        except Exception as e:
            raise e
        finally:
            # BUG FIX: the original closed the log file only on the success
            # path, leaking the handle whenever any step raised.
            self.file_object.close()
class train_validation:
    """Validates raw training files, loads good data to the DB, exports csv."""

    def __init__(self, path):
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run the full validate -> transform -> load -> export sequence.

        Raises:
            Exception: Re-raised from any failing step.
        """
        try:
            self.log_writer.log(self.file_object, 'Start of Validation on files!!')
            # Extract expected name-stamp lengths and column layout from the
            # schema. Unnecessary columns ('url', 'address', 'name',
            # 'dish_liked', 'phone', 'reviews_list') were removed from the
            # schema itself so no DB columns are created for them.
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # Regex used to validate the raw file names.
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            self.raw_data.validateColumnLength(noofcolumns)
            # Reject files where an entire column is missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")
            self.log_writer.log(self.file_object, "Starting Data Transforamtion!!")
            # Adds quotes to the '?' values in some columns.
            self.dataTransform.addQuotesToStringValuesInColumn()
            self.log_writer.log(self.file_object, "DataTransformation Completed!!!")
            self.log_writer.log(
                self.file_object,
                "Creating Training_Database and tables on the basis of given schema!!!")
            # Create the database (or open it if present) and its table.
            self.dBOperation.createTableDb('Training', column_names)
            self.log_writer.log(self.file_object, "Table creation Completed!!")
            self.log_writer.log(self.file_object,
                                "Insertion of Data into Table started!!!!")
            self.dBOperation.insertIntoTableGoodData('Training')
            self.log_writer.log(self.file_object, "Insertion in Table completed!!!")
            self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!")
            # Good data is already in the table; drop the staging folder.
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")
            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")
            self.log_writer.log(self.file_object, "Extracting csv file from table")
            self.dBOperation.selectingDatafromtableintocsv('Training')
        except Exception as e:
            raise e
        finally:
            # BUG FIX: the original closed the log file only on the success
            # path, leaking the handle whenever any step raised.
            self.file_object.close()
def __init__(self, path):
    """Create the collaborators needed for a training validation run.

    Args:
        path: Location of the raw training batch files.
    """
    # Open the shared training log in append mode and attach the logger.
    self.file_object = open("Training_Logs/Training_Main_Log.txt", "a+")
    self.log_writer = logger.App_Logger()
    # Validation, transformation and database helpers.
    self.raw_data = Raw_Data_validation(path)
    self.dataTransform = dataTransform()
    self.dBOperation = dBOperation()
class train_validation:
    """Validates and combines raw training files (DB loading disabled).

    The table-creation/insertion/export steps were disabled upstream; their
    commented-out remnants have been removed as dead code.
    """

    def __init__(self, path):
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Validate the raw files, combine the good ones, archive the bad ones.

        Raises:
            Exception: Re-raised after being logged.
        """
        try:
            self.log_writer.log(self.file_object,
                                'Start of Validation on files for prediction!!')
            # Column layout expected by the schema.
            column_names, noofcolumns = self.raw_data.valuesFromSchema()
            self.raw_data.validateColumnLength(noofcolumns)
            # Reject files where an entire column is missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")
            self.log_writer.log(
                self.file_object,
                "Creating Training_Database and tables on the basis of given schema!!!")
            # Merge the validated files, then drop the staging folder.
            self.raw_data.combinefiles()
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")
            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")
            self.log_writer.log(self.file_object, "Extracting csv file from table")
        except Exception as e:
            # Record the failure before re-raising to the caller.
            self.log_writer.log(self.file_object, "{}".format(e))
            raise e
        finally:
            # BUG FIX: the original closed the log file only on the success
            # path, leaking the handle whenever any step raised.
            self.file_object.close()
class train_validation:
    """Validates raw training files and loads good data into a MongoDB collection."""

    def __init__(self, path):
        self.raw_data = Raw_Data_validation(path)
        self.dBOperation = dBOperation()
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Validate the raw files, insert good data into a collection, export csv.

        Raises:
            Exception: Re-raised from any failing step.
        """
        try:
            self.log_writer.log(self.file_object, 'Start of Validation on files!!')
            # Column layout expected by the schema.
            column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # Regex used to validate the raw file names.
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.validationFileNameRaw(regex)
            # Validate the column length in each file.
            self.raw_data.validateColumnLength(noofcolumns)
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")
            self.log_writer.log(
                self.file_object,
                "Creating Training_Database and collection on the basis of given schema!!!")
            # Create (or open) the target collection.
            collection = self.dBOperation.createCollection()
            self.log_writer.log(self.file_object, "collection creation Completed!!")
            self.log_writer.log(self.file_object,
                                "Insertion of Data into collection started!!!!")
            # Insert the validated csv files into the collection.
            self.dBOperation.GoodDatainsertIntoCollection(collection)
            self.log_writer.log(self.file_object, "Insertion in Table completed!!!")
            self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!")
            # Good data is already in the collection; drop the staging folder.
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")
            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchive()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")
            self.log_writer.log(self.file_object, "Extracting csv file from table")
            # Export the collection contents back to a csv file.
            self.dBOperation.selectingDatafromCollectionintocsv(collection)
        except Exception as e:
            raise e
        finally:
            # BUG FIX: the original closed the log file only on the success
            # path, leaking the handle whenever any step raised.
            self.file_object.close()
class train_validation:
    """Mushroom-classifier training validation with S3 storage and email alerts."""

    def __init__(self, path):
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        # Logging target is a named log collection, not a file handle.
        self.file_object = 'Training_Main_Log'
        self.log_writer = logger.App_Logger()
        self.emailObj = email()
        self.awsObj = AwsStorageManagement()

    def train_validation(self):
        """Validate, transform and load training data, then email a summary.

        Raises:
            Exception: Re-raised after being logged.
        """
        try:
            self.log_writer.log(self.file_object,
                                'Start of Validation on files for Training!!')
            # Expected name-stamp lengths and column layout from the schema.
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # Regex used to validate the raw file names.
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            self.raw_data.validateColumnLength(noofcolumns)
            # Reject files where an entire column is missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")
            self.log_writer.log(self.file_object, "Starting Data Transforamtion!!")
            # Adds quotes to the '?' values in some columns.
            self.dataTransform.addQuotesToStringValuesInColumn()
            self.log_writer.log(self.file_object, "DataTransformation Completed!!!")
            self.log_writer.log(
                self.file_object,
                "Creating Training_Database and tables on the basis of given schema!!!")
            # NOTE(review): no createTableDb call is made here even though the
            # next log claims table creation completed — confirm whether
            # insertIntoTableGoodData creates the table implicitly.
            self.log_writer.log(self.file_object, "Table creation Completed!!")
            self.log_writer.log(self.file_object,
                                "Insertion of Data into Table started!!!!")
            self.dBOperation.insertIntoTableGoodData('mushroomClassifierDB')
            self.log_writer.log(self.file_object, "Insertion in Table completed!!!")
            self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!")
            # Good data is already in the table; drop the staging folder.
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")
            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")
            self.log_writer.log(self.file_object, "Extracting csv file from table")
            self.dBOperation.selectingDatafromtableintocsv('mushroomClassifierDB')

            # Build and send the summary email, listing any rejected files.
            msg = MIMEMultipart()
            msg['Subject'] = 'MushroomTypeClassifier - Train Validation | ' + str(
                datetime.now())
            file_list = self.awsObj.listDirFiles('Training_Bad_Raw_Files_Validated')
            if file_list:  # truthiness instead of len(...) >= 1
                file_str = ','.join(file_list)
            else:
                file_str = 'No Bad Files'
            body = 'Model Train Validation Done Successfully... <br><br> Fault File List: <br>' + file_str + '<br><br>Thanks and Regards, <br> Rahul Garg'
            msg.attach(MIMEText(body, 'html'))
            to_addr = ['*****@*****.**']
            self.emailObj.trigger_mail(to_addr, [], msg)
        except Exception as e:
            # Record the failure before surfacing it to the caller
            # (the original re-raised silently).
            self.log_writer.log(self.file_object, "Error during train validation: " + str(e))
            raise e
def __init__(self):
    """Initialise validation, transformation, DB and logging helpers.

    Logging goes through App_Logger; the commented-out file-based log
    handle was removed as dead code.
    """
    self.raw_data = Raw_Data_validation()
    self.dataTransform = dataTransform()
    # Kept for parity with the DB-backed pipelines; may be unused here.
    self.dBOperation = dBOperation()
    self.log_writer = logger.App_Logger()