class prediction:
    """Batch-prediction pipeline: validates/loads the prediction data, imputes
    missing values, scales the numerical columns, routes each KMeans cluster to
    its own model, and appends all predictions to a single output CSV."""

    def __init__(self, path):
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the full prediction pipeline.

        Returns:
            str: path of the CSV file the predictions were written to.
        Raises:
            Exception: re-raised after logging if any pipeline step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            # check if missing values are present in the dataset
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            # if missing values are there, replace them appropriately.
            if (is_null_present):
                data = preprocessor.impute_missing_values(data, cols_with_missing_values)  # missing value imputation
            # Proceeding with more data pre-processing steps
            X = preprocessor.scale_numerical_columns(data)
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            # assign every row to the cluster it belongs to
            clusters = kmeans.predict(X)
            X['clusters'] = clusters
            clusters = X['clusters'].unique()
            predictions = []
            for i in clusters:
                # predict each cluster's rows with the model trained for that cluster
                cluster_data = X[X['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = (model.predict(cluster_data))
                predictions = predictions + list(result)
            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv", header=True, mode='a+')  # appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
def __init__(self, path):
    """Set up logging, AWS storage and email helpers for one prediction run."""
    # logical log target name (a key, not an open file handle)
    self.file_object = 'Prediction_Log'
    self.log_writer = logger.App_Logger()
    self.awsObj = AwsStorageManagement()
    self.emailObj = email()
    # path may be None when the caller does not need file validation
    if path is not None:
        self.pred_data_val = Prediction_Data_validation(path)
class prediction:
    """Prediction pipeline (log-transform variant): loads data, imputes missing
    values, log-transforms and standard-scales it, then predicts per KMeans
    cluster and writes both a CSV file and a JSON payload."""

    def __init__(self, path):
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run prediction.

        Returns:
            tuple: (csv path, predictions as JSON string, predictions DataFrame).
        Raises:
            Exception: re-raised after logging if any pipeline step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            # NOTE(review): other variants in this codebase use Data_Getter_Pred --
            # confirm Data_getter is the class name this project's loader exports.
            data_getter = data_loader_prediction.Data_getter(self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)
            data = preprocessor.logTransformation(data)
            # scale the prediction data
            data_scaled = pandas.DataFrame(preprocessor.standardScalingData(data), columns=data.columns)
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            clusters = kmeans.predict(data_scaled)  # cluster id for every row
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Predictions'])
            json_data = result.to_json(orient='values')
            print(json_data)
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv", header=True)  # appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, json_data, result
class prediction:
    """Wafer prediction pipeline: imputes missing values, drops zero-variance
    columns, routes each KMeans cluster to its own model, and appends
    (Wafer, Prediction) rows to Prediction_Output_File/Predictions.csv."""

    def __init__(self, path):
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        # path may be None when the caller does not need file validation
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run prediction.

        Returns:
            tuple: (csv path, head of the last cluster's results as JSON records).
        Raises:
            Exception: re-raised after logging if any pipeline step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
            data = preprocessor.remove_columns(data, cols_to_drop)
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            # 'Wafer' is an identifier column, not a feature
            clusters = kmeans.predict(data.drop(['Wafer'], axis=1))
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            path = "Prediction_Output_File/Predictions.csv"
            first_write = True  # emit the CSV header only for the first cluster chunk
            for i in clusters:
                # selecting all the records of a particular cluster type
                cluster_data = data[data['clusters'] == i]
                # getting all the wafer names
                wafer_names = list(cluster_data['Wafer'])
                # BUGFIX: drop the id/cluster columns from the CLUSTER SUBSET -- the
                # old code dropped them from the whole `data` frame, so every model
                # predicted on ALL rows and the zip with wafer_names was misaligned.
                cluster_data = cluster_data.drop(['Wafer', 'clusters'], axis=1)
                # finding and loading the model for this cluster
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                pred_values = list(model.predict(cluster_data))
                result = pandas.DataFrame(list(zip(wafer_names, pred_values)), columns=['Wafer', 'Prediction'])
                # appends result to prediction file (header only once, not per cluster)
                result.to_csv(path, header=first_write, mode='a+')
                first_write = False
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
class prediction:
    """Prediction pipeline that drops EDA-rejected columns, encodes categorical
    features, standard-scales the data, and predicts per KMeans cluster."""

    def __init__(self, path):
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run prediction.

        Returns:
            tuple: (csv path, head of the predictions as JSON records).
        Raises:
            Exception: re-raised after logging if any pipeline step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            # columns judged irrelevant during EDA
            data = preprocessor.dropUnnecessaryColumns(data, [
                'id', 'region', 'url', 'region_url', 'image_url', 'state',
                'type', 'dogs_allowed'
            ])
            # get encoded values for categorical data
            data = preprocessor.encodeCategoricalValuesPrediction(data)
            data_scaled = pandas.DataFrame(preprocessor.standardScalingData(data), columns=data.columns)
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            clusters = kmeans.predict(data_scaled)  # cluster id for every row
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv", header=True, mode='a+')  # appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
class prediction:
    """Prediction pipeline for the restaurant dataset: drops unused columns,
    removes rows with missing values, encodes categoricals, scales, then
    predicts per KMeans cluster."""

    def __init__(self, path):
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run prediction; returns the output CSV path.

        Raises:
            Exception: re-raised after logging if any pipeline step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            data = preprocessor.dropUnnecessaryColumns(data, ["serial", "rate", "listed_in(type)", "listed_in(city)"])
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if (is_null_present):
                # rows with any missing value are dropped rather than imputed
                data = data.dropna(how='any')
            # get encoded values for categorical data
            data = preprocessor.encodeCategoricalValues(data)
            # scale the prediction data
            data_scaled = pandas.DataFrame(preprocessor.standardScalingData(data), columns=data.columns)
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            clusters = kmeans.predict(data_scaled)  # cluster id for every row
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv", header=True)  # appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
def __init__(self, path):
    """Wire up the validation, transformation, DB and logging helpers for the
    prediction-validation run rooted at `path`."""
    self.raw_data = Prediction_Data_validation(path)
    self.dataTransform = dataTransformPredict()
    self.dBOperation = dBOperation()
    # NOTE(review): this log file handle is never closed explicitly.
    self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
    self.log_writer = logger.App_Logger()
def __init__(self, path, execution_id): self.execution_id = execution_id #self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+') #self.log_writer = logger.App_Logger() self.log_database = "wafer_prediction_log" self.log_collection = "prediction_log" self.log_db_writer = App_LoggerDB(execution_id) self.az_blob_mgt = AzureBlobManagement() if path is not None: self.pred_data_val = Prediction_Data_validation(path, execution_id)
class prediction:
    """Prediction pipeline that predicts on a fixed 10-feature subset, routed
    per KMeans cluster; appends results to Prediction_Output_File/Predictions.csv."""

    def __init__(self, path):
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run prediction.

        Returns:
            tuple: (csv path, head of the predictions as JSON records).
        Raises:
            Exception: re-raised after logging if any pipeline step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            data.replace(-1, 1, inplace=True)
            # Features selected during EDA. BUGFIX: take an explicit copy -- the old
            # code kept a slice/view of `data`, so the `new_data['clusters'] = ...`
            # assignment below was chained assignment on a view (pandas
            # SettingWithCopyWarning, behavior not guaranteed).
            new_data = data[[
                'H18', 'F76', 'F46', 'G57', 'C13', 'A71', 'E115', 'F56', 'I59', 'A91'
            ]].copy()
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            clusters = kmeans.predict(new_data)  # cluster id for every row
            new_data['clusters'] = clusters
            clusters = new_data['clusters'].unique()
            result = []  # predictions accumulated across clusters
            for i in clusters:
                cluster_data = new_data[new_data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv", header=True, mode='a+')  # appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
def __init__(self, path, execution_id):
    """Wire validation, transformation, MongoDB logging and Azure blob helpers
    for one wafer prediction-validation run identified by `execution_id`."""
    self.raw_data = Prediction_Data_validation(path, execution_id)
    self.dataTransform = dataTransformPredict(execution_id)
    self.dBOperationMongoDB = DbOperationMongoDB(execution_id)
    self.log_database = "wafer_prediction_log"
    self.log_collection = "prediction_main_log"
    self.execution_id = execution_id
    self.logDB_write = App_LoggerDB(execution_id=execution_id)
    self.az_blob_mgt = AzureBlobManagement()
class prediction:
    """Wafer prediction pipeline (compact variant): imputes missing values,
    drops zero-variance columns, routes each KMeans cluster to its model, and
    appends (Wafer, Prediction) rows to the output CSV."""

    def __init__(self, path):
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        # path may be None when the caller does not need file validation
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run prediction.

        Returns:
            tuple: (csv path, head of the last cluster's results as JSON records).
        Raises:
            Exception: re-raised after logging if any pipeline step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
            data = preprocessor.remove_columns(data, cols_to_drop)
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            # 'Wafer' is an identifier column, not a feature
            clusters = kmeans.predict(data.drop(['Wafer'], axis=1))
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            path = "Prediction_Output_File/Predictions.csv"
            first_write = True  # emit the CSV header only for the first cluster chunk
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                wafer_names = list(cluster_data['Wafer'])
                # BUGFIX: drop the id/cluster columns from the CLUSTER SUBSET -- the
                # old code did `data.drop(...)`, so every cluster's model predicted
                # on ALL rows and the zip with wafer_names was misaligned.
                cluster_data = cluster_data.drop(labels=['Wafer'], axis=1)
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = list(model.predict(cluster_data))
                result = pandas.DataFrame(list(zip(wafer_names, result)), columns=['Wafer', 'Prediction'])
                # appends result to prediction file (header only once, not per cluster)
                result.to_csv("Prediction_Output_File/Predictions.csv", header=first_write, mode='a+')
                first_write = False
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
def __init__(self, path, execution_id):
    """Wire validation, transformation, MongoDB logging and Azure blob helpers
    for one cement-strength prediction-validation run."""
    self.raw_data = Prediction_Data_validation(path, execution_id)
    self.dataTransform = dataTransformPredict(execution_id)
    self.dBOperationMongoDB = DbOperationMongoDB(execution_id)
    self.log_database = "strength_prediction_log"
    self.log_collection = "stg-prediction_main_log"
    self.execution_id = execution_id
    self.logDB_write = App_LoggerDB(execution_id=execution_id)
    self.az_blob_mgt = AzureBlobManagement()
class pred_validation:
    """Validates raw prediction files against the schema, loads good data into
    the database, archives bad files, and exports the final CSV for prediction.
    This variant performs no filename validation."""

    def __init__(self, path):
        self.raw_data = Prediction_Data_validation(path)
        self.dataTransform = dataTransformPredict()
        self.dBOperation = dBOperation()
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):
        """Run the full validation/ingestion sequence; re-raises any failure."""
        try:
            self.log_writer.log(self.file_object, 'Start of Validation on files for prediction!!')
            # extracting values from prediction schema
            column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # validating column length in the file
            self.raw_data.validateColumnLength(noofcolumns)
            # validating if any column has all values missing
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data Validation Complete!!")
            self.log_writer.log(self.file_object, "Creating Prediction_Database and tables on the basis of given schema!!!")
            # create database with given name, if present open the connection!
            # Create table with columns given in schema
            self.dBOperation.createTableDb('Prediction', column_names)
            self.log_writer.log(self.file_object, "Table creation Completed!!")
            self.log_writer.log(self.file_object, "Insertion of Data into Table started!!!!")
            # insert csv files in the table
            self.dBOperation.insertIntoTableGoodData('Prediction')
            self.log_writer.log(self.file_object, "Insertion in Table completed!!!")
            self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!")
            # Delete the good data folder after loading files in table
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")
            self.log_writer.log(self.file_object, "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # Move the bad files to archive folder
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(self.file_object, "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")
            self.log_writer.log(self.file_object, "Extracting csv file from table")
            # export data in table to csvfile
            self.dBOperation.selectingDatafromtableintocsv('Prediction')
        except Exception as e:
            raise e
class pred_validation:
    """Validates raw prediction files (filename, column count, missing values),
    transforms quoting, loads good data into the database, archives bad files,
    and exports the final CSV for prediction."""

    def __init__(self, path):
        self.raw_data = Prediction_Data_validation(path)
        self.dataTransform = dataTransformPredict()
        self.dBOperation = dBOperation()
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):
        """Run the full validation/ingestion sequence; re-raises any failure."""
        try:
            self.log_writer.log(self.file_object, 'Start of Validation on files for prediction!!')
            # schema values: timestamp lengths drive the filename validation below
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema()
            regex = self.raw_data.manualRegexCreation()
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile, LengthOfTimeStampInFile)
            self.raw_data.validateColumnLength(noofcolumns)
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Raw Data validation complete!!")
            self.log_writer.log(self.file_object, "Starting Data transformation!!")
            self.dataTransform.replaceSingleQuotesToDouble()
            self.log_writer.log(self.file_object, "DataTransformation Completed!!!")
            self.log_writer.log(self.file_object, "Creating Prediction_Database and tables on the basis of given schema!!!")
            self.dBOperation.createTableDb('Prediction', column_names)
            self.log_writer.log(self.file_object, "Table creation Completed!!")
            self.log_writer.log(self.file_object, "Insertion of Data into Table started!!!!")
            self.dBOperation.insertIntoTableGoodData('Prediction')
            self.log_writer.log(self.file_object, "Insertion in Table completed!!!")
            self.log_writer.log(self.file_object, "Deleting Good Data Folder")
            self.raw_data.deleteExistingGoodDataPredictingFolder()
            self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")
            self.log_writer.log(self.file_object, "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(self.file_object, "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object, "Validation Operation completed!!")
            self.log_writer.log(self.file_object, "Extracting csv file from table")
            self.dBOperation.selectingDatafromtableintocsv('Prediction')
        except Exception as e:
            raise e
class prediction:
    """CatBoost-based prediction pipeline (no clustering): cleans and encodes
    the input, predicts with a single saved model, and writes yes/no labels
    to Prediction_Output_File/Predictions.csv."""

    def __init__(self, path):
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run prediction; returns the output CSV path.

        Raises:
            Exception: re-raised after logging if any pipeline step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = data_getter.get_data()
            # drop a stray 'Unnamed: 0' index column if the CSV round-trip added one
            for cl in data.columns:
                if cl == 'Unnamed: 0':
                    data.drop('Unnamed: 0', axis=1, inplace=True)
            # Dropping column after performing EDA
            preprocessor_cus = preprocess_cus.Preprocessor_cus(self.file_object, self.log_writer)
            data = preprocessor_cus.drop_column(data)
            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            # replacing '?' values with np.nan as discussed in the EDA part
            data = preprocessor.replaceInvalidValuesWithNull(data)
            # get encoded values for categorical data
            data = preprocessor_cus.test_data_encode(data)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            result = []  # accumulated raw predictions
            model = file_loader.load_model('CatBoost')
            for val in (model.predict(data)):
                result.append(val)
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            # map the model's 0/1 output to human-readable labels
            result['Predictions'].replace({0: "no", 1: "yes"}, inplace=True)
            result.to_csv("Prediction_Output_File/Predictions.csv", header=True)  # appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class pred_validation:
    """Validates raw prediction files (filename, column count, missing values)
    and transforms blanks to "Null"; the SQL-table load/export steps are
    disabled in this deployment."""

    def __init__(self):
        self.raw_data = Prediction_Data_validation()
        self.dataTransform = dataTransformPredict()
        self.dBOperation = dBOperation()
        # logical log target name (a key, not an open file handle)
        self.file_object = 'wafer_log'  # open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):
        """Run filename/column/missing-value validation plus data transformation;
        re-raises any failure."""
        try:
            self.log_writer.log(self.file_object, 'Start of Validation on files for prediction!!')
            # extracting values from prediction schema
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # getting the regex defined to validate filename
            regex = self.raw_data.manualRegexCreation()
            # validating filename of prediction files
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile, LengthOfTimeStampInFile)
            # validating column length in the file
            self.raw_data.validateColumnLength(noofcolumns)
            # validating if any column has all values missing
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object, "Prediction Raw Data Validation Complete!!")
            self.log_writer.log(self.file_object, ("Starting Data Transforamtion!!"))
            # replacing blanks in the csv file with "Null" values to insert in table
            self.dataTransform.replaceMissingWithNull()
            self.log_writer.log(self.file_object, "DataTransformation Completed!!!")
            # NOTE: the table-creation / good-data insertion / bad-file archiving
            # steps that other variants perform here are intentionally disabled
            # in this deployment (previously present as commented-out code).
            self.log_writer.log(self.file_object, "Validation Operation completed!!")
        except Exception as e:
            raise e
class prediction:
    """Cement-strength prediction pipeline backed by MongoDB logging and Azure
    blob storage (instead of the local-file variants elsewhere in this file)."""

    def __init__(self, path, execution_id):
        self.execution_id = execution_id
        self.log_database = "strength_prediction_log"
        self.log_collection = "prediction_log"
        self.log_db_writer = App_LoggerDB(execution_id)
        self.az_blob_mgt = AzureBlobManagement()
        # path may be None when the caller only needs the logging/blob helpers
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path, execution_id)

    def predictionFromModel(self):
        """Run prediction.

        Returns:
            tuple: (output container path, head of predictions as JSON records),
            or (empty path, message string) when no input data is available.
        Raises:
            Exception: re-raised after logging if any pipeline step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_db_writer.log(self.log_database, self.log_collection, 'Start of Prediction')
            print("start of prediction")
            data_getter = data_loader_prediction.Data_Getter_Pred(self.log_database, self.log_collection, self.execution_id)
            data = data_getter.get_data()
            path = ""
            # guard: nothing to predict on -- bail out early
            if data.__len__() == 0:
                self.log_db_writer.log(self.log_database, self.log_collection, "No data was present to perform prediction existing prediction method")
                return path, "No data was present to perform prediction"
            preprocessor = preprocessing.Preprocessor(self.log_database, self.log_collection, self.execution_id)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)
            data = preprocessor.logTransformation(data)
            print("after log Transformation")
            print(data)
            # scale the prediction data
            data_scaled = pandas.DataFrame(preprocessor.standardScalingData(data), columns=data.columns)
            print("standard scaling for data completed")
            print(data_scaled)
            file_loader = file_methods.File_Operation(self.log_database, self.log_collection, self.execution_id)
            # NOTE(review): 'kkmeans' looks like a typo for 'KMeans' -- confirm it
            # matches the name the clustering model was actually saved under.
            kmeans = file_loader.load_model('kkmeans')
            clusters = kmeans.predict(data_scaled)  # cluster id for every row
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                print(model_name)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['strength-Predictions'])
            print("results after prediction with prediction columns")
            print(result)
            path = "Prediction-Output-File"
            # appends result to the blob-hosted prediction file
            self.az_blob_mgt.saveDataFrametoCSV(path, "cement-strength-prediction.csv", result, header=True, mode="a+")
            self.log_db_writer.log(self.log_database, self.log_collection, 'End of Prediction')
        except Exception as ex:
            self.log_db_writer.log(self.log_database, self.log_collection, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
class prediction:
    """Forest-cover prediction pipeline: scales the data, routes each KMeans
    cluster to its own model, and writes named cover-type labels to CSV."""

    # class index -> cover-type label. Indices outside this map are silently
    # skipped, exactly matching the original 7-branch if/elif chain.
    _COVER_TYPES = {
        0: "Lodgepole_Pine",
        1: "Spruce_Fir",
        2: "Douglas_fir",
        3: "Krummholz",
        4: "Ponderosa_Pine",
        5: "Aspen",
        6: "Cottonwood_Willow",
    }

    def __init__(self, path):
        # NOTE(review): this log file handle is never closed explicitly.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run prediction; returns the output CSV path.

        Raises:
            Exception: re-raised after logging if any pipeline step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            data = preprocessor.scaleData(data)
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            clusters = kmeans.predict(data)  # cluster id for every row
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            result = []
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data)):
                    # dict dispatch replaces the old 7-branch if/elif chain;
                    # unknown class ids are skipped exactly as before
                    if val in self._COVER_TYPES:
                        result.append(self._COVER_TYPES[val])
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv", header=True, mode='a+')  # appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class prediction:
    """Cluster-wise prediction pipeline for the wafer dataset.

    For every KMeans cluster, loads the matching model, predicts the wafer
    outcomes and appends (wafer name, prediction) pairs to
    Prediction_Output_File/Predictions.csv.
    """

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", "a+")
        self.log_writer = logger.App_Logger()
        # A None path means the caller only wants prediction, not
        # re-validation of raw files.
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the prediction pipeline.

        Returns:
            tuple: (output CSV path, JSON preview of the last cluster's
            result frame).

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            # Delete the prediction file left over from the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, "Start of Prediction")
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer
            )
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)
            # Zero-variance columns carry no signal for the models.
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
            data = preprocessor.remove_columns(data, cols_to_drop)
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model("KMeans")
            # 'Wafer' is an identifier, not a feature: exclude it from clustering.
            clusters = kmeans.predict(data.drop(["Wafer"], axis=1))
            data["clusters"] = clusters
            clusters = data["clusters"].unique()
            for i in clusters:
                cluster_data = data[data["clusters"] == i]
                wafer_names = list(cluster_data["Wafer"])
                # BUG FIX: the original re-derived cluster_data from the FULL
                # frame (data.drop(...)), so every cluster's model was fed all
                # rows and zip(wafer_names, result) silently truncated and
                # mispaired wafers with predictions. Drop the identifier and
                # cluster columns from the cluster slice instead.
                cluster_data = cluster_data.drop(labels=["Wafer"], axis=1)
                cluster_data = cluster_data.drop(["clusters"], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = list(model.predict(cluster_data))
                result = pandas.DataFrame(
                    list(zip(wafer_names, result)), columns=["Wafer", "Prediction"]
                )
                path = "Prediction_Output_File/Predictions.csv"
                # One append per cluster; the file was cleared at the start.
                result.to_csv(
                    "Prediction_Output_File/Predictions.csv", header=True, mode="a+"
                )
            self.log_writer.log(self.file_object, "End of Prediction")
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                "Error occured while running the prediction!! Error:: %s" % ex,
            )
            raise ex
        # NOTE(review): if the data yields no clusters, `path`/`result` are
        # unbound here — matches original behavior; confirm callers never hit it.
        return path, result.head().to_json(orient="records")
class prediction:
    """Mushroom-classification prediction pipeline.

    Cleans the input, clusters rows with a saved KMeans model, predicts
    each cluster with its own model and writes the combined results to
    Prediction_Output_File/Predictions.csv.
    """

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the pipeline and return the output CSV path."""
        try:
            # Remove the output of the previous run before predicting anew.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.dropUnnecessaryColumns(data, ['veiltype'])
            # '?' placeholders become nulls so they can be imputed below.
            data = preprocessor.replaceInvalidValuesWithNull(data)
            is_null_present, cols_with_missing_values = \
                preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)
            data = preprocessor.encodeCategoricalValuesPrediction(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            data['clusters'] = kmeans.predict(data)

            result = []
            for cluster_id in data['clusters'].unique():
                cluster_rows = data[data['clusters'] == cluster_id]
                cluster_rows = cluster_rows.drop(['clusters'], axis=1)
                model = file_loader.load_model(
                    file_loader.find_correct_model_file(cluster_id))
                result.extend(model.predict(cluster_rows))

            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True)
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class prediction:
    """Mushroom-classifier prediction pipeline backed by AWS storage.

    Cleans and encodes the input, clusters it with a saved KMeans model,
    predicts each cluster with its own model, uploads the results CSV to
    AWS and sends a completion e-mail.
    """

    def __init__(self, path):
        # Log target is a string key (cloud logging), not an open file handle.
        self.file_object = 'Prediction_Log'
        self.log_writer = logger.App_Logger()
        self.awsObj = AwsStorageManagement()
        self.emailObj = email()
        # A None path means the caller only wants prediction, not validation.
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the pipeline.

        Returns:
            str: path string of the generated predictions CSV.

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            # Delete the prediction file left over from the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.dropUnnecessaryColumns(data, ['veil-type'])
            # '?' placeholders become nulls so they can be imputed below.
            data = preprocessor.replaceInvalidValuesWithNull(data)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if is_null_present:
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)
            data = preprocessor.encodeCategoricalValuesPrediction(data)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            # Removed leftover debug statement: print(kmeans.labels_)
            clusters = kmeans.predict(data)
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            result = []
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                # extend() replaces the per-value append loop; same output.
                result.extend(model.predict(cluster_data))
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            self.awsObj.saveDataframeToCsv('Prediction_Output_File',
                                           'Predictions.csv', result)
            self.log_writer.log(self.file_object, 'End of Prediction')
            # Notify stakeholders that the prediction batch finished.
            msg = MIMEMultipart()
            msg['Subject'] = 'MushroomTypeClassifier - Prediction Done | ' + str(
                datetime.now())
            body = 'Model Prediction Done Successfully... <br><br> Thanks and Regards, <br> Rahul Garg'
            msg.attach(MIMEText(body, 'html'))
            to_addr = ['*****@*****.**']
            self.emailObj.trigger_mail(to_addr, [], msg)
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class prediction:
    """Abalone ring-count prediction pipeline.

    Imputes missing values, clusters rows with a saved KMeans model (built
    without the 'Sex' column), predicts each cluster with its own model and
    appends the decoded ring-range labels to
    Prediction_Output_File/Predictions.csv.
    """

    # Numeric model output -> ring-range label; any other id maps to the
    # middle bucket, matching the original if/elif/else chain.
    _RING_LABELS = {0: '1-8 Rings', 1: '11+ Rings'}
    _DEFAULT_RING_LABEL = '9-10 Rings'

    def __init__(self, path):
        # Append-mode log handle shared by all pipeline helpers.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the pipeline.

        Returns:
            tuple: (output CSV path, DataFrame of decoded predictions).

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            # Delete the prediction file left over from the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.remove_columns(data, [])
            # '?' marks missing values in the raw files.
            data.replace('?', np.NaN, inplace=True)
            is_null_present, cols_with_missing_values = \
                preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)
            #data = preprocessor.encode_categorical_columns(data)
            # Clustering was trained without the categorical 'Sex' column.
            df = data.copy()
            df.drop(labels=['Sex'], axis=1, inplace=True)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            clusters = kmeans.predict(df)
            data['clusters'] = clusters
            # Per-cluster models were trained on encoded data.
            data = preprocessor.encode_categorical_columns(data)
            clusters = data['clusters'].unique()
            predictions = []
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = model.predict(np.array(cluster_data))
                # Decode via lookup table with default instead of an
                # if/elif/else chain; behavior is unchanged.
                predictions.extend(
                    self._RING_LABELS.get(res, self._DEFAULT_RING_LABEL)
                    for res in result)
            final = pd.DataFrame(list(zip(predictions)),
                                 columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            # mode='a+' appends to the prediction file (cleared above).
            final.to_csv("Prediction_Output_File/Predictions.csv",
                         header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, final
def __init__(self):
    """Initialize the prediction helper with key-based logging.

    NOTE(review): fragment — the enclosing class is not visible in this
    chunk of the file.
    """
    # Log target is a string key ('wafer_log'), not an open file handle.
    self.file_object = 'wafer_log'
    self.log_writer = logger.App_Logger()
    # Validator built without a path; presumably it resolves its own input
    # location internally — TODO confirm against Prediction_Data_validation.
    self.pred_data_val = Prediction_Data_validation()
class pred_validation:
    """Validates raw prediction files and loads the good data into MongoDB.

    Logging goes to a MongoDB-backed logger (App_LoggerDB) under the
    database/collection names stored on the instance; blob storage is
    handled through Azure.
    """

    def __init__(self, path, execution_id):
        """Wire up validation, transformation, DB and logging helpers.

        Args:
            path: location of the raw prediction batch files.
            execution_id: identifier threaded through every helper so one
                run's artifacts and logs can be correlated.
        """
        self.raw_data = Prediction_Data_validation(path, execution_id)
        self.dataTransform = dataTransformPredict(execution_id)
        self.dBOperationMongoDB = DbOperationMongoDB(execution_id)
        #self.dBOperation = dBOperation(execution_id)
        self.log_database = "wafer_prediction_log"
        self.log_collection = "prediction_main_log"
        self.execution_id = execution_id
        #self.log_writer = logger.App_Logger()
        self.logDB_write = App_LoggerDB(execution_id=execution_id)
        self.az_blob_mgt = AzureBlobManagement()
        #self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        #self.log_writer = logger.App_Logger()

    def prediction_validation(self):
        """Run the full validation/ingestion sequence for prediction data.

        Steps: schema checks on file names and columns, missing-value
        transformation, insertion of good data into MongoDB, archiving of
        bad files, and export of the ingested table back to CSV.

        Raises:
            Exception: propagated unchanged from any failing step.
        """
        try:
            self.logDB_write.log(
                self.log_database, self.log_collection,
                'Start of Validation on files for prediction!!')
            # Extracting values from prediction schema.
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema(
            )
            # Getting the regex defined to validate filename.
            regex = self.raw_data.manualRegexCreation()
            # Validating filename of prediction files.
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            # Validating column length in the file.
            self.raw_data.validateColumnLength(noofcolumns)
            # Validating if any column has all values missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Raw Data Validation Complete!!")
            self.logDB_write.log(self.log_database, self.log_collection,
                                 ("Starting Data Transforamtion!!"))
            # Replacing blanks in the csv file with "Null" values to insert
            # in table.
            self.dataTransform.replaceMissingWithNull()
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "DataTransformation Completed!!!")
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Creating Prediction_Database and tables on the basis of given schema!!!"
            )
            # Create database with given name, if present open the
            # connection! Create table with columns given in schema.
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Creating database and collection if not exist then insert record"
            )
            # Insert csv files in the table.
            self.dBOperationMongoDB.insertIntoTableGoodData(column_names)
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Insertion in Table completed!!!")
            #self.logDB_write.log(self.log_database, self.log_collection,"Deleting Good Data Folder!!!")
            # Delete the good data folder after loading files in table
            #self.raw_data.deleteExistingGoodDataTrainingFolder()
            #self.logDB_write.log(self.log_database, self.log_collection,"Good_Data folder deleted!!!")
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # Move the bad files to archive folder.
            print("moving bad files to archieve")
            self.raw_data.moveBadFilesToArchiveBad()
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Validation Operation completed!!")
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Extracting csv file from table")
            # Export data in table to csvfile.
            self.dBOperationMongoDB.selectingDatafromtableintocsv()
        except Exception as e:
            raise e
def __init__(self, path):
    """Set up validation, transformation and DB helpers for prediction.

    Args:
        path: directory containing the raw prediction batch files.

    NOTE(review): fragment — the enclosing class is not visible in this
    chunk of the file.
    """
    self.raw_data = Prediction_Data_validation(path)
    self.dataTransform = dataTransformPredict()
    self.dBOperation = dBOperation()
    # Append-mode log handle shared by the validation steps.
    self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
    self.log_writer = logger.App_Logger()
class pred_validation:
    """Validates raw prediction files and loads the good data into a
    database collection, then exports it back to CSV for the prediction
    pipeline."""

    def __init__(self, path):
        """Wire up validation, DB and logging helpers.

        Args:
            path: directory containing the raw prediction batch files.
        """
        self.raw_data = Prediction_Data_validation(path)
        self.dBOperation = dBOperation()
        # Append-mode log handle; closed at the end of prediction_validation.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):
        """Run the validation/ingestion sequence for prediction data.

        Column-length validation runs; filename and missing-value checks
        are currently disabled (commented out). Good data is inserted into
        a collection and exported to CSV; bad files are archived.

        Raises:
            Exception: propagated unchanged from any failing step.
        """
        try:
            self.log_writer.log(
                self.file_object,
                'Start of Validation on files for prediction!!')
            # Extracting values from prediction schema.
            column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # Getting the regex defined to validate filename.
            #regex = self.raw_data.manualRegexCreation()
            # Validating filename of prediction files.
            #self.raw_data.validationFileNameRaw(regex)
            # Validating number of columns.
            self.raw_data.validateColumnLength(noofcolumns)
            # Validating if any column has all values missing.
            #self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object,
                                "Raw Data Validation Complete!!")
            self.log_writer.log(
                self.file_object,
                "Creating Prediction_Database and collection on the basis of given schema!!!"
            )
            # Create database with given name, if present open the
            # connection! Create table with columns given in schema.
            collection = self.dBOperation.createCollection()
            self.log_writer.log(self.file_object,
                                "collection creation Completed!!")
            self.log_writer.log(
                self.file_object,
                "Insertion of Data into collection started!!!!")
            # Insert csv files in the collection.
            self.dBOperation.GoodDatainsertIntoCollection(collection)
            self.log_writer.log(self.file_object,
                                "Insertion in collection completed!!!")
            self.log_writer.log(self.file_object,
                                "Deleting Good Data Folder!!!")
            # Delete the good data folder after loading files in table.
            self.raw_data.deleteExistingGoodDataPredictionFolder()
            self.log_writer.log(self.file_object,
                                "Good_Data folder deleted!!!")
            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # Move the bad files to archive folder.
            self.raw_data.moveBadFilesToArchive()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object,
                                "Validation Operation completed!!")
            self.log_writer.log(self.file_object,
                                "Extracting csv file from table")
            # Export data in table to csvfile.
            self.dBOperation.selectingDatafromCollectionintocsv(collection)
            # Release the log handle once the run is finished.
            self.file_object.close()
        except Exception as e:
            raise e
class prediction:
    """Insurance-claim prediction pipeline.

    Drops identifier columns, imputes and encodes the data, clusters it
    with a saved KMeans model and predicts each cluster with its own
    model, appending 'Y'/'N' labels to
    Prediction_Output_File/Predictions.csv.
    """

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the pipeline and return the output CSV path."""
        try:
            # Clear the output of the previous run first.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            # Columns the models were not trained on.
            data = preprocessor.remove_columns(data, [
                'policy_number', 'policy_bind_date', 'policy_state',
                'insured_zip', 'incident_location', 'incident_date',
                'incident_state', 'incident_city', 'insured_hobbies',
                'auto_make', 'auto_model', 'auto_year', 'age',
                'total_claim_amount'
            ])
            data.replace('?', np.NaN, inplace=True)
            null_flag, null_columns = preprocessor.is_null_present(data)
            if null_flag:
                data = preprocessor.impute_missing_values(data, null_columns)
            data = preprocessor.encode_categorical_columns(data)
            data = preprocessor.scale_numerical_columns(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            data['clusters'] = kmeans.predict(data)

            predictions = []
            for cluster_id in data['clusters'].unique():
                rows = data[data['clusters'] == cluster_id]
                rows = rows.drop(['clusters'], axis=1)
                model = file_loader.load_model(
                    file_loader.find_correct_model_file(cluster_id))
                # Decode: model output 0 -> 'N', anything else -> 'Y'.
                predictions.extend('N' if res == 0 else 'Y'
                                   for res in model.predict(rows))

            final = pd.DataFrame(list(zip(predictions)),
                                 columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv",
                         header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class prediction:
    """Income-class prediction pipeline.

    Cleans the data, scales numeric and encodes categorical columns,
    clusters the combined frame with a saved KMeans model and predicts
    each cluster with its own model, appending '<=50K'/'>50K' labels to
    Prediction_Output_File/Predictions.csv.
    """

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the pipeline and return the output CSV path."""
        try:
            # Clear the output of the previous run first.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            # 'education' does not contribute to prediction.
            data = preprocessor.remove_columns(data, ['education'])
            data = preprocessor.remove_unwanted_spaces(data)
            # '?' placeholders become NaN so they can be imputed.
            data.replace('?', np.NaN, inplace=True)
            null_flag, null_columns = preprocessor.is_null_present(data)
            if null_flag:
                data = preprocessor.impute_missing_values(data, null_columns)

            # Scaled numeric and encoded categorical parts, side by side.
            scaled_num_df = preprocessor.scale_numerical_columns(data)
            cat_df = preprocessor.encode_categorical_columns(data)
            X = pd.concat([scaled_num_df, cat_df], axis=1)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            X['clusters'] = kmeans.predict(X)

            predictions = []
            for cluster_id in X['clusters'].unique():
                rows = X[X['clusters'] == cluster_id]
                rows = rows.drop(['clusters'], axis=1)
                model = file_loader.load_model(
                    file_loader.find_correct_model_file(cluster_id))
                # Decode: model output 0 -> '<=50K', anything else -> '>50K'.
                predictions.extend('<=50K' if res == 0 else '>50K'
                                   for res in model.predict(rows))

            final = pd.DataFrame(list(zip(predictions)),
                                 columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv",
                         header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
def __init__(self, path):
    """Open the prediction log and, when a path is given, set up validation.

    Args:
        path: directory of raw prediction files; may be None when the
            caller only needs prediction without re-validation.

    NOTE(review): fragment — the enclosing class is not visible in this
    chunk of the file.
    """
    self.file_object = open("Prediction_Logs/Prediction_Log.txt", "a+")
    self.log_writer = logger.App_Logger()
    # Skip validator construction when no input path was supplied.
    if path is not None:
        self.pred_data_val = Prediction_Data_validation(path)
class pred_validation:
    """Validates raw prediction files, runs EDA-based transformation and
    loads good data into the 'Prediction' database table, then exports it
    back to CSV."""

    def __init__(self, path):
        """Wire up validation, transformation, DB and logging helpers.

        Args:
            path: directory containing the raw prediction batch files.
        """
        self.raw_data = Prediction_Data_validation(path)
        self.dataTransform = dataTransformPredict()
        self.dBOperation = dBOperation()
        # Append-mode log handle shared by the validation steps.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):
        """Run the full validation/ingestion sequence for prediction data.

        Steps: schema checks, EDA transformation, table creation and
        insertion (against the post-EDA schema), archiving of bad files,
        and export of the ingested table to CSV.

        Raises:
            Exception: propagated unchanged from any failing step.
        """
        try:
            self.log_writer.log(
                self.file_object,
                'Start of Validation on files for prediction!!')
            # Extracting values from prediction schema.
            column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # Getting the regex defined to validate filename.
            regex = self.raw_data.manualRegexCreation()
            # Validating filename of prediction files.
            self.raw_data.validationFileNameRaw(regex)
            # Validating column length in the file.
            self.raw_data.validateColumnLength(noofcolumns)
            # Validating if any column has all values missing.
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object,
                                "Raw Data Validation Complete!!")
            self.log_writer.log(self.file_object, "Affan here")
            self.log_writer.log(self.file_object,
                                ("Starting Data Transforamtion!!"))
            #self.dataTransform.addQuotesToStringValuesInColumn()
            # Replacing blanks in the csv file with "Null" values to insert
            # in table; EDA() performs the dataset-specific transformation.
            self.dataTransform.EDA()
            self.log_writer.log(self.file_object, "EDA Completed!!!")
            self.log_writer.log(self.file_object,
                                "DataTransformation Completed!!!")
            # The EDA step changes the schema, so re-read the updated
            # column list before creating the table.
            column_names_Updated, noofcolumns = self.raw_data.valuesFromSchema_updated()
            self.log_writer.log(
                self.file_object,
                "Creating Prediction_Database and tables on the basis of given schema!!!")
            # Create database with given name, if present open the
            # connection! Create table with columns given in schema.
            #self.dBOperation.dataBaseConnection('Prediction')
            #column_names_updated_after_EDA = ['PassengerId','Pclass','Sex','Age','SibSp','Parch','Fare','Embarked','Title']
            #self.log_writer.log(self.file_object,"DB Conn Chk")
            self.dBOperation.createTableDb('Prediction', column_names_Updated)
            self.log_writer.log(self.file_object, "Table creation Completed!!")
            self.log_writer.log(self.file_object,
                                "Insertion of Data into Table started!!!!")
            # Insert csv files in the table.
            self.dBOperation.insertIntoTableGoodData('Prediction')
            self.log_writer.log(self.file_object,
                                "Insertion in Table completed!!!")
            self.log_writer.log(self.file_object,
                                "Deleting Good Data Folder!!!")
            # Delete the good data folder after loading files in table.
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object,
                                "Good_Data folder deleted!!!")
            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # Move the bad files to archive folder.
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deletedaffan1!!")
            self.log_writer.log(self.file_object,
                                "Validation Operation completedaffan2!!")
            self.log_writer.log(self.file_object,
                                "Extracting csv file from table affan3")
            # Export data in table to csvfile.
            self.dBOperation.selectingDatafromtableintocsv('Prediction')
            self.log_writer.log(self.file_object,
                                "Selected data from table into csv affan")
        except Exception as e:
            raise e