class prediction:
    """Cluster-wise prediction: route each record to the model trained for
    its KMeans cluster and append the results to a CSV file."""

    def __init__(self, path):
        # NOTE(review): this log handle is opened here and never closed;
        # cleanup relies on interpreter shutdown — confirm acceptable.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Load validated data, impute missing values, scale numeric columns,
        assign KMeans clusters, predict with each cluster's model, and append
        the predictions to Prediction_Output_File/Predictions.csv.

        Returns:
            str: path of the CSV file the predictions were written to.

        Raises:
            Exception: re-raised after logging when any pipeline step fails.
        """
        try:
            # Remove the prediction file left over from the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            # Impute only when the dataset actually contains missing values.
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)
            X = preprocessor.scale_numerical_columns(data)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            X['clusters'] = kmeans.predict(X)
            predictions = []
            for cluster_id in X['clusters'].unique():
                cluster_data = X[X['clusters'] == cluster_id].drop(
                    ['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(cluster_id)
                model = file_loader.load_model(model_name)
                # extend() instead of repeated list concatenation (linear total).
                predictions.extend(model.predict(cluster_data))
            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv(path, header=True, mode='a+')  # appends to the file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path
class prediction:
    """Prediction pipeline: drop EDA-rejected columns, encode, scale,
    cluster with KMeans, then predict with the per-cluster model."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the full prediction pipeline and append results to CSV.

        Returns:
            tuple[str, str]: (output CSV path, first rows as a JSON records string).

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            # Columns found to be irrelevant during EDA.
            data = preprocessor.dropUnnecessaryColumns(data, [
                'id', 'region', 'url', 'region_url', 'image_url', 'state',
                'type', 'dogs_allowed'
            ])
            # Encode categorical features with the training-time encoders.
            data = preprocessor.encodeCategoricalValuesPrediction(data)
            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            data_scaled['clusters'] = kmeans.predict(data_scaled)
            result = []
            for cluster_id in data_scaled['clusters'].unique():
                cluster_data = data_scaled[
                    data_scaled['clusters'] == cluster_id].drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(cluster_id)
                model = file_loader.load_model(model_name)
                result.extend(model.predict(cluster_data.values))
            result = pandas.DataFrame(result, columns=['Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True, mode='a+')  # appends to the file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path, result.head().to_json(orient="records")
class prediction:
    """Prediction pipeline with log-transform and scaling before the
    per-cluster models are applied."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the prediction pipeline and write results to CSV.

        Returns:
            tuple: (output CSV path, predictions as a JSON string,
            predictions DataFrame).

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_getter(self.file_object,
                                                             self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)
            data = preprocessor.logTransformation(data)
            # Scale the prediction data with the training-time scaler.
            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            data_scaled['clusters'] = kmeans.predict(data_scaled)
            result = []
            for cluster_id in data_scaled['clusters'].unique():
                cluster_data = data_scaled[
                    data_scaled['clusters'] == cluster_id].drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(cluster_id)
                model = file_loader.load_model(model_name)
                result.extend(model.predict(cluster_data.values))
            result = pandas.DataFrame(result, columns=['Predictions'])
            json_data = result.to_json(orient='values')
            print(json_data)
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True)  # overwrites with this run's output
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path, json_data, result
class prediction:
    """Wafer prediction: pair each wafer id with the output of the model
    trained for its KMeans cluster."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Predict per cluster and append (Wafer, Prediction) rows to CSV.

        Returns:
            tuple[str, str]: (output CSV path, last cluster's head as JSON records).

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)
            # Zero-variance columns carry no signal for the models.
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
            data = preprocessor.remove_columns(data, cols_to_drop)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            # 'Wafer' is an identifier, not a feature — excluded from clustering.
            data['clusters'] = kmeans.predict(data.drop(['Wafer'], axis=1))
            path = "Prediction_Output_File/Predictions.csv"
            for i in data['clusters'].unique():
                # Select only this cluster's records.
                cluster_data = data[data['clusters'] == i]
                wafer_names = list(cluster_data['Wafer'])
                # BUGFIX: drop the id/cluster columns from the *cluster subset*.
                # The original dropped them from the full `data`, so every
                # cluster's model predicted on all rows and the zip with
                # wafer_names was misaligned.
                cluster_data = cluster_data.drop(['Wafer', 'clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                pred_values = list(model.predict(cluster_data))
                result = pandas.DataFrame(list(zip(wafer_names, pred_values)),
                                          columns=['Wafer', 'Prediction'])
                result.to_csv(path, header=True, mode='a+')  # append per cluster
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path, result.head().to_json(orient="records")
class prediction:
    """Restaurant-data prediction: drop EDA-rejected columns, encode, scale,
    then predict with the per-cluster model."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the prediction pipeline and write results to CSV.

        Returns:
            str: path of the CSV file the predictions were written to.

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.dropUnnecessaryColumns(
                data, ["serial", "rate", "listed_in(type)", "listed_in(city)"])
            # Rows with missing values are dropped rather than imputed here.
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = data.dropna(how='any')
            data = preprocessor.encodeCategoricalValues(data)
            # Scale the prediction data with the training-time scaler.
            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            data_scaled['clusters'] = kmeans.predict(data_scaled)
            result = []
            for cluster_id in data_scaled['clusters'].unique():
                cluster_data = data_scaled[
                    data_scaled['clusters'] == cluster_id].drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(cluster_id)
                model = file_loader.load_model(model_name)
                result.extend(model.predict(cluster_data.values))
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True)  # overwrites with this run's output
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path
class prediction:
    """Prediction on a fixed feature subset, routed through per-cluster models."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Predict using the selected feature columns and append to CSV.

        Returns:
            tuple[str, str]: (output CSV path, first rows as JSON records).

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data.replace(-1, 1, inplace=True)
            # .copy() so adding 'clusters' below writes to an independent
            # frame instead of a view of `data` (chained-assignment hazard).
            new_data = data[[
                'H18', 'F76', 'F46', 'G57', 'C13', 'A71', 'E115', 'F56',
                'I59', 'A91'
            ]].copy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            new_data['clusters'] = kmeans.predict(new_data)
            result = []
            for cluster_id in new_data['clusters'].unique():
                cluster_data = new_data[
                    new_data['clusters'] == cluster_id].drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(cluster_id)
                model = file_loader.load_model(model_name)
                result.extend(model.predict(cluster_data))
            result = pandas.DataFrame(result, columns=['Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True, mode='a+')  # appends to the file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path, result.head().to_json(orient="records")
class prediction:
    """Wafer prediction: pair wafer ids with per-cluster model outputs."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Predict per cluster and append (Wafer, Prediction) rows to CSV.

        Returns:
            tuple[str, str]: (output CSV path, last cluster's head as JSON records).

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)
            # Zero-variance columns carry no signal for the models.
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
            data = preprocessor.remove_columns(data, cols_to_drop)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            # 'Wafer' is an identifier, not a feature — excluded from clustering.
            data['clusters'] = kmeans.predict(data.drop(['Wafer'], axis=1))
            path = "Prediction_Output_File/Predictions.csv"
            for i in data['clusters'].unique():
                cluster_data = data[data['clusters'] == i]
                wafer_names = list(cluster_data['Wafer'])
                # BUGFIX: drop id/cluster columns from the *cluster subset*.
                # The original dropped them from the full `data`, so every
                # cluster's model predicted on all rows and the zip with
                # wafer_names was misaligned.
                cluster_data = cluster_data.drop(['Wafer', 'clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                preds = list(model.predict(cluster_data))
                result = pandas.DataFrame(list(zip(wafer_names, preds)),
                                          columns=['Wafer', 'Prediction'])
                result.to_csv(path, header=True, mode='a+')  # append per cluster
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path, result.head().to_json(orient="records")
class prediction:
    """Binary-outcome prediction using a single CatBoost model (no clustering),
    mapping 0/1 outputs to 'no'/'yes' labels."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Preprocess, encode, impute, predict with CatBoost, write CSV.

        Returns:
            str: path of the CSV file the predictions were written to.

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            # Drop the index artefact left behind by a previous CSV save,
            # if present (membership test instead of scanning all columns).
            if 'Unnamed: 0' in data.columns:
                data.drop('Unnamed: 0', axis=1, inplace=True)
            # Column drops decided during EDA.
            preprocessor_cus = preprocess_cus.Preprocessor_cus(
                self.file_object, self.log_writer)
            data = preprocessor_cus.drop_column(data)
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            # '?' placeholders become NaN so imputation can handle them.
            data = preprocessor.replaceInvalidValuesWithNull(data)
            data = preprocessor_cus.test_data_encode(data)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            model = file_loader.load_model('CatBoost')
            result = list(model.predict(data))
            result = pandas.DataFrame(result, columns=['Predictions'])
            # Present human-readable labels instead of 0/1.
            result['Predictions'].replace({0: "no", 1: "yes"}, inplace=True)
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True)  # overwrites with this run's output
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path
class prediction:
    """Income-bracket prediction: per-cluster models whose 0/1 outputs are
    mapped to '<=50K'/'>50K' labels."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the prediction pipeline and append labelled results to CSV.

        Returns:
            str: path of the CSV file the predictions were written to.

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            # 'education' was found not to contribute to prediction.
            data = preprocessor.remove_columns(data, ['education'])
            data = preprocessor.remove_unwanted_spaces(data)
            # '?' placeholders become NaN so imputation can handle them.
            data.replace('?', np.NaN, inplace=True)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)
            # Scaled numeric features and encoded categoricals, side by side.
            scaled_num_df = preprocessor.scale_numerical_columns(data)
            cat_df = preprocessor.encode_categorical_columns(data)
            X = pd.concat([scaled_num_df, cat_df], axis=1)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            X['clusters'] = kmeans.predict(X)
            predictions = []
            for cluster_id in X['clusters'].unique():
                cluster_data = X[X['clusters'] == cluster_id].drop(
                    ['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(cluster_id)
                model = file_loader.load_model(model_name)
                # Map the binary model output to the income-bracket labels.
                for res in model.predict(cluster_data):
                    predictions.append('<=50K' if res == 0 else '>50K')
            # Build the frame directly from the label list (the original
            # wrapped it in a pointless single-column zip).
            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv(path, header=True, mode='a+')  # appends to the file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path
class prediction:
    """Forest-cover-type prediction: per-cluster models whose numeric
    outputs are mapped to cover-type names."""

    # Model output code -> cover-type label. Codes outside this table are
    # skipped, matching the original if/elif chain's behavior.
    _LABELS = {
        0: "Lodgepole_Pine",
        1: "Spruce_Fir",
        2: "Douglas_fir",
        3: "Krummholz",
        4: "Ponderosa_Pine",
        5: "Aspen",
        6: "Cottonwood_Willow",
    }

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Scale, cluster, predict per cluster, and append labels to CSV.

        Returns:
            str: path of the CSV file the predictions were written to.

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.scaleData(data)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            data['clusters'] = kmeans.predict(data)
            result = []
            for cluster_id in data['clusters'].unique():
                cluster_data = data[data['clusters'] == cluster_id].drop(
                    ['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(cluster_id)
                model = file_loader.load_model(model_name)
                for val in model.predict(cluster_data):
                    # Table lookup replaces the original 7-branch elif chain.
                    if val in self._LABELS:
                        result.append(self._LABELS[val])
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True, mode='a+')  # appends to the file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path
class prediction:
    """Mushroom prediction: drop 'veiltype', clean invalid values, encode,
    then predict with the per-cluster model."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the prediction pipeline and write results to CSV.

        Returns:
            str: path of the CSV file the predictions were written to.

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            # 'veiltype' was found to carry no information during EDA.
            data = preprocessor.dropUnnecessaryColumns(data, ['veiltype'])
            # '?' placeholders become NaN so imputation can handle them.
            data = preprocessor.replaceInvalidValuesWithNull(data)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)
            data = preprocessor.encodeCategoricalValuesPrediction(data)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            data['clusters'] = kmeans.predict(data)
            result = []
            for cluster_id in data['clusters'].unique():
                cluster_data = data[data['clusters'] == cluster_id].drop(
                    ['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(cluster_id)
                model = file_loader.load_model(model_name)
                result.extend(model.predict(cluster_data))
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True)  # overwrites with this run's output
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path
class prediction:
    """Titanic-style prediction: feature engineering (binning, family-size
    combination, categorical conversion) followed by a single
    RandomForestClassifier — no clustering step."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Engineer features, scale, predict with the classifier, write CSV.

        Returns:
            str: path of the CSV file the predictions were written to.

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            # Feature engineering steps mirroring the EDA notebook.
            data = preprocessor.binning(data)
            data = preprocessor.dropUnnecessaryColumns(data, ['Ageband'])
            data = preprocessor.combiningfornewfeature(data)
            data = preprocessor.dropUnnecessaryColumns(
                data, ['Parch', 'Sibsp', 'FamilySize'])
            data = preprocessor.convertCategoricalfeatureIntonumeric(data)
            data = preprocessor.binningfare(data)
            data = preprocessor.dropUnnecessaryColumns(
                data, ['FareBand', 'PassengerId'])
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)
            # Scale with the training-time scaler; the scaled frame is what
            # the classifier consumes (the original also converted `data` to
            # a numpy array and never used it — dead code, removed).
            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            rf_classifier = file_loader.load_model('RandomForestClassifier')
            classifier = rf_classifier.predict(data_scaled)
            result = pandas.DataFrame(classifier, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True)  # overwrites with this run's output
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path
class prediction:
    """Insurance-fraud prediction: per-cluster models whose 0/1 outputs are
    mapped to 'N'/'Y' labels."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the prediction pipeline and append labelled results to CSV.

        Returns:
            str: path of the CSV file the predictions were written to.

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            # Identifier/leaky/EDA-rejected columns.
            data = preprocessor.remove_columns(data, [
                'policy_number', 'policy_bind_date', 'policy_state',
                'insured_zip', 'incident_location', 'incident_date',
                'incident_state', 'incident_city', 'insured_hobbies',
                'auto_make', 'auto_model', 'auto_year', 'age',
                'total_claim_amount'
            ])
            # '?' placeholders become NaN so imputation can handle them.
            data.replace('?', np.NaN, inplace=True)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)
            data = preprocessor.encode_categorical_columns(data)
            data = preprocessor.scale_numerical_columns(data)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            data['clusters'] = kmeans.predict(data)
            predictions = []
            for cluster_id in data['clusters'].unique():
                cluster_data = data[data['clusters'] == cluster_id].drop(
                    ['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(cluster_id)
                model = file_loader.load_model(model_name)
                # Map the binary model output to fraud labels.
                for res in model.predict(cluster_data):
                    predictions.append('N' if res == 0 else 'Y')
            # Build the frame directly from the label list (the original
            # wrapped it in a pointless single-column zip).
            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv(path, header=True, mode='a+')  # appends to the file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path
class prediction:
    """Abalone ring-count prediction: cluster on features without 'Sex',
    then map per-cluster model outputs to ring-range labels."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the prediction pipeline and append labelled results to CSV.

        Returns:
            tuple: (output CSV path, predictions DataFrame).

        Raises:
            Exception: re-raised after logging when any step fails.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # clear last run's output
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.remove_columns(data, [])
            # '?' placeholders become NaN so imputation can handle them.
            data.replace('?', np.NaN, inplace=True)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)
            # Cluster on a copy without 'Sex' (KMeans was trained without it);
            # the full frame is encoded afterwards for the per-cluster models.
            df = data.copy()
            df.drop(labels=['Sex'], axis=1, inplace=True)
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            data['clusters'] = kmeans.predict(df)
            data = preprocessor.encode_categorical_columns(data)
            predictions = []
            for cluster_id in data['clusters'].unique():
                cluster_data = data[data['clusters'] == cluster_id].drop(
                    ['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(cluster_id)
                model = file_loader.load_model(model_name)
                # Map the three-class output to ring-range labels.
                for res in model.predict(np.array(cluster_data)):
                    if res == 0:
                        predictions.append('1-8 Rings')
                    elif res == 1:
                        predictions.append('11+ Rings')
                    else:
                        predictions.append('9-10 Rings')
            # Build the frame directly from the label list (the original
            # wrapped it in a pointless single-column zip).
            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv(path, header=True, mode='a+')  # appends to the file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise  # bare raise preserves the original traceback
        return path, final
class prediction:
    """Mushroom-type prediction pipeline backed by AWS storage; sends a
    notification e-mail once the batch prediction is done."""

    def __init__(self, path):
        # Log target is a name understood by App_Logger / AWS helpers.
        self.file_object = 'Prediction_Log'
        self.log_writer = logger.App_Logger()
        self.awsObj = AwsStorageManagement()
        self.emailObj = email()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the batch prediction, upload the CSV to AWS and e-mail a
        completion notice.

        Returns:
            str: path of the predictions CSV.
        """
        try:
            # Clear out the previous run's output file first.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            # 'veil-type' is constant in this dataset, so it carries no signal.
            data = preprocessor.dropUnnecessaryColumns(data, ['veil-type'])
            # replacing '?' values with np.nan as discussed in the EDA part
            data = preprocessor.replaceInvalidValuesWithNull(data)

            has_nulls, null_columns = preprocessor.is_null_present(data)
            if has_nulls:
                data = preprocessor.impute_missing_values(data, null_columns)

            # get encoded values for categorical data
            data = preprocessor.encodeCategoricalValuesPrediction(data)

            store = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = store.load_model('KMeans')
            print(kmeans.labels_)  # debug: training cluster labels

            data['clusters'] = kmeans.predict(data)
            result = []
            for cluster_id in data['clusters'].unique():
                subset = data[data['clusters'] == cluster_id].drop(['clusters'], axis=1)
                model = store.load_model(store.find_correct_model_file(cluster_id))
                result.extend(model.predict(subset))

            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            self.awsObj.saveDataframeToCsv('Prediction_Output_File', 'Predictions.csv', result)
            self.log_writer.log(self.file_object, 'End of Prediction')

            # Notify by e-mail that the prediction run finished.
            msg = MIMEMultipart()
            msg['Subject'] = 'MushroomTypeClassifier - Prediction Done | ' + str(datetime.now())
            body = 'Model Prediction Done Successfully... <br><br> Thanks and Regards, <br> Rahul Garg'
            msg.attach(MIMEText(body, 'html'))
            to_addr = ['*****@*****.**']
            self.emailObj.trigger_mail(to_addr, [], msg)
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class prediction:
    """Cement-strength prediction pipeline that logs to a database and
    stores input/output files in Azure blob storage."""

    def __init__(self, path, execution_id):
        #self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        #self.log_writer = logger.App_Logger()
        #self.pred_data_val = Prediction_Data_validation(path)
        self.execution_id = execution_id
        # DB/collection names used by the DB-backed logger below.
        self.log_database = "strength_prediction_log"
        self.log_collection = "prediction_log"
        self.log_db_writer = App_LoggerDB(execution_id)
        self.az_blob_mgt = AzureBlobManagement()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path, execution_id)

    def predictionFromModel(self):
        """Run the batch prediction and append the result CSV to Azure.

        Returns:
            tuple: (output container/path, JSON preview of the first rows) —
            or (empty path, message string) when there is no input data.
        """
        try:
            # deletes the existing prediction file from last run!
            self.pred_data_val.deletePredictionFile()
            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'Start of Prediction')
            print("start of prediction")
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.log_database, self.log_collection, self.execution_id)
            data = data_getter.get_data()
            path = ""
            # Guard: nothing to predict — bail out with a message instead of failing.
            if data.__len__() == 0:
                self.log_db_writer.log(
                    self.log_database, self.log_collection,
                    "No data was present to perform prediction existing prediction method"
                )
                return path, "No data was present to perform prediction"
            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)
            preprocessor = preprocessing.Preprocessor(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)
            # Same log transformation applied at training time.
            data = preprocessor.logTransformation(data)
            print("after log Transformation")
            print(data)
            #scale the prediction data
            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)
            print("standard scaling for data completed")
            print(data_scaled)
            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            # NOTE(review): 'kkmeans' looks like a typo for 'KMeans' (the name
            # used by the sibling pipelines) — confirm against the model name
            # written by the training side before changing it.
            kmeans = file_loader.load_model('kkmeans')
            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(
                data_scaled)  #drops the first column for cluster prediction
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            # with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #     encoder = pickle.load(file)
            # Predict each cluster with the model trained for that cluster.
            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                print(model_name)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['strength-Predictions'])
            #result = list(model.predict(cluster_data))
            #self.result = pandas.DataFrame(list(zip(result)), columns=['Prediction'])
            #for val in (model.predict(cluster_data.values)):
            #    result.append(val)
            #print(self.result.shape)
            print("results after prediction with prediction columns")
            print(result)
            path = "Prediction-Output-File"
            #result.to_csv("Prediction_Output_File/Predictions.csv",header=True) #appends result to prediction file
            self.az_blob_mgt.saveDataFrametoCSV(
                path,
                "cement-strength-prediction.csv",
                result,
                header=True,
                mode="a+")
            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'End of Prediction')
        except Exception as ex:
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
class prediction:
    """Hourly-traffic prediction: batch prediction from validated files and a
    single-record prediction built from web-form inputs."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Predict traffic volume for the whole validated batch file.

        Returns:
            str: path of the CSV the predictions were written to.
        """
        try:
            # deletes the existing prediction file from last run!
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)

            # NOTE(review): the encoded frame is assigned to data_scaled but the
            # un-encoded `data` is used below — presumably
            # encodeCategoricalValuesPrediction mutates in place; confirm,
            # otherwise `data` should be reassigned to the encoded result.
            data_scaled = preprocessor.encodeCategoricalValuesPrediction(data)
            # Derive time features (hour/day/...) used by the model.
            data = preprocessor.create_timefeatures(data)

            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            model = file_loader.load_model('XGBOOST')
            # BUG FIX: original read `result.model.predict(data)` which raised
            # NameError (result was never assigned). Assign the prediction.
            result = model.predict(data)
            result = pandas.DataFrame(result, columns=['Predictions'])
            result['Item_Identifier'] = data["Item_Identifier"]
            result["Outlet_Identifier"] = data["Outlet_Identifier"]
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv", header=True)
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object,
                                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path

    def prediction_from_user(self):
        """Predict traffic volume for one record built from the web form,
        persist inputs + prediction to MongoDB and return the prediction."""
        try:
            dbConn = pymongo.MongoClient("mongodb://localhost:27017/")  # opening a connection to Mongo
            db = dbConn['HOURLYTRAFFICDETAILS']  # connecting to the database

            # reading the inputs given by the user
            holiday = request.form['holiday']
            Holiday_True = 0 if holiday == 'None' else 1
            temp = float(request.form['temp'])
            # BUG FIX: rain_1h / snow_1h were used below but their form reads
            # were commented out (NameError). Default to 0 when absent.
            rain_1h = float(request.form.get('rain', 0))
            snow_1h = float(request.form.get('snow', 0))
            clouds_all = int(request.form['cloud'])
            weather_main = request.form['weather']  # categorical, one-hot encoded below
            date_time = request.form['Date_time']
            previous_hour = request.form['previoushourtraffic']

            # Renamed from `list`, which shadowed the builtin.
            known_weather = ['CLOUDS', 'MIST', 'RAIN', 'SNOW', 'OTHERS']

            dict_pred = {
                'holiday': Holiday_True,
                'temp': temp,
                'rain_1h': rain_1h,
                'snow_1h': snow_1h,
                'clouds_all': clouds_all,
                'date_time': date_time,
                'weather_main': weather_main,
                # BUG FIX: key was 'previous_ihr' (typo) — the storage dict and
                # presumably the trained model use 'previous_1hr'; verify the
                # training column name matches.
                'previous_1hr': previous_hour,
            }
            df = pd.DataFrame(dict_pred, index=[0])

            # Expand date_time into the calendar features the model expects.
            df['date_time'] = pd.to_datetime(df.date_time)
            df['weekday'] = df.date_time.dt.weekday  # Monday is 0 and Sunday is 6
            df['hour'] = df.date_time.dt.hour
            df['month'] = df.date_time.dt.month
            df['year'] = df.date_time.dt.year

            # BUG FIX: a pandas Series has no .upper(); use the .str accessor.
            # Unknown categories collapse into 'OTHERS'.
            df['weather_main'] = np.where(
                df['weather_main'].str.upper().isin(known_weather),
                df['weather_main'], 'OTHERS')

            # Apply the fitted one-hot encoder (closing the file handle,
            # which the original left open).
            with open('OHE.pkl', 'rb') as enc_file:
                ohe = pickle.load(enc_file)
            weather_main_df = pd.DataFrame(ohe.transform(df[['weather_main']]).toarray())
            weather_main_df.columns = ohe.get_feature_names(['weather'])
            df = df.join(weather_main_df)
            df.drop('weather_main', axis=1, inplace=True)
            df.set_index('date_time', inplace=True)

            # BUG FIX: file_loader was never created in this method and `xgb`
            # was an undefined name; load the same model predictionFromModel uses.
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            model = file_loader.load_model('XGBOOST')
            predictionout = model.predict(df)

            table = db['TRAFFIC']
            mydict = {
                'holiday': Holiday_True,
                'temp': temp,
                'rain_1h': rain_1h,
                'snow_1h': snow_1h,
                'clouds_all': clouds_all,
                'date_time': str(date_time),
                'weather_main': weather_main,
                # BUG FIX: 'traffic_volume' stored the name `prediction` (this
                # class) instead of the computed value; store a plain float so
                # MongoDB can serialize it.
                'traffic_volume': float(predictionout[0]),
                'previous_1hr': previous_hour,
            }
            table.insert_one(mydict)  # saving the record
            return predictionout
        except Exception as e:
            self.log_writer.log(self.file_object,
                                'Error occured while running the prediction!! Error:: %s' % e)
            raise e
class prediction:
    """Back-order prediction pipeline: single model (no clustering), with
    scaling and PCA applied before prediction."""

    def __init__(self, path):
        # Append-mode log file shared by all pipeline steps.
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the batch prediction and append Yes/No labels to the CSV.

        Returns:
            str: path of the predictions CSV.
        """
        try:
            # Clear the previous run's output before writing a new one.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            loader = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = loader.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            # Identifier columns plus oe_constraint, mirroring training.
            data = preprocessor.remove_columns(data, ["Index_Product", "sku", "oe_constraint"])
            data = preprocessor.encodeCategoricalValuesPred(data)

            if preprocessor.is_null_present(data):
                # Rows with nulls are dropped rather than imputed here.
                data = data.dropna()

            data = preprocessor.scale_numerical_columns(data)
            data = preprocessor.pcaTransformation(data)

            store = file_methods.File_Operation(self.file_object, self.log_writer)
            model = store.load_model(store.find_correct_model_file())
            result = pandas.DataFrame(list(model.predict(data)), columns=['Prediction'])
            # Map the model's class indices to human-readable labels.
            result["Prediction"] = result["Prediction"].map({0: "Yes", 1: "No"})

            path = "Prediction_Output_File/Predictions.csv"
            # appends result to prediction file
            result.to_csv("Prediction_Output_File/Predictions.csv", header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class prediction:
    """Wafer-status prediction pipeline: clusters incoming wafers with a saved
    KMeans model, then predicts each cluster with its dedicated model."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", "a+")
        self.log_writer = logger.App_Logger()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the batch prediction, appending each cluster's results to the CSV.

        Returns:
            tuple: (output csv path, JSON preview of the last written chunk).
        """
        try:
            # deletes the existing prediction file from last run!
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, "Start of Prediction")
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer
            )
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)
            # Constant columns carry no signal and break some scalers.
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
            data = preprocessor.remove_columns(data, cols_to_drop)

            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model("KMeans")
            # 'Wafer' is an identifier, not a feature — excluded from clustering.
            clusters = kmeans.predict(data.drop(["Wafer"], axis=1))
            data["clusters"] = clusters
            clusters = data["clusters"].unique()
            for i in clusters:
                cluster_data = data[data["clusters"] == i]
                wafer_names = list(cluster_data["Wafer"])
                # BUG FIX: the original re-read the FULL frame here
                # (`cluster_data = data.drop(labels=["Wafer"], axis=1)`), so
                # every cluster's model was run on the whole dataset and the
                # wafer names no longer lined up with the predictions (zip
                # silently truncated). Drop columns from the cluster subset.
                cluster_data = cluster_data.drop(labels=["Wafer"], axis=1)
                cluster_data = cluster_data.drop(["clusters"], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = list(model.predict(cluster_data))
                result = pandas.DataFrame(
                    list(zip(wafer_names, result)), columns=["Wafer", "Prediction"]
                )
                path = "Prediction_Output_File/Predictions.csv"
                # appends this cluster's chunk to the prediction file
                result.to_csv(
                    "Prediction_Output_File/Predictions.csv", header=True, mode="a+"
                )
            self.log_writer.log(self.file_object, "End of Prediction")
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                "Error occured while running the prediction!! Error:: %s" % ex,
            )
            raise ex
        # NOTE(review): only the last cluster's chunk is previewed here.
        return path, result.head().to_json(orient="records")
class prediction:
    """Wafer-status prediction pipeline with DB-backed logging and Azure blob
    storage for the output file."""

    def __init__(self, path, execution_id):
        self.execution_id = execution_id
        #self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        #self.log_writer = logger.App_Logger()
        # DB/collection names used by the DB-backed logger below.
        self.log_database = "wafer_prediction_log"
        self.log_collection = "prediction_log"
        self.log_db_writer = App_LoggerDB(execution_id)
        self.az_blob_mgt = AzureBlobManagement()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path, execution_id)

    def predictionFromModel(self):
        """Run the batch prediction, appending each cluster's results to Azure.

        Returns:
            tuple: (output container/path, JSON preview of the last chunk) —
            or (empty path, message string) when there is no input data.
        """
        try:
            # deletes the existing prediction file from last run!
            self.pred_data_val.deletePredictionFile()
            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.log_database, self.log_collection, self.execution_id)
            data = data_getter.get_data()
            path = ""
            # Guard: nothing to predict — return a message instead of failing.
            if data.__len__() == 0:
                self.log_db_writer.log(
                    self.log_database, self.log_collection,
                    "No data was present to perform prediction existing prediction method"
                )
                return path, "No data was present to perform prediction"

            preprocessor = preprocessing.Preprocessor(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            is_null_present = preprocessor.is_null_present(data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)
            # Constant columns carry no signal and break some scalers.
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
            data = preprocessor.remove_columns(data, cols_to_drop)

            file_loader = file_methods.File_Operation(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            kmeans = file_loader.load_model('KMeans')
            # 'Wafer' is an identifier, not a feature — excluded from clustering.
            pred_data = data.drop(['Wafer'], axis=1)
            clusters = kmeans.predict(pred_data)
            data['clusters'] = clusters
            clust = data['clusters'].unique()
            for i in clust:
                # filtering of data with cluster no 0,1,2,etc.
                cluster_data = data[data['clusters'] == i]
                wafer_names = list(cluster_data['Wafer'])
                # BUG FIX: the original re-read the FULL frame here
                # (`cluster_data = data.drop(labels=['Wafer'], axis=1)`), so
                # every cluster's model was run on the whole dataset and the
                # wafer names no longer lined up with the predictions (zip
                # silently truncated). Drop columns from the cluster subset.
                cluster_data = cluster_data.drop(labels=['Wafer'], axis=1)
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = list(model.predict(cluster_data))
                self.result = pandas.DataFrame(list(zip(wafer_names, result)),
                                               columns=['Wafer', 'Prediction'])
                print(self.result.shape)
                print(self.result)
                path = "prediction-output-file"
                # appends this cluster's chunk to the blob CSV
                self.az_blob_mgt.saveDataFrametoCSV(path,
                                                    "prediction.csv",
                                                    self.result,
                                                    header=True,
                                                    mode="a+")
            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'End of prediction')
        except Exception as ex:
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        # NOTE(review): only the last cluster's chunk is previewed here.
        return path, self.result.head().to_json(orient="records")