def predictionFromModel(self):
    """Run the wafer prediction pipeline.

    Loads validated prediction data, imputes/reduces columns, assigns each
    row to a KMeans cluster, predicts with the per-cluster model, and
    appends results to the output CSV.

    Returns:
        tuple: (output csv path, JSON string of the last cluster's result head).

    Raises:
        Exception: re-raised after logging if any pipeline step fails.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.file_object, self.log_writer)
        data = data_getter.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        is_null_present = preprocessor.is_null_present(data)
        if is_null_present:
            data = preprocessor.impute_missing_values(data)
        cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
        data = preprocessor.remove_columns(data, cols_to_drop)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        kmeans = file_loader.load_model('KMeans')
        # 'Wafer' is an identifier, not a feature — exclude it from clustering
        clusters = kmeans.predict(data.drop(['Wafer'], axis=1))
        data['clusters'] = clusters
        data.to_csv('Prediction_Raw_Files_Validated/finalpredictiondata.csv',
                    index=False)

        clusters = data['clusters'].unique()
        for i in clusters:
            cluster_data = data[data['clusters'] == i]
            wafer_names = list(cluster_data['Wafer'])
            # BUG FIX: drop from cluster_data, not the full `data` frame.
            # The original dropped from `data`, so every cluster's model was
            # run over the whole dataset and predictions misaligned with
            # wafer_names.
            cluster_data = cluster_data.drop(labels=['Wafer'], axis=1)
            cluster_data = cluster_data.drop(['clusters'], axis=1)
            model_name = file_loader.find_correct_model_file(i)
            model = file_loader.load_model(model_name)
            result = list(model.predict(cluster_data))
            result = pandas.DataFrame(list(zip(wafer_names, result)),
                                      columns=['Wafer', 'Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True,
                          mode='a+')  # appends result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path, result.head().to_json(orient="records")
def predictionFromModel(self):
    """Predict income class ('<=50K' / '>50K') for the validated data.

    Cleans and encodes the input, clusters rows with the saved KMeans
    model, predicts per cluster, and appends results to the output CSV.

    Returns:
        str: path of the written predictions file.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        # 'education' does not contribute to prediction
        frame = preprocessor.remove_columns(frame, ['education'])
        frame = preprocessor.remove_unwanted_spaces(frame)
        frame.replace('?', np.NaN, inplace=True)  # '?' marks missing values

        has_nulls, null_cols = preprocessor.is_null_present(frame)
        if has_nulls:
            frame = preprocessor.impute_missing_values(frame, null_cols)

        # scale numeric features and one-hot/label encode the categorical ones
        scaled_num_df = preprocessor.scale_numerical_columns(frame)
        cat_df = preprocessor.encode_categorical_columns(frame)
        X = pd.concat([scaled_num_df, cat_df], axis=1)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        km_model = file_loader.load_model('KMeans')
        X['clusters'] = km_model.predict(X)

        predictions = []
        for cluster_id in X['clusters'].unique():
            subset = X[X['clusters'] == cluster_id].drop(['clusters'], axis=1)
            model = file_loader.load_model(
                file_loader.find_correct_model_file(cluster_id))
            for res in model.predict(subset):
                predictions.append('<=50K' if res == 0 else '>50K')

        final = pd.DataFrame(list(zip(predictions)), columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        final.to_csv("Prediction_Output_File/Predictions.csv",
                     header=True, mode='a+')  # appends result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict forest cover type labels for the validated data.

    Scales the data, assigns rows to KMeans clusters, predicts with the
    per-cluster model, maps numeric classes to cover-type names, and
    appends results to the output CSV.

    Returns:
        str: path of the written predictions file.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    # numeric class -> cover-type label (values outside 0..6 are skipped,
    # matching the original if/elif chain)
    _LABELS = {
        0: "Lodgepole_Pine",
        1: "Spruce_Fir",
        2: "Douglas_fir",
        3: "Krummholz",
        4: "Ponderosa_Pine",
        5: "Aspen",
        6: "Cottonwood_Willow",
    }
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        frame = preprocessor.scaleData(frame)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        km_model = file_loader.load_model('KMeans')
        frame['clusters'] = km_model.predict(frame)

        labels = []
        for cluster_id in frame['clusters'].unique():
            subset = frame[frame['clusters'] == cluster_id]
            subset = subset.drop(['clusters'], axis=1)
            model = file_loader.load_model(
                file_loader.find_correct_model_file(cluster_id))
            for val in model.predict(subset):
                if val in _LABELS:
                    labels.append(_LABELS[val])

        out_df = pandas.DataFrame(labels, columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        out_df.to_csv("Prediction_Output_File/Predictions.csv",
                      header=True, mode='a+')  # appends result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict target values for the validated data.

    Imputes missing values, log-transforms and standard-scales the data,
    clusters rows with the saved KMeans model, predicts per cluster, and
    writes results to the output CSV.

    Returns:
        str: path of the written predictions file.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        has_nulls, _null_cols = preprocessor.is_null_present(frame)
        if has_nulls:
            frame = preprocessor.impute_missing_values(frame)
        frame = preprocessor.logTransformation(frame)

        # standard-scale; wrap back into a DataFrame to keep column names
        scaled = pandas.DataFrame(preprocessor.standardScalingData(frame),
                                  columns=frame.columns)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        km_model = file_loader.load_model('KMeans')
        scaled['clusters'] = km_model.predict(scaled)

        preds = []
        for cluster_id in scaled['clusters'].unique():
            subset = scaled[scaled['clusters'] == cluster_id]
            subset = subset.drop(['clusters'], axis=1)
            model = file_loader.load_model(
                file_loader.find_correct_model_file(cluster_id))
            preds.extend(model.predict(subset.values))

        out_df = pandas.DataFrame(preds, columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        out_df.to_csv("Prediction_Output_File/Predictions.csv",
                      header=True)  # writes result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict thyroid class labels for the validated data.

    Drops the '*_measured' indicator columns, encodes/imputes the input,
    clusters rows with the saved KMeans model, predicts per cluster, and
    decodes the numeric classes via the pickled label encoder.

    Returns:
        str: path of the written predictions file.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        frame = preprocessor.dropUnnecessaryColumns(frame, [
            'TSH_measured', 'T3_measured', 'TT4_measured', 'T4U_measured',
            'FTI_measured', 'TBG_measured', 'TBG', 'TSH'
        ])
        # replacing '?' values with np.nan as discussed in the EDA part
        frame = preprocessor.replaceInvalidValuesWithNull(frame)
        frame = preprocessor.encodeCategoricalValuesPrediction(frame)
        if preprocessor.is_null_present(frame):
            frame = preprocessor.impute_missing_values(frame)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        km_model = file_loader.load_model('KMeans')
        frame['clusters'] = km_model.predict(frame)

        labels = []
        # load the encoder pickle file to decode the numeric classes
        with open('EncoderPickle/enc.pickle', 'rb') as file:
            encoder = pickle.load(file)
        for cluster_id in frame['clusters'].unique():
            subset = frame[frame['clusters'] == cluster_id]
            subset = subset.drop(['clusters'], axis=1)
            model = file_loader.load_model(
                file_loader.find_correct_model_file(cluster_id))
            labels.extend(encoder.inverse_transform(model.predict(subset)))

        out_df = pandas.DataFrame(labels, columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        out_df.to_csv("Prediction_Output_File/Predictions.csv",
                      header=True)  # writes result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict yes/no outcomes for the validated data using CatBoost.

    Drops the 'Unnamed: 0' index column if present, applies project-specific
    preprocessing and encoding, imputes missing values, predicts with the
    saved CatBoost model, and maps 0/1 to "no"/"yes".

    Returns:
        str: path of the written predictions file.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        # drop pandas' auto-generated index column if it slipped in
        for col in frame.columns:
            if col == 'Unnamed: 0':
                frame.drop('Unnamed: 0', axis=1, inplace=True)

        # columns dropped after performing EDA
        preprocessor_cus = preprocess_cus.Preprocessor_cus(self.file_object,
                                                           self.log_writer)
        frame = preprocessor_cus.drop_column(frame)

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        # replacing '?' values with np.nan as discussed in the EDA part
        frame = preprocessor.replaceInvalidValuesWithNull(frame)
        frame = preprocessor_cus.test_data_encode(frame)

        has_nulls, _null_cols = preprocessor.is_null_present(frame)
        if has_nulls:
            frame = preprocessor.impute_missing_values(frame)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        model = file_loader.load_model('CatBoost')
        preds = [val for val in model.predict(frame)]

        out_df = pandas.DataFrame(preds, columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        out_df['Predictions'].replace({0: "no", 1: "yes"}, inplace=True)
        out_df.to_csv("Prediction_Output_File/Predictions.csv",
                      header=True)  # writes result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict abalone ring buckets for the validated data.

    Imputes missing values, clusters rows (excluding 'Sex') with the saved
    KMeans model, predicts per cluster, and maps numeric classes to ring
    ranges before appending to the output CSV.

    Returns:
        tuple: (output csv path, DataFrame of predictions).

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        frame = preprocessor.remove_columns(frame, [])
        frame.replace('?', np.NaN, inplace=True)  # '?' marks missing values

        has_nulls, null_cols = preprocessor.is_null_present(frame)
        if has_nulls:
            frame = preprocessor.impute_missing_values(frame, null_cols)

        # cluster on the numeric features only: 'Sex' is categorical
        numeric_view = frame.copy()
        numeric_view.drop(labels=['Sex'], axis=1, inplace=True)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        km_model = file_loader.load_model('KMeans')
        frame['clusters'] = km_model.predict(numeric_view)
        frame = preprocessor.encode_categorical_columns(frame)

        predictions = []
        for cluster_id in frame['clusters'].unique():
            subset = frame[frame['clusters'] == cluster_id]
            subset = subset.drop(['clusters'], axis=1)
            model = file_loader.load_model(
                file_loader.find_correct_model_file(cluster_id))
            for res in model.predict(np.array(subset)):
                # 0 -> youngest bucket, 1 -> oldest, anything else -> middle
                predictions.append(
                    {0: '1-8 Rings', 1: '11+ Rings'}.get(res, '9-10 Rings'))

        final = pd.DataFrame(list(zip(predictions)), columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        final.to_csv("Prediction_Output_File/Predictions.csv",
                     header=True, mode='a+')  # appends result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path, final
def predictionFromModel(self):
    """Predict ratings for the validated restaurant data.

    Drops unusable columns, removes rows with nulls, encodes and scales the
    data, clusters rows with the saved KMeans model, and predicts per
    cluster into the output CSV.

    Returns:
        str: path of the written predictions file.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        frame = preprocessor.dropUnnecessaryColumns(
            frame, ["serial", "rate", "listed_in(type)", "listed_in(city)"])

        has_nulls, _null_cols = preprocessor.is_null_present(frame)
        if has_nulls:
            # drop incomplete rows rather than imputing
            frame = frame.dropna(how='any')

        frame = preprocessor.encodeCategoricalValues(frame)
        # standard-scale; wrap back into a DataFrame to keep column names
        scaled = pandas.DataFrame(preprocessor.standardScalingData(frame),
                                  columns=frame.columns)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        km_model = file_loader.load_model('KMeans')
        scaled['clusters'] = km_model.predict(scaled)

        preds = []
        for cluster_id in scaled['clusters'].unique():
            subset = scaled[scaled['clusters'] == cluster_id]
            subset = subset.drop(['clusters'], axis=1)
            model = file_loader.load_model(
                file_loader.find_correct_model_file(cluster_id))
            preds.extend(model.predict(subset.values))

        out_df = pandas.DataFrame(preds, columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        out_df.to_csv("Prediction_Output_File/Predictions.csv",
                      header=True)  # writes result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict target values for the validated data.

    Imputes missing values, scales the numerical columns, clusters rows
    with the saved KMeans model, and predicts per cluster into the output
    CSV.

    Returns:
        str: path of the written predictions file.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        has_nulls, null_cols = preprocessor.is_null_present(frame)
        if has_nulls:
            frame = preprocessor.impute_missing_values(frame, null_cols)

        X = preprocessor.scale_numerical_columns(frame)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        km_model = file_loader.load_model('KMeans')
        X['clusters'] = km_model.predict(X)

        predictions = []
        for cluster_id in X['clusters'].unique():
            subset = X[X['clusters'] == cluster_id].drop(['clusters'], axis=1)
            model = file_loader.load_model(
                file_loader.find_correct_model_file(cluster_id))
            predictions.extend(model.predict(subset))

        final = pd.DataFrame(predictions, columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        final.to_csv("Prediction_Output_File/Predictions.csv",
                     header=True, mode='a+')  # appends result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict values for the validated rental-listing data.

    Drops id/url-style columns, encodes and scales the data, clusters rows
    with the saved KMeans model, and predicts per cluster into the output
    CSV.

    Returns:
        tuple: (output csv path, JSON string of the result head).

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        frame = preprocessor.dropUnnecessaryColumns(frame, [
            'id', 'region', 'url', 'region_url', 'image_url', 'state',
            'type', 'dogs_allowed'
        ])
        frame = preprocessor.encodeCategoricalValuesPrediction(frame)
        # standard-scale; wrap back into a DataFrame to keep column names
        scaled = pandas.DataFrame(preprocessor.standardScalingData(frame),
                                  columns=frame.columns)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        km_model = file_loader.load_model('KMeans')
        scaled['clusters'] = km_model.predict(scaled)

        preds = []
        for cluster_id in scaled['clusters'].unique():
            subset = scaled[scaled['clusters'] == cluster_id]
            subset = subset.drop(['clusters'], axis=1)
            model = file_loader.load_model(
                file_loader.find_correct_model_file(cluster_id))
            preds.extend(model.predict(subset.values))

        out_df = pandas.DataFrame(preds, columns=['Prediction'])
        path = "Prediction_Output_File/Predictions.csv"
        out_df.to_csv("Prediction_Output_File/Predictions.csv",
                      header=True, mode='a+')  # appends result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path, out_df.head().to_json(orient="records")
def predictionFromModel(self, singlerecdata=None):
    """Predict class labels, either for the whole file or a single record.

    Args:
        singlerecdata: optional single record; when given, only that record
            is predicted and the decoded label is returned directly instead
            of writing a CSV.

    Returns:
        str or array: output csv path in batch mode, or the decoded
        prediction for the single record.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.loggerObj.logger_log('Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.loggerObj)
        if singlerecdata is None:
            frame = loader.get_data()
        else:
            frame = loader.get_data_for_rec(singlerecdata)

        preprocessor = preprocessing.Preprocessor(self.loggerObj)
        # replacing '?' values with np.nan as discussed in the EDA part
        frame = preprocessor.replaceInvalidValuesWithNull(frame)
        frame = preprocessor.dropUnnecessaryColumnsForPrediction(frame)
        if preprocessor.is_null_present(frame):
            frame = preprocessor.impute_missing_values(frame)
        frame = preprocessor.encodeCategoricalValuesPrediction(frame)
        frame = preprocessor.prediction_data_standardisation(frame)

        file_loader = file_methods.File_Operation(self.loggerObj)
        model = file_loader.load_model(file_loader.find_model_file())

        labels = []
        # load the encoder pickle file to decode the numeric classes
        with open('EncoderPickle/enc.pickle', 'rb') as file:
            encoder = pickle.load(file)

        if singlerecdata is None:
            labels.extend(encoder.inverse_transform(model.predict(frame)))
            out_df = pandas.DataFrame(labels, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            out_df.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True)  # writes result to prediction file
            self.loggerObj.logger_log('End of Prediction')
            return path
        else:
            val = encoder.inverse_transform(model.predict(frame))
            self.loggerObj.logger_log('End of Prediction')
            return val
    except Exception as ex:
        self.loggerObj.logger_log(
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
def predictionFromModel(self):
    """Run the wafer prediction pipeline.

    Loads validated prediction data, imputes missing values, drops
    zero-variance columns, clusters rows with the saved KMeans model,
    predicts with the per-cluster model, and appends (wafer, prediction)
    pairs to the output CSV.

    Returns:
        tuple: (output csv path, JSON string of the last cluster's result head).

    Raises:
        Exception: re-raised after logging if any pipeline step fails.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.file_object, self.log_writer)
        data = data_getter.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        is_null_present = preprocessor.is_null_present(data)
        if is_null_present:
            data = preprocessor.impute_missing_values(data)
        cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
        data = preprocessor.remove_columns(data, cols_to_drop)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        kmeans = file_loader.load_model('KMeans')
        # 'Wafer' is an identifier, not a feature — exclude it from clustering
        clusters = kmeans.predict(data.drop(['Wafer'], axis=1))
        data['clusters'] = clusters
        clusters = data['clusters'].unique()
        for i in clusters:
            # selecting all the records of a particular cluster
            cluster_data = data[data['clusters'] == i]
            # getting all the wafer names
            wafer_names = list(cluster_data['Wafer'])
            # BUG FIX: drop from cluster_data, not the full `data` frame.
            # The original dropped from `data`, so every cluster's model was
            # run over the whole dataset and predictions misaligned with
            # wafer_names.
            cluster_data = cluster_data.drop(['Wafer', 'clusters'], axis=1)
            # finding and loading the model for this cluster
            model_name = file_loader.find_correct_model_file(i)
            model = file_loader.load_model(model_name)
            pred_values = list(model.predict(cluster_data))
            result = pandas.DataFrame(list(zip(wafer_names, pred_values)),
                                      columns=['Wafer', 'Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True,
                          mode='a+')  # appends result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path, result.head().to_json(orient="records")
def predictionFromModel(self):
    """Predict values using a fixed subset of feature columns.

    Replaces -1 sentinels with 1, selects the ten model features, clusters
    rows with the saved KMeans model, and predicts per cluster into the
    output CSV.

    Returns:
        tuple: (output csv path, JSON string of the result head).

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.file_object, self.log_writer)
        data = data_getter.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        data.replace(-1, 1, inplace=True)
        # FIX: take an explicit copy — the original assigned a 'clusters'
        # column onto a column-subset view of `data`, which triggers pandas'
        # chained-assignment warning and may not persist the column.
        new_data = data[[
            'H18', 'F76', 'F46', 'G57', 'C13', 'A71', 'E115', 'F56', 'I59',
            'A91'
        ]].copy()

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        kmeans = file_loader.load_model('KMeans')
        clusters = kmeans.predict(new_data)
        new_data['clusters'] = clusters
        clusters = new_data['clusters'].unique()

        result = []  # accumulates predictions across clusters
        for i in clusters:
            cluster_data = new_data[new_data['clusters'] == i]
            cluster_data = cluster_data.drop(['clusters'], axis=1)
            model_name = file_loader.find_correct_model_file(i)
            model = file_loader.load_model(model_name)
            for val in model.predict(cluster_data):
                result.append(val)

        result = pandas.DataFrame(result, columns=['Prediction'])
        path = "Prediction_Output_File/Predictions.csv"
        result.to_csv("Prediction_Output_File/Predictions.csv",
                      header=True,
                      mode='a+')  # appends result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path, result.head().to_json(orient="records")
def predictionFromModel(self):
    """Predict sales values with the saved XGBoost model.

    Imputes missing values, encodes categoricals, derives time features,
    predicts with 'XGBOOST', and writes predictions joined with the item
    and outlet identifiers to the output CSV.

    Returns:
        str: path of the written predictions file.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.file_object, self.log_writer)
        data = data_getter.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        is_null_present, cols_with_missing_values = preprocessor.is_null_present(
            data)
        if is_null_present:
            data = preprocessor.impute_missing_values(data)

        # NOTE(review): the encoded frame is never used afterwards — the
        # model is fed the un-encoded `data`. Looks like this should be
        # `data = preprocessor.encodeCategoricalValuesPrediction(data)`;
        # kept as-is pending confirmation against the training pipeline.
        data_scaled = preprocessor.encodeCategoricalValuesPrediction(data)
        data = preprocessor.create_timefeatures(data)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        model = file_loader.load_model('XGBOOST')
        # BUG FIX: original read `result.model.predict(data)`, a NameError
        # since `result` was never defined — the prediction was never
        # assigned.
        result = model.predict(data)
        result = pandas.DataFrame(result, columns=['Predictions'])
        result['Item_Identifier'] = data["Item_Identifier"]
        result["Outlet_Identifier"] = data["Outlet_Identifier"]
        path = "Prediction_Output_File/Predictions.csv"
        result.to_csv("Prediction_Output_File/Predictions.csv",
                      header=True)  # writes result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict sales values per KMeans cluster.

    Imputes missing values, drops the identifier columns for modelling,
    scales the numeric features, clusters rows, predicts per cluster, and
    writes predictions joined with the item and outlet identifiers.

    Returns:
        str: path of the written predictions file.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        has_nulls, _null_cols = preprocessor.is_null_present(frame)
        if has_nulls:
            frame = preprocessor.impute_missing_values(frame)

        # identifiers are kept aside for the output, not used as features
        id_columns = ["Item_Identifier", "Outlet_Identifier"]
        features = preprocessor.remove_columns(frame, id_columns)
        scaled = preprocessor.scale_numerical_columns(features)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        km_model = file_loader.load_model('KMeans')
        scaled['clusters'] = km_model.predict(scaled)

        preds = []
        for cluster_id in scaled['clusters'].unique():
            subset = scaled[scaled['clusters'] == cluster_id]
            subset = subset.drop(['clusters'], axis=1)
            model = file_loader.load_model(
                file_loader.find_correct_model_file(cluster_id))
            preds.extend(model.predict(subset.values))

        out_df = pandas.DataFrame(preds, columns=['Predictions'])
        out_df['Item_Identifier'] = frame["Item_Identifier"]
        out_df["Outlet_Identifier"] = frame["Outlet_Identifier"]
        path = "Prediction_Output_File/Predictions.csv"
        out_df.to_csv("Prediction_Output_File/Predictions.csv",
                      header=True)  # writes result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict insurance-fraud labels ('Y'/'N') for the validated data.

    Drops non-predictive columns, imputes and encodes the input, scales
    the numeric columns, clusters rows with the saved KMeans model, and
    predicts per cluster into the output CSV.

    Returns:
        str: path of the written predictions file.

    Raises:
        Exception: re-raised after logging on any failure.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
        self.log_writer.log(self.file_object, 'Start of Prediction')
        loader = data_loader_prediction.Data_Getter_Pred(self.file_object,
                                                         self.log_writer)
        frame = loader.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        # these columns don't contribute to prediction (per EDA)
        frame = preprocessor.remove_columns(frame, [
            'policy_number', 'policy_bind_date', 'policy_state',
            'insured_zip', 'incident_location', 'incident_date',
            'incident_state', 'incident_city', 'insured_hobbies',
            'auto_make', 'auto_model', 'auto_year', 'age',
            'total_claim_amount'
        ])
        frame.replace('?', np.NaN, inplace=True)  # '?' marks missing values

        has_nulls, null_cols = preprocessor.is_null_present(frame)
        if has_nulls:
            frame = preprocessor.impute_missing_values(frame, null_cols)

        frame = preprocessor.encode_categorical_columns(frame)
        frame = preprocessor.scale_numerical_columns(frame)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        km_model = file_loader.load_model('KMeans')
        frame['clusters'] = km_model.predict(frame)

        predictions = []
        for cluster_id in frame['clusters'].unique():
            subset = frame[frame['clusters'] == cluster_id]
            subset = subset.drop(['clusters'], axis=1)
            model = file_loader.load_model(
                file_loader.find_correct_model_file(cluster_id))
            for res in model.predict(subset):
                predictions.append('N' if res == 0 else 'Y')

        final = pd.DataFrame(list(zip(predictions)), columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        final.to_csv("Prediction_Output_File/Predictions.csv",
                     header=True, mode='a+')  # appends result to prediction file
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict Titanic survival for the validated prediction data.

    Applies the same feature engineering as training (age binning,
    family-size feature, categorical-to-numeric conversion, fare
    binning, column drops), standard-scales the data and predicts with
    the stored RandomForestClassifier, writing the results to
    Prediction_Output_File/Predictions.csv.

    Returns:
        str: path of the prediction output CSV.

    Raises:
        Exception: re-raised after logging if any step fails.
    """
    try:
        # Remove the prediction file left over from the previous run.
        self.pred_data_val.deletePredictionFile()
        self.log_writer.log(self.file_object, 'Start of Prediction')
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.file_object, self.log_writer)
        data = data_getter.get_data()
        self.log_writer.log(self.file_object, 'Let me chk data')
        print(data.head())

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        # Feature engineering mirrors the training pipeline (see the EDA
        # notebook for the rationale behind each step).
        data = preprocessor.binning(data)
        data = preprocessor.dropUnnecessaryColumns(data, ['Ageband'])
        data = preprocessor.combiningfornewfeature(data)
        data = preprocessor.dropUnnecessaryColumns(
            data, ['Parch', 'Sibsp', 'FamilySize'])
        data = preprocessor.convertCategoricalfeatureIntonumeric(data)
        data = preprocessor.binningfare(data)
        data = preprocessor.dropUnnecessaryColumns(
            data, ['FareBand', 'PassengerId'])
        print(data.head())

        is_null_present, cols_with_missing_values = preprocessor.is_null_present(
            data)
        if is_null_present:
            data = preprocessor.impute_missing_values(data)

        self.log_writer.log(self.file_object, '--Fati-03.5--')  # debug trace
        data_scaled = pandas.DataFrame(
            preprocessor.standardScalingData(data), columns=data.columns)
        self.log_writer.log(self.file_object, '--Fati-04--,It worked :)')  # debug trace

        # The original converted `data` to a numpy array here, but the
        # result was never used (prediction runs on data_scaled); that
        # dead statement has been removed.
        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        RfClassifier = file_loader.load_model('RandomForestClassifier')
        self.log_writer.log(self.file_object, '--Fati-05--')  # debug trace
        classifier = RfClassifier.predict(data_scaled)
        self.log_writer.log(self.file_object, '--Fati-06--')  # debug trace
        self.log_writer.log(self.file_object, '--Fati-08--')  # debug trace
        print(classifier)

        result = pandas.DataFrame(classifier, columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        # Default write mode: the output file is rewritten each run.
        result.to_csv(path, header=True)
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict backorder risk for the validated prediction data.

    Drops the identifier/constraint columns removed during training,
    encodes categoricals, drops any rows still containing nulls, scales
    numerics, applies the stored PCA transformation and predicts with
    the single saved model, mapping 0 -> "Yes" and 1 -> "No".

    Returns:
        str: path of the prediction output CSV.

    Raises:
        Exception: re-raised after logging if any step fails.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # remove last run's output
        self.log_writer.log(self.file_object, 'Start of Prediction')
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.file_object, self.log_writer)
        data = data_getter.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        # oe_constraint (and the identifier columns) were removed in
        # training, so remove them here as well.
        data = preprocessor.remove_columns(
            data, ["Index_Product", "sku", "oe_constraint"])
        data = preprocessor.encodeCategoricalValuesPred(data)

        is_null_present = preprocessor.is_null_present(data)
        if is_null_present:
            # Rows with missing values are dropped rather than imputed.
            data = data.dropna()

        data = preprocessor.scale_numerical_columns(data)
        data = preprocessor.pcaTransformation(data)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        # A single model serves all rows; there is no clustering step.
        model_name = file_loader.find_correct_model_file()
        model = file_loader.load_model(model_name)

        result = pandas.DataFrame(model.predict(data), columns=['Prediction'])
        # NOTE(review): 0 -> "Yes", 1 -> "No" is assumed to mirror the
        # label encoding used at training time -- confirm against the
        # training code.
        result["Prediction"] = result["Prediction"].map({0: "Yes", 1: "No"})

        path = "Prediction_Output_File/Predictions.csv"
        # mode='a+' appends the new predictions to any existing file.
        result.to_csv(path, header=True, mode='a+')
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict APS failure class ('neg'/'pos') for the prediction data.

    Replaces invalid placeholder values with nulls, imputes missing
    values, drops the cd_000/ch_000 columns (also dropped during
    training), scales and PCA-transforms the features, then predicts
    with the single saved model.

    Returns:
        str: path of the prediction output CSV.

    Raises:
        Exception: re-raised after logging if any step fails.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # remove last run's output
        self.log_writer.log(self.file_object, 'Start of Prediction')
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.file_object, self.log_writer)
        data = data_getter.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        # Replace '?'-style invalid entries with nulls (see EDA).
        data = preprocessor.replaceInvalidValuesWithNull(data)
        is_null_present, cols_with_missing_values = preprocessor.is_null_present(
            data)
        if is_null_present:
            data = preprocessor.handleMissingValues(data)

        # These columns were dropped during training, so drop them here too.
        cols_to_drop = ['cd_000', 'ch_000']
        X = preprocessor.remove_columns(data, cols_to_drop)
        X = preprocessor.scale_numerical_columns(X)
        X = preprocessor.pcaTransformation(X)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        # A single model serves all rows; the clustering step is unused.
        model_name = file_loader.find_correct_model_file()
        model = file_loader.load_model(model_name)

        # Collect predictions directly instead of the manual append loop.
        result = pandas.DataFrame(list(model.predict(X)),
                                  columns=['Predictions'])
        result['Predictions'] = result['Predictions'].map({0: 'neg', 1: 'pos'})

        path = "Prediction_Output_File/Predictions.csv"
        result.to_csv(path, header=True)  # rewrites the output each run
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict concrete strength for the prediction data set.

    Imputes missing values, log-transforms and standard-scales the
    features, assigns rows to KMeans clusters, predicts strength with
    each cluster's model and appends the results to
    cement-strength-prediction.csv in Azure blob storage.

    Returns:
        tuple: (output directory/container name, first rows of the
        predictions as JSON records). When the input is empty the second
        element is an explanatory message instead.

    Raises:
        Exception: re-raised after logging if any step fails.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # remove last run's output
        self.log_db_writer.log(self.log_database, self.log_collection,
                               'Start of Prediction')
        print("start of prediction")
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.log_database, self.log_collection, self.execution_id)
        data = data_getter.get_data()

        path = ""
        # Bail out early when there is nothing to predict on.
        if len(data) == 0:
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                "No data was present to perform prediction existing prediction method"
            )
            return path, "No data was present to perform prediction"

        preprocessor = preprocessing.Preprocessor(self.log_database,
                                                  self.log_collection,
                                                  self.execution_id)
        is_null_present, cols_with_missing_values = preprocessor.is_null_present(
            data)
        if is_null_present:
            data = preprocessor.impute_missing_values(data)

        data = preprocessor.logTransformation(data)
        print("after log Transformation")
        print(data)

        # Scale exactly as was done for training.
        data_scaled = pandas.DataFrame(
            preprocessor.standardScalingData(data), columns=data.columns)
        print("standard scaling for data completed")
        print(data_scaled)

        file_loader = file_methods.File_Operation(self.log_database,
                                                  self.log_collection,
                                                  self.execution_id)
        # NOTE(review): 'kkmeans' looks like a typo for 'KMeans' but must
        # match the saved model's file name -- confirm against the
        # training code before changing it.
        kmeans = file_loader.load_model('kkmeans')
        clusters = kmeans.predict(data_scaled)
        data_scaled['clusters'] = clusters
        clusters = data_scaled['clusters'].unique()

        result = []
        for i in clusters:
            cluster_data = data_scaled[data_scaled['clusters'] == i]
            cluster_data = cluster_data.drop(['clusters'], axis=1)
            model_name = file_loader.find_correct_model_file(i)
            print(model_name)
            model = file_loader.load_model(model_name)
            # extend() replaces the manual per-value append loop.
            result.extend(model.predict(cluster_data.values))

        result = pandas.DataFrame(result, columns=['strength-Predictions'])
        print("results after prediction with prediction columns")
        print(result)

        path = "Prediction-Output-File"
        # mode="a+" appends to the blob CSV across runs.
        self.az_blob_mgt.saveDataFrametoCSV(path,
                                            "cement-strength-prediction.csv",
                                            result,
                                            header=True,
                                            mode="a+")
        self.log_db_writer.log(self.log_database, self.log_collection,
                               'End of Prediction')
    except Exception as ex:
        self.log_db_writer.log(
            self.log_database, self.log_collection,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path, result.head().to_json(orient="records")
def predictionFromModel(self):
    """Predict mushroom type for the validated prediction data.

    Drops the constant veil-type column, converts invalid values to
    nulls, imputes, encodes categoricals, clusters rows with KMeans and
    predicts with each cluster's model. Results are uploaded to S3 and a
    notification e-mail is sent on success.

    Returns:
        str: path of the prediction output CSV.

    Raises:
        Exception: re-raised after logging if any step fails.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # remove last run's output
        self.log_writer.log(self.file_object, 'Start of Prediction')
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.file_object, self.log_writer)
        data = data_getter.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        # veil-type was dropped in training, so drop it here as well.
        data = preprocessor.dropUnnecessaryColumns(data, ['veil-type'])
        # Replace '?'-style values with nulls as discussed in the EDA part.
        data = preprocessor.replaceInvalidValuesWithNull(data)

        is_null_present, cols_with_missing_values = preprocessor.is_null_present(
            data)
        if is_null_present:
            data = preprocessor.impute_missing_values(
                data, cols_with_missing_values)

        data = preprocessor.encodeCategoricalValuesPrediction(data)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        kmeans = file_loader.load_model('KMeans')
        print(kmeans.labels_)  # debug output left in place
        clusters = kmeans.predict(data)
        data['clusters'] = clusters
        clusters = data['clusters'].unique()

        result = []
        for i in clusters:
            cluster_data = data[data['clusters'] == i]
            cluster_data = cluster_data.drop(['clusters'], axis=1)
            model_name = file_loader.find_correct_model_file(i)
            model = file_loader.load_model(model_name)
            # extend() replaces the manual per-value append loop.
            result.extend(model.predict(cluster_data))

        result = pandas.DataFrame(result, columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        self.awsObj.saveDataframeToCsv('Prediction_Output_File',
                                       'Predictions.csv', result)
        self.log_writer.log(self.file_object, 'End of Prediction')

        # Notify by e-mail that the prediction run completed.
        msg = MIMEMultipart()
        msg['Subject'] = 'MushroomTypeClassifier - Prediction Done | ' + str(
            datetime.now())
        body = 'Model Prediction Done Successfully... <br><br> Thanks and Regards, <br> Rahul Garg'
        msg.attach(MIMEText(body, 'html'))
        to_addr = ['*****@*****.**']
        self.emailObj.trigger_mail(to_addr, [], msg)
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Run the insurance-fraud prediction pipeline and write results to CSV.

    Mirrors the training preprocessing (column removal, '?' -> NaN,
    imputation, categorical encoding, numeric scaling), clusters rows
    with KMeans, predicts with the per-cluster model and appends 'Y'/'N'
    labels to Prediction_Output_File/Predictions.csv.

    Returns:
        str: path of the prediction output CSV.

    Raises:
        Exception: re-raised after logging if any step fails.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # remove last run's output
        self.log_writer.log(self.file_object, 'Start of Prediction')
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.file_object, self.log_writer)
        data = data_getter.get_data()

        preprocessor = preprocessing.Preprocessor(self.file_object,
                                                  self.log_writer)
        # Drop the columns excluded during training.
        data = preprocessor.remove_columns(data, [
            'policy_number', 'policy_bind_date', 'policy_state',
            'insured_zip', 'incident_location', 'incident_date',
            'incident_state', 'incident_city', 'insured_hobbies',
            'auto_make', 'auto_model', 'auto_year', 'age',
            'total_claim_amount'
        ])
        # '?' marks missing values in the raw data; convert to NaN.
        data.replace('?', np.NaN, inplace=True)

        is_null_present, cols_with_missing_values = preprocessor.is_null_present(
            data)
        if is_null_present:
            data = preprocessor.impute_missing_values(
                data, cols_with_missing_values)

        data = preprocessor.encode_categorical_columns(data)
        data = preprocessor.scale_numerical_columns(data)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        kmeans = file_loader.load_model('KMeans')
        clusters = kmeans.predict(data)
        data['clusters'] = clusters
        clusters = data['clusters'].unique()

        predictions = []
        for i in clusters:
            cluster_data = data[data['clusters'] == i]
            cluster_data = cluster_data.drop(['clusters'], axis=1)
            model_name = file_loader.find_correct_model_file(i)
            model = file_loader.load_model(model_name)
            result = model.predict(cluster_data)
            for res in result:
                # 0 -> 'N' (not fraudulent), anything else -> 'Y'.
                predictions.append('N' if res == 0 else 'Y')

        # Build the output frame directly from the flat list; the old
        # list(zip(predictions)) produced identical single-column data.
        final = pd.DataFrame(predictions, columns=['Predictions'])
        path = "Prediction_Output_File/Predictions.csv"
        # mode='a+' appends across runs (header re-written on each append,
        # kept for backward compatibility).
        final.to_csv(path, header=True, mode='a+')
        self.log_writer.log(self.file_object, 'End of Prediction')
    except Exception as ex:
        self.log_writer.log(
            self.file_object,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path
def predictionFromModel(self):
    """Predict wafer quality per cluster and save results to Azure blob.

    Imputes missing values, removes zero-variance columns, assigns each
    wafer to a KMeans cluster and predicts with that cluster's model,
    appending (Wafer, Prediction) rows to prediction.csv in blob
    storage.

    Returns:
        tuple: (output container name, first rows of the last cluster's
        predictions as JSON records). When the input is empty the second
        element is an explanatory message instead.

    Raises:
        Exception: re-raised after logging if any step fails.
    """
    try:
        self.pred_data_val.deletePredictionFile()  # remove last run's output
        self.log_db_writer.log(self.log_database, self.log_collection,
                               'Start of Prediction')
        data_getter = data_loader_prediction.Data_Getter_Pred(
            self.log_database, self.log_collection, self.execution_id)
        data = data_getter.get_data()

        path = ""
        # Bail out early when there is nothing to predict on.
        if len(data) == 0:
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                "No data was present to perform prediction existing prediction method"
            )
            return path, "No data was present to perform prediction"

        preprocessor = preprocessing.Preprocessor(self.log_database,
                                                  self.log_collection,
                                                  self.execution_id)
        is_null_present = preprocessor.is_null_present(data)
        if is_null_present:
            data = preprocessor.impute_missing_values(data)

        cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
        data = preprocessor.remove_columns(data, cols_to_drop)

        file_loader = file_methods.File_Operation(self.log_database,
                                                  self.log_collection,
                                                  self.execution_id)
        kmeans = file_loader.load_model('KMeans')
        # The Wafer id column is not a feature; drop it for clustering.
        pred_data = data.drop(['Wafer'], axis=1)
        clusters = kmeans.predict(pred_data)
        data['clusters'] = clusters
        clust = data['clusters'].unique()

        for i in clust:
            # Filter the rows belonging to cluster i.
            cluster_data = data[data['clusters'] == i]
            wafer_names = list(cluster_data['Wafer'])
            # BUGFIX: drop from the cluster subset, not the full frame.
            # The original did data.drop(...), which made each cluster's
            # model predict on ALL rows and misaligned the results with
            # wafer_names (a subset) in the zip below.
            cluster_data = cluster_data.drop(labels=['Wafer'], axis=1)
            cluster_data = cluster_data.drop(['clusters'], axis=1)
            model_name = file_loader.find_correct_model_file(i)
            model = file_loader.load_model(model_name)
            result = list(model.predict(cluster_data))
            self.result = pandas.DataFrame(list(zip(wafer_names, result)),
                                           columns=['Wafer', 'Prediction'])
            print(self.result.shape)
            print(self.result)
            path = "prediction-output-file"
            # mode="a+" appends each cluster's rows to the same blob CSV.
            self.az_blob_mgt.saveDataFrametoCSV(path,
                                                "prediction.csv",
                                                self.result,
                                                header=True,
                                                mode="a+")

        self.log_db_writer.log(self.log_database, self.log_collection,
                               'End of prediction')
    except Exception as ex:
        self.log_db_writer.log(
            self.log_database, self.log_collection,
            'Error occured while running the prediction!! Error:: %s' % ex)
        raise ex
    return path, self.result.head().to_json(orient="records")