def train_model(self):
    """Run the training pipeline end to end and save the selected model.

    Steps, in order:
      1. Load ``main_data`` and ``additional_data`` through
         ``data_loader.Data_Getter``.
      2. Run the ``data_preprocessing.PreProcessor`` steps (null imputation
         when nulls are reported, IP-to-country mapping, signup/purchase
         difference, browser/source/sex encoding, country count-frequency
         encoding, removal of unwanted columns).
      3. Split features from the ``'class'`` label and hold out 30% of the
         rows for testing.
      4. Ask ``tuner.Model_Finder`` for the best model and save it with
         ``file_methods.File_Operation.save_model``.

    The log file object held in ``self.file_object`` is always closed when
    this method finishes, whether training succeeded or failed.

    Raises:
        Exception: any error raised by a pipeline step is logged as
            'Unsuccessfull End of Training' and then re-raised unchanged.
    """
    self.log_writer.log(self.file_object, 'Start of Training')
    try:
        data_getter = data_loader.Data_Getter(self.file_object,
                                              self.log_writer)
        main_data, additional_data = data_getter.get_data()

        preprocessor = data_preprocessing.PreProcessor(self.file_object,
                                                       self.log_writer)
        # Impute only when the preprocessor reports missing values.
        if preprocessor.is_null_present(main_data):
            main_data = preprocessor.impute_missing_values(main_data)

        main_data = preprocessor.map_ip_to_country(main_data,
                                                   additional_data)
        main_data = preprocessor.difference_signup_and_purchase(main_data)
        main_data = preprocessor.encoding_browser(main_data)
        main_data = preprocessor.encoding_source(main_data)
        main_data = preprocessor.encoding_sex(main_data)
        main_data = preprocessor.count_frequency_encoding_country(main_data)
        main_data = preprocessor.remove_unwanted_cols(main_data)

        x, y = preprocessor.separate_label_feature(main_data, 'class')
        x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                            test_size=0.3)
        # NOTE: SMOTE over-sampling of the training split is disabled:
        # x_train, y_train = preprocessor.over_sampling_smote(x_train, y_train)

        model_finder = tuner.Model_Finder(self.file_object, self.log_writer)
        best_model_name, best_model = model_finder.get_best_model(
            x_train, y_train, x_test, y_test)

        file_op = file_methods.File_Operation(self.file_object,
                                              self.log_writer)
        file_op.save_model(best_model, best_model_name)

        self.log_writer.log(self.file_object, 'Successfull End of Training')
    except Exception:
        self.log_writer.log(self.file_object,
                            'Unsuccessfull End of Training')
        # Bare raise keeps the original exception and traceback intact.
        raise
    finally:
        # Close the log handle on every exit path, including the case where
        # logging inside the except handler itself fails.
        self.file_object.close()
def predict_from_model(self):
    """Run the prediction pipeline and write the results to a CSV file.

    Steps, in order:
      1. Delete any previous prediction file through
         ``self.pred_data_val.deletePredictionFile()``.
      2. Load ``main_data`` and ``additional_data`` through
         ``data_loader_prediction.Data_Getter``.
      3. Run the same ``data_preprocessing.PreProcessor`` steps used for
         training; ``remove_unwanted_cols`` is asked to also return the
         removed columns so they can be reported next to each prediction.
      4. Load the model chosen by
         ``file_methods.File_Operation.find_correct_model_file`` and
         predict on the preprocessed data.
      5. Write the removed columns plus a ``'Prediction'`` column to
         ``Prediction_Output_File/Prediction.csv``.

    The log file object held in ``self.file_object`` is always closed when
    this method finishes, whether prediction succeeded or failed.

    Returns:
        tuple: ``(path, preview)`` where ``path`` is the output CSV path and
        ``preview`` is ``result.head()`` serialised as a JSON string with
        ``orient="records"``.

    Raises:
        Exception: any error raised by a pipeline step is logged together
            with its message and then re-raised unchanged.
    """
    # Columns taken from the removed ("unwanted") data and echoed into the
    # output file, in output order.
    output_columns = [
        'user_id', 'signup_time', 'purchase_time', 'device_id', 'source',
        'browser', 'sex', 'ip_address', 'Country',
    ]

    self.log_writer.log(self.file_object, 'Start of Prediction')
    try:
        self.pred_data_val.deletePredictionFile()

        data_getter = data_loader_prediction.Data_Getter(
            self.file_object, self.log_writer)
        main_data, additional_data = data_getter.get_data()

        preprocessor = data_preprocessing.PreProcessor(self.file_object,
                                                       self.log_writer)
        # Impute only when the preprocessor reports missing values.
        if preprocessor.is_null_present(main_data):
            main_data = preprocessor.impute_missing_values(main_data)

        main_data = preprocessor.map_ip_to_country(main_data,
                                                   additional_data)
        main_data = preprocessor.difference_signup_and_purchase(main_data)
        main_data = preprocessor.encoding_browser(main_data)
        main_data = preprocessor.encoding_source(main_data)
        main_data = preprocessor.encoding_sex(main_data)
        main_data = preprocessor.count_frequency_encoding_country(main_data)
        main_data, unwanted_data = preprocessor.remove_unwanted_cols(
            main_data, return_unwanted_data=True)

        file_loader = file_methods.File_Operation(self.file_object,
                                                  self.log_writer)
        model_name = file_loader.find_correct_model_file()
        model = file_loader.load_model(model_name)
        predictions = list(model.predict(main_data))

        # Pair every row of the echoed columns with its prediction.
        rows = list(
            zip(*(unwanted_data[column] for column in output_columns),
                predictions))
        result = pd.DataFrame(rows,
                              columns=output_columns + ['Prediction'])

        path = "Prediction_Output_File/Prediction.csv"
        result.to_csv(path, header=True, mode='a+')

        self.log_writer.log(self.file_object,
                            'Successfull End of Prediction')
    except Exception as e:
        self.log_writer.log(
            self.file_object,
            'Error Occured while doing the Prediction !! Error :: %s' %
            str(e))
        # Bare raise keeps the original exception and traceback intact.
        raise
    finally:
        # Close the log handle on every exit path, including the case where
        # logging inside the except handler itself fails.
        self.file_object.close()

    return path, result.head().to_json(orient="records")