def __init__(self, file_name, model_id, train_size=0.8):
    """Wire up the helper, data, model and preprocessor for regression.

    Args:
        file_name: Passed straight to ``pd.read_csv``.
        model_id: Forwarded to ``PredictModel``; 1 selects the multivariate
            model, 2 the polynomial one.
        train_size: Stored unchanged on the instance; ``train_size * 10``
            is also handed to ``PredictionPreprocess`` as its second
            argument.
    """
    target_column = 'Average User Rating'  # label / output / y

    self.train_size = train_size
    self.helper = Helper()
    self.label = target_column
    self.data = pd.read_csv(file_name)  # CSV contents as a DataFrame
    self.linearRegression = PredictModel(model_id)
    self.preprocess_obj = PredictionPreprocess(target_column, train_size * 10)
def __init__(self, file_name, model_id, pca_mode=False, train_size=0.8):
    """Wire up the helper, data, preprocessor and model for classification.

    Args:
        file_name: Passed straight to ``pd.read_csv``.
        model_id: Forwarded to ``ClassifyModel`` together with ``pca_mode``.
        pca_mode: Forwarded to ``ClassifyModel``.
        train_size: Stored unchanged on the instance; ``train_size * 10``
            is also handed to ``ClassificationPreprocess`` as its third
            argument.
    """
    target_column = 'Rate'  # label / output / y
    class_names = ['Low', 'Intermediate', 'High']

    self.train_size = train_size
    self.helper = Helper()
    self.label = target_column
    self.classes = class_names
    self.data = pd.read_csv(file_name)  # CSV contents as a DataFrame
    # NOTE(review): the original carried the remark "1 for classification"
    # next to this call -- confirm what it refers to.
    self.preprocess_obj = ClassificationPreprocess(
        target_column, class_names, train_size * 10)
    self.classification_model = ClassifyModel(model_id, pca_mode)
def test_for_saved_model(self, X_test, y_test):
    """Evaluate the stored classification model on a test set.

    The model is obtained from ``Helper.retreive_model`` using the
    ``'classification'`` category and this object's ``model_id``.  When
    ``pca_mode`` is on, the features first go through ``self.pca.transform``.

    Args:
        X_test: Test features.
        y_test: Test targets.

    Returns:
        Whatever ``self.metrics_calculations`` returns for the true and
        predicted targets.
    """
    saved_model = Helper().retreive_model('classification', self.model_id)
    # Project onto the principal components only when PCA mode is enabled.
    features = self.pca.transform(X_test) if self.pca_mode else X_test
    predictions = saved_model.predict(features)
    return self.metrics_calculations(y_test, predictions)
def test_for_saved_model(self, X_test, y_test):
    """Evaluate the stored regression model on a test set.

    The model is obtained from ``Helper.retreive_model`` using the
    ``'prediction'`` category and this object's ``model_id``.  For
    ``model_id == 2`` (polynomial linear model) the features are expanded
    with ``PolynomialFeatures(degree=self.poly_degree)`` before predicting.
    The loaded model's coefficients are printed.

    Args:
        X_test: Test features.
        y_test: Test targets.

    Returns:
        Whatever ``self.metrics_calculations`` returns for the true and
        predicted targets.
    """
    saved_model = Helper().retreive_model('prediction', self.model_id)

    uses_polynomial = self.model_id == 2
    if uses_polynomial:
        # Lift the inputs to the higher-degree feature space.
        expander = PolynomialFeatures(degree=self.poly_degree)
        X_test = expander.fit_transform(X_test)

    predictions = saved_model.predict(X_test)
    print("Model Coefs:\n", saved_model.coef_)
    return self.metrics_calculations(y_test, predictions)
def train(self, X_train, y_train):
    """Fit the regression model, persist it, and score it on its own data.

    For ``model_id == 2`` (polynomial linear model) the features are first
    expanded with ``PolynomialFeatures(degree=self.poly_degree)``.  After
    fitting, the model is saved through ``Helper.save_model`` under the
    ``"prediction"`` category and this object's ``model_id``.

    Args:
        X_train: Training features.
        y_train: Training targets.

    Returns:
        Whatever ``self.metrics_calculations`` returns for the true targets
        and the model's predictions on the training features.
    """
    features = X_train
    if self.model_id == 2:
        # Lift the inputs to the higher-degree feature space.
        expander = PolynomialFeatures(degree=self.poly_degree)
        features = expander.fit_transform(X_train)

    self.learning_model.fit(features, y_train)
    fitted_values = self.learning_model.predict(features)

    Helper().save_model(self.learning_model, "prediction", self.model_id)
    return self.metrics_calculations(y_train, fitted_values)
def train(self, X_train, y_train):
    """Fit the classification model, persist it, and score it on its own data.

    When ``pca_mode`` is on, the features are replaced by the result of
    ``self.build_pca``.  For ``model_id == 3`` (KNN) the model's
    ``n_neighbors`` is set from ``self.find_best_k(..., 30)`` before
    fitting.  After fitting, the model is saved through
    ``Helper.save_model`` under the ``"classification"`` category and this
    object's ``model_id``.

    Args:
        X_train: Training features.
        y_train: Training targets.

    Returns:
        Whatever ``self.metrics_calculations`` returns for the true targets
        and the model's predictions on the training features.
    """
    features = self.build_pca(X_train, y_train) if self.pca_mode else X_train

    if self.model_id == 3:  # KNN model
        best_k = self.find_best_k(features, y_train, 30)
        self.learning_model.n_neighbors = best_k

    self.learning_model.fit(features, y_train)
    fitted_labels = self.learning_model.predict(features)

    Helper().save_model(self.learning_model, "classification", self.model_id)
    return self.metrics_calculations(y_train, fitted_labels)
def load_structures(self):
    """Reload the six preprocessing structures saved for ``self.file_id``.

    Each structure is fetched with ``Helper.load_structure`` under the key
    ``<prefix>_<file_id>`` and stored on the matching attribute:
    ``l1``..``l4`` for the four lists, ``d1``/``d2`` for the two dicts.
    """
    loader = Helper()
    suffix = '_' + str(self.file_id)

    # (attribute name, storage key prefix), in the order they are loaded.
    attribute_keys = (
        ('to_be_hot_encoded', 'l1'),
        ('to_be_encoded_dates', 'l2'),
        ('to_be_dropped', 'l3'),
        ('list_all_unique', 'l4'),
        ('dict_min_max', 'd1'),
        ('dict_average', 'd2'),
    )
    for attribute, prefix in attribute_keys:
        setattr(self, attribute, loader.load_structure(prefix + suffix))
def save_structures(self):
    """Persist the six preprocessing structures for ``self.file_id``.

    The four lists are written with ``Helper.save_structure`` under the
    keys ``l1_<file_id>``..``l4_<file_id>`` and the two dicts under
    ``d1_<file_id>`` and ``d2_<file_id>``, numbered in the order listed
    below.
    """
    saver = Helper()
    groups = (
        ('l', [self.to_be_hot_encoded, self.to_be_encoded_dates,
               self.to_be_dropped, self.list_all_unique]),
        ('d', [self.dict_min_max, self.dict_average]),
    )
    suffix = '_' + str(self.file_id)

    for prefix, structures in groups:
        # Numbering restarts at 1 for each group.
        for position, structure in enumerate(structures, start=1):
            saver.save_structure(structure, prefix + str(position) + suffix)
class Program:
    """Train/evaluate driver for the 'Rate' classification task.

    Reads a CSV into a DataFrame, cleans it with ``ClassificationPreprocess``
    and trains/tests a ``ClassifyModel``, printing the accuracy, the
    confusion matrix and the elapsed time, and showing the matrix through
    ``HeatMap``.
    """

    def __init__(self, file_name, model_id, pca_mode=False, train_size=0.8):
        """Load the data and build the preprocessing and model objects.

        Args:
            file_name: Passed straight to ``pd.read_csv``.
            model_id: Forwarded to ``ClassifyModel`` together with
                ``pca_mode``.
            pca_mode: Forwarded to ``ClassifyModel``.
            train_size: Share of the data used for training in ``start``
                (``1`` means train on everything); ``train_size * 10`` is
                also handed to ``ClassificationPreprocess``.
        """
        self.train_size = train_size
        self.helper = Helper()
        self.label = 'Rate'  # label, output, y
        self.classes = ['Low', 'Intermediate', 'High']
        self.data = pd.read_csv(file_name)  # data -> dataframe
        self.preprocess_obj = ClassificationPreprocess(
            self.label, self.classes, train_size * 10)
        self.classification_model = ClassifyModel(model_id, pca_mode)

    def _split_features_and_target(self, cleaned_data):
        """Return ``(X, y)`` from a cleaned frame, with ``y`` cast to int."""
        target = cleaned_data[self.label]
        features = cleaned_data.drop([self.label], axis=1)
        # The label column comes out of preprocessing as dtype 'object';
        # convert it to int so the models and metrics recognize it.
        return features, target.astype('int')

    def _report(self, heading, accuracy, conf_matrix, elapsed):
        """Print one result summary and display its confusion matrix."""
        print(heading, "Accuracy:", accuracy, '\n', conf_matrix, '\n',
              "Elapsed Time:", elapsed)
        HeatMap.show(conf_matrix)

    def start(self):
        """Train on the training split and, if there is one, test on the rest.

        With ``train_size == 1`` all data is used for training and the test
        phase is skipped; otherwise the data is split (shuffled) with
        ``train_test_split``.
        """
        if self.train_size == 1:  # Train with all data.
            data_train = self.data
            data_test = []
        else:
            data_train, data_test = train_test_split(
                self.data, test_size=1 - self.train_size, shuffle=True)

        # Train process.
        cleaned_data_train = self.preprocess_obj.start_for_training(data_train)
        X_train, y_train = self._split_features_and_target(cleaned_data_train)

        self.helper.start_timer()
        train_accuracy, conf_matrix_train, _ = self.classification_model.train(
            X_train, y_train)
        time_elapsed = self.helper.elapsed_time()
        self._report("Train Output\n", train_accuracy, conf_matrix_train,
                     time_elapsed)

        # Test process. ``len`` is used because ``data_test`` may be a
        # DataFrame, whose truth value is ambiguous.
        if len(data_test) > 0:
            cleaned_data_test = self.preprocess_obj.start_for_testing(
                data_test)
            X_test, y_test = self._split_features_and_target(cleaned_data_test)

            self.helper.start_timer()
            test_accuracy, conf_matrix_test, _ = self.classification_model.test(
                X_test, y_test)
            time_elapsed = self.helper.elapsed_time()
            self._report("Test Output\n", test_accuracy, conf_matrix_test,
                         time_elapsed)

    def final_test(self, file_name):
        """Evaluate the previously saved model on a separate CSV file.

        Args:
            file_name: Passed straight to ``pd.read_csv``; the data goes
                through the same test-time preprocessing as in ``start``.
        """
        final_test_data = pd.read_csv(file_name)
        cleaned_data_test = self.preprocess_obj.start_for_testing(
            final_test_data)
        # Same target handling as in ``start`` (including the int cast,
        # which this method previously skipped).
        X_test, y_test = self._split_features_and_target(cleaned_data_test)

        self.helper.start_timer()
        test_accuracy, conf_matrix_test, _ = (
            self.classification_model.test_for_saved_model(X_test, y_test))
        time_elapsed = self.helper.elapsed_time()
        self._report("Test Output\n", test_accuracy, conf_matrix_test,
                     time_elapsed)
class Program:
    """Train/evaluate driver for the 'Average User Rating' regression task."""

    def __init__(self, file_name, model_id, train_size=0.8):
        """Load the data and build the model and preprocessing objects.

        Args:
            file_name: Passed straight to ``pd.read_csv``.
            model_id: Forwarded to ``PredictModel``; 1 selects the
                multivariate model, 2 the polynomial one.
            train_size: Share of the data used for training in ``start``
                (``1`` means train on everything); ``train_size * 10`` is
                also handed to ``PredictionPreprocess``.
        """
        target_column = 'Average User Rating'  # label / output / y

        self.train_size = train_size
        self.helper = Helper()
        self.label = target_column
        self.data = pd.read_csv(file_name)  # CSV contents as a DataFrame
        self.linearRegression = PredictModel(model_id)
        self.preprocess_obj = PredictionPreprocess(target_column,
                                                   train_size * 10)

    def start(self):
        """Train, optionally test, then run the correlation step.

        With ``train_size == 1`` all data is used for training and the test
        phase is skipped; otherwise the data is split (shuffled) with
        ``train_test_split``.  MSE, R2 score and elapsed time are printed
        for each phase.
        """
        if self.train_size == 1:
            # Train with all data; nothing is left for testing.
            train_rows, test_rows = self.data, []
        else:
            train_rows, test_rows = train_test_split(
                self.data, test_size=1 - self.train_size, shuffle=True)

        # ---- Training phase ----
        cleaned_data_train = self.preprocess_obj.start_for_training(train_rows)
        train_target = cleaned_data_train[self.label]
        train_inputs = cleaned_data_train.drop([self.label], axis=1)

        self.helper.start_timer()
        train_error, train_r2_score = self.linearRegression.train(
            train_inputs, train_target)
        elapsed_time = self.helper.elapsed_time()
        print("Train Output\n", "MSE:", train_error, '\n', "R2 Score:",
              train_r2_score, "\n", "Elapsed Time:", elapsed_time)

        # ---- Testing phase (only when a test split exists) ----
        if len(test_rows) > 0:
            cleaned_data_test = self.preprocess_obj.start_for_testing(
                test_rows)
            test_target = cleaned_data_test[self.label]
            test_inputs = cleaned_data_test.drop([self.label], axis=1)

            self.helper.start_timer()
            test_error, test_r2_score = self.linearRegression.test(
                test_inputs, test_target)
            elapsed_time = self.helper.elapsed_time()
            print("Test Output\n", "MSE:", test_error, '\n', "R2 Score:",
                  test_r2_score, "\n", "Elapsed Time:", elapsed_time)

        # NOTE(review): the collapsed source does not show whether this step
        # sat inside the test branch; it only needs the training frame, so it
        # is kept at method level -- confirm against the original layout.
        Correlation(cleaned_data_train).correlate()

    def final_test(self, file_name):
        """Evaluate the previously saved model on a separate CSV file.

        Args:
            file_name: Passed straight to ``pd.read_csv``; the data goes
                through ``start_for_testing`` before evaluation.
        """
        cleaned_data_test = self.preprocess_obj.start_for_testing(
            pd.read_csv(file_name))
        test_target = cleaned_data_test[self.label]
        test_inputs = cleaned_data_test.drop([self.label], axis=1)

        self.helper.start_timer()
        test_error, test_r2_score = self.linearRegression.test_for_saved_model(
            test_inputs, test_target)
        elapsed_time = self.helper.elapsed_time()
        print("Test Output\n", "MSE:", test_error, '\n', "R2 Score:",
              test_r2_score, "\n", "Elapsed Time:", elapsed_time)