Example #1
0
    def __init__(self, file_name, model_id, train_size=0.8):
        """Wire up the data, model and preprocessor for rating prediction.

        Args:
            file_name: Passed straight to ``pd.read_csv``.
            model_id: Forwarded to ``PredictModel``; 1 selects the
                multivariate model, 2 the polynomial one.
            train_size: Stored on the instance and, multiplied by 10,
                handed to ``PredictionPreprocess``.
        """
        # Target column (label / output / y).
        self.label = 'Average User Rating'
        self.train_size = train_size
        self.helper = Helper()

        # Raw data set as a DataFrame.
        self.data = pd.read_csv(file_name)
        self.linearRegression = PredictModel(model_id)
        self.preprocess_obj = PredictionPreprocess(
            self.label,
            train_size * 10,
        )
    def __init__(self, file_name, model_id, pca_mode=False, train_size=0.8):
        """Wire up the data, preprocessor and model for rate classification.

        Args:
            file_name: Passed straight to ``pd.read_csv``.
            model_id: Forwarded to ``ClassifyModel``.
            pca_mode: Forwarded to ``ClassifyModel``.
            train_size: Stored on the instance and, multiplied by 10,
                handed to ``ClassificationPreprocess``.
        """
        # Target column (label / output / y) and its class names.
        self.label = 'Rate'
        self.classes = ['Low', 'Intermediate', 'High']
        self.train_size = train_size
        self.helper = Helper()

        # Raw data set as a DataFrame.
        self.data = pd.read_csv(file_name)
        # NOTE(review): the original comment on this call read
        # "1 for classification" — its meaning is not visible from here.
        self.preprocess_obj = ClassificationPreprocess(
            self.label,
            self.classes,
            train_size * 10,
        )
        self.classification_model = ClassifyModel(model_id, pca_mode)
    def test_for_saved_model(self, X_test, y_test):
        """Score a previously saved classification model on test data.

        The model is fetched with ``Helper.retreive_model`` using the
        'classification' category and this instance's ``model_id``. When
        ``pca_mode`` is set, ``X_test`` is first passed through
        ``self.pca.transform``.

        Returns:
            The result of ``self.metrics_calculations`` for the true and
            predicted labels.
        """
        saved_model = Helper().retreive_model('classification', self.model_id)

        # NOTE(review): self.pca is not created in this method; it must
        # already exist on the instance when pca_mode is enabled.
        features = self.pca.transform(X_test) if self.pca_mode else X_test

        predictions = saved_model.predict(features)
        return self.metrics_calculations(y_test, predictions)
    def test_for_saved_model(self, X_test, y_test):
        """Score a previously saved prediction model on test data.

        The model is fetched with ``Helper.retreive_model`` using the
        'prediction' category and this instance's ``model_id``. For model
        id 2 (polynomial) the inputs are expanded to ``self.poly_degree``
        before predicting. The loaded model's coefficients are printed.

        Returns:
            The result of ``self.metrics_calculations`` for the true and
            predicted targets.
        """
        saved_model = Helper().retreive_model('prediction', self.model_id)

        uses_polynomial = self.model_id == 2
        if uses_polynomial:
            # Expand the inputs to the configured polynomial degree.
            expander = PolynomialFeatures(degree=self.poly_degree)
            X_test = expander.fit_transform(X_test)

        predictions = saved_model.predict(X_test)

        print("Model Coefs:\n", saved_model.coef_)
        return self.metrics_calculations(y_test, predictions)
    def train(self, X_train, y_train):
        """Fit the prediction model, save it, and score it on its own data.

        For model id 2 (polynomial) the inputs are first expanded to
        ``self.poly_degree``. After fitting, the model is stored with
        ``Helper.save_model`` under the "prediction" category and this
        instance's ``model_id``.

        Returns:
            The result of ``self.metrics_calculations`` for the training
            targets and the model's predictions on the training inputs.
        """
        uses_polynomial = self.model_id == 2
        if uses_polynomial:
            # Expand the inputs to the configured polynomial degree.
            expander = PolynomialFeatures(degree=self.poly_degree)
            X_train = expander.fit_transform(X_train)

        self.learning_model.fit(X_train, y_train)
        fitted_values = self.learning_model.predict(X_train)

        Helper().save_model(self.learning_model, "prediction", self.model_id)

        return self.metrics_calculations(y_train, fitted_values)
    def train(self, X_train, y_train):
        """Fit the classification model, save it, and score it on its own data.

        When ``pca_mode`` is set the inputs are replaced by the output of
        ``self.build_pca``. For model id 3 (KNN) ``n_neighbors`` is set from
        ``self.find_best_k`` before fitting. After fitting, the model is
        stored with ``Helper.save_model`` under the "classification"
        category and this instance's ``model_id``.

        Returns:
            The result of ``self.metrics_calculations`` for the training
            labels and the model's predictions on the training inputs.
        """
        if self.pca_mode:
            X_train = self.build_pca(X_train, y_train)

        is_knn = self.model_id == 3
        if is_knn:
            best_k = self.find_best_k(X_train, y_train, 30)
            self.learning_model.n_neighbors = best_k

        self.learning_model.fit(X_train, y_train)
        fitted_labels = self.learning_model.predict(X_train)

        Helper().save_model(self.learning_model, "classification",
                            self.model_id)

        return self.metrics_calculations(y_train, fitted_labels)
Example #7
0
    def load_structures(self):
        """Restore the preprocessing lists and dicts through ``Helper``.

        Each attribute is loaded from the key '<prefix>_<file_id>', where
        the prefixes 'l1'..'l4' hold the lists and 'd1'..'d2' the dicts.
        """
        helper = Helper()
        suffix = '_' + str(self.file_id)

        # (attribute name, storage-key prefix), in the original load order.
        stored_attributes = (
            ('to_be_hot_encoded', 'l1'),
            ('to_be_encoded_dates', 'l2'),
            ('to_be_dropped', 'l3'),
            ('list_all_unique', 'l4'),
            ('dict_min_max', 'd1'),
            ('dict_average', 'd2'),
        )
        for attribute, prefix in stored_attributes:
            setattr(self, attribute, helper.load_structure(prefix + suffix))
Example #8
0
    def save_structures(self):
        """Persist the preprocessing lists and dicts through ``Helper``.

        Lists are saved under the keys 'l1'..'l4' and dicts under
        'd1'..'d2', each followed by '_<file_id>'.
        """
        helper = Helper()
        list_structures = [
            self.to_be_hot_encoded,
            self.to_be_encoded_dates,
            self.to_be_dropped,
            self.list_all_unique,
        ]
        dict_structures = [self.dict_min_max, self.dict_average]

        suffix = '_' + str(self.file_id)
        for prefix, structures in (('l', list_structures),
                                   ('d', dict_structures)):
            # Keys are numbered from 1 within each group.
            for position, structure in enumerate(structures, start=1):
                helper.save_structure(structure,
                                      prefix + str(position) + suffix)
class Program:
    """Drive training and evaluation of a classifier for the 'Rate' label.

    The data set is read from a CSV file, cleaned by a
    ``ClassificationPreprocess`` instance and fed to a ``ClassifyModel``.
    """

    def __init__(self, file_name, model_id, pca_mode=False, train_size=0.8):
        """Load the data set and build the preprocessor and model.

        Args:
            file_name: Passed straight to ``pd.read_csv``.
            model_id: Forwarded to ``ClassifyModel``.
            pca_mode: Forwarded to ``ClassifyModel``.
            train_size: Fraction of the data used for training in
                ``start``; 1 means train on everything. It is also passed,
                multiplied by 10, to ``ClassificationPreprocess``.
        """
        self.train_size = train_size
        self.helper = Helper()
        self.label = 'Rate'  # label, output, y
        self.classes = ['Low', 'Intermediate', 'High']

        self.data = pd.read_csv(file_name)  # data -> dataframe
        self.preprocess_obj = ClassificationPreprocess(
            self.label, self.classes, train_size * 10)
        self.classification_model = ClassifyModel(model_id, pca_mode)

    def _split_features_target(self, cleaned_data):
        """Split a preprocessed frame into ``(X, y)`` with ``y`` cast to int."""
        target = cleaned_data[self.label]
        features = cleaned_data.drop([self.label], axis=1)
        # The label column comes back as dtype 'object'; the models need ints.
        return features, target.astype('int')

    def _evaluate(self, title, run, X, y):
        """Time ``run(X, y)``, then print its accuracy and show its matrix."""
        self.helper.start_timer()
        # The third returned value (miss count) is not reported.
        accuracy, conf_matrix, _ = run(X, y)
        time_elapsed = self.helper.elapsed_time()
        print(title, "Accuracy:", accuracy, '\n', conf_matrix, '\n',
              "Elapsed Time:", time_elapsed)
        HeatMap.show(conf_matrix)

    def start(self):
        """Train the model and, unless ``train_size`` is 1, test it.

        The data is split with ``train_test_split`` (shuffled). Both the
        training and the test results are printed and shown as heat maps.
        """
        if self.train_size == 1:  # Train with all data.
            data_train = self.data
            data_test = []
        else:
            data_train, data_test = train_test_split(
                self.data, test_size=1 - self.train_size, shuffle=True)

        # Train process.
        cleaned_data_train = self.preprocess_obj.start_for_training(data_train)
        X_train, y_train = self._split_features_target(cleaned_data_train)
        self._evaluate("Train Output\n", self.classification_model.train,
                       X_train, y_train)

        # Test process (skipped when everything was used for training).
        if len(data_test) > 0:
            cleaned_data_test = self.preprocess_obj.start_for_testing(
                data_test)
            X_test, y_test = self._split_features_target(cleaned_data_test)
            self._evaluate("Test Output\n", self.classification_model.test,
                           X_test, y_test)

    def final_test(self, file_name):
        """Evaluate the saved model on a separate CSV file.

        The file is read with ``pd.read_csv``, cleaned with the
        preprocessor's ``start_for_testing`` and scored with the model's
        ``test_for_saved_model``. The target column is cast to int exactly
        as in ``start``, so the saved model is scored against integer
        labels rather than 'object' values.

        Args:
            file_name: Passed straight to ``pd.read_csv``.
        """
        final_test_data = pd.read_csv(file_name)
        cleaned_data_test = self.preprocess_obj.start_for_testing(
            final_test_data)
        X_test, y_test = self._split_features_target(cleaned_data_test)
        self._evaluate("Test Output\n",
                       self.classification_model.test_for_saved_model,
                       X_test, y_test)
Example #10
0
class Program:
    """Drive training and evaluation of a regressor for the user rating.

    The data set is read from a CSV file, cleaned by a
    ``PredictionPreprocess`` instance and fed to a ``PredictModel``.
    """

    def __init__(self, file_name, model_id, train_size=0.8):
        """Load the data set and build the model and preprocessor.

        Args:
            file_name: Passed straight to ``pd.read_csv``.
            model_id: Forwarded to ``PredictModel``; 1 selects the
                multivariate model, 2 the polynomial one.
            train_size: Fraction of the data used for training in
                ``start``; 1 means train on everything. It is also passed,
                multiplied by 10, to ``PredictionPreprocess``.
        """
        # Target column (label / output / y).
        self.label = 'Average User Rating'
        self.train_size = train_size
        self.helper = Helper()

        # Raw data set as a DataFrame.
        self.data = pd.read_csv(file_name)
        self.linearRegression = PredictModel(model_id)
        self.preprocess_obj = PredictionPreprocess(self.label,
                                                   train_size * 10)

    def start(self):
        """Train the model, test it unless ``train_size`` is 1, then correlate.

        The data is split with ``train_test_split`` (shuffled). MSE, R2
        score and elapsed time are printed for each phase, and a
        ``Correlation`` over the cleaned training data is run at the end.
        """
        use_all_data = self.train_size == 1
        if not use_all_data:
            data_train, data_test = train_test_split(
                self.data,
                test_size=1 - self.train_size,
                shuffle=True,
            )
        else:
            # Nothing is held out for testing.
            data_train, data_test = self.data, []

        # Training phase.
        train_frame = self.preprocess_obj.start_for_training(data_train)
        y_train = train_frame[self.label]
        X_train = train_frame.drop([self.label], axis=1)

        self.helper.start_timer()
        train_error, train_r2_score = self.linearRegression.train(
            X_train, y_train)
        elapsed_time = self.helper.elapsed_time()
        print("Train Output\n", "MSE:", train_error, '\n', "R2 Score:",
              train_r2_score, "\n", "Elapsed Time:", elapsed_time)

        # Testing phase, only when some data was held out.
        if len(data_test) > 0:
            test_frame = self.preprocess_obj.start_for_testing(data_test)
            y_test = test_frame[self.label]
            X_test = test_frame.drop([self.label], axis=1)

            self.helper.start_timer()
            test_error, test_r2_score = self.linearRegression.test(
                X_test, y_test)
            elapsed_time = self.helper.elapsed_time()
            print("Test Output\n", "MSE:", test_error, '\n', "R2 Score:",
                  test_r2_score, "\n", "Elapsed Time:", elapsed_time)

        Correlation(train_frame).correlate()

    def final_test(self, file_name):
        """Evaluate the saved model on a separate CSV file.

        The file is read with ``pd.read_csv``, cleaned with the
        preprocessor's ``start_for_testing`` and scored with the model's
        ``test_for_saved_model``; MSE, R2 score and elapsed time are printed.

        Args:
            file_name: Passed straight to ``pd.read_csv``.
        """
        raw_frame = pd.read_csv(file_name)
        test_frame = self.preprocess_obj.start_for_testing(raw_frame)

        y_test = test_frame[self.label]
        X_test = test_frame.drop([self.label], axis=1)

        self.helper.start_timer()
        test_error, test_r2_score = self.linearRegression.test_for_saved_model(
            X_test, y_test)
        elapsed_time = self.helper.elapsed_time()

        print("Test Output\n", "MSE:", test_error, '\n', "R2 Score:",
              test_r2_score, "\n", "Elapsed Time:", elapsed_time)