class trainModel:
    """
    This class runs the wafer training pipeline: it loads the data,
    preprocesses it, clusters it, and then fits and uploads one model per
    cluster.

    Collaborators created in the constructor:
        log_writer: logger.App_Logger used for every log entry ('wafer_log').
        mongo:      To_mongo_db('wafer') used to fetch the training data.
        aws:        Aws_Bucket_operation used to upload the pickled models.
    """

    def __init__(self):
        self.log_writer = logger.App_Logger()
        self.mongo = To_mongo_db('wafer')
        self.aws = Aws_Bucket_operation()

    def trainingModel(self):
        """
        Method Name: trainingModel
        Description: Runs the complete training flow. Loads and preprocesses
                     the data, assigns every row to a cluster, then for each
                     distinct cluster finds the best model and uploads it.
        Output: None
        On Failure: Logs the exception message and 'Unsuccessful End of
                    Training', prints the message, then re-raises the
                    original exception.
        """
        self.log_writer.log('wafer_log', 'Start of Training')
        try:
            features, labels = self._load_features_and_labels()
            clustered = self._assign_clusters(features)

            # Attach the labels positionally (``.values`` ignores the index).
            clustered['Labels'] = labels.values

            # One model is searched for and uploaded per distinct cluster.
            for index, cluster_id in enumerate(clustered['Cluster'].unique()):
                cluster_data = clustered[clustered['Cluster'] == cluster_id]
                self._train_and_upload_cluster_model(cluster_data, index)

            self.log_writer.log('wafer_log', 'Successful End of Training')

        except Exception as err:
            # Record the cause first so the log alone explains the failure,
            # then emit the original marker line unchanged.
            self.log_writer.log(
                'wafer_log',
                'Exception occurred in trainingModel method of the trainModel '
                'class. Exception message:  ' + str(err))
            self.log_writer.log('wafer_log', 'Unsuccessful End of Training')
            print(str(err))
            # Bare raise re-raises the active exception with its traceback.
            raise

    def _load_features_and_labels(self):
        """Fetch the training data and return the preprocessed (X, Y) pair."""
        data = self.mongo.downlaod_all_from_mongo('wafer_good_data',
                                                  'temp_db')

        preprocessor = preprocessing.Preprocessor('wafer_log',
                                                  self.log_writer)

        # The 'Wafer' column does not contribute to prediction.
        data = preprocessor.remove_columns(data, ['Wafer'])

        # Split into features and the 'Good/Bad' label column.
        X, Y = preprocessor.separate_label_feature(
            data, label_column_name='Good/Bad')

        # Turn the literal string 'NULL' into NaN so it counts as missing.
        X.replace(to_replace='NULL', value=np.nan, inplace=True)
        if preprocessor.is_null_present(X):
            X = preprocessor.impute_missing_values(X)

        # Columns with zero standard deviation are constant and therefore
        # carry no information for separating good from bad wafers.
        cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(X)
        X = preprocessor.remove_columns(X, cols_to_drop)
        return X, Y

    def _assign_clusters(self, X):
        """Pick the cluster count via the elbow plot and add a 'Cluster' column."""
        kmeans = clustering.KMeansClustering('wafer_log', self.log_writer)
        number_of_clusters = kmeans.elbow_plot(X)
        return kmeans.create_clusters(X, number_of_clusters)

    def _train_and_upload_cluster_model(self, cluster_data, index):
        """Find the best model for one cluster's rows and upload it pickled."""
        cluster_features = cluster_data.drop(['Labels', 'Cluster'], axis=1)
        cluster_label = cluster_data['Labels']

        # Hold out one third of the cluster for evaluation; fixed seed keeps
        # the split reproducible.
        x_train, x_test, y_train, y_test = train_test_split(
            cluster_features,
            cluster_label,
            test_size=1 / 3,
            random_state=355)

        model_finder = tuner.Model_Finder('wafer_log', self.log_writer)
        best_model_name, best_model = model_finder.get_best_model(
            x_train, y_train, x_test, y_test)

        print(best_model)
        serialized_model = pickle.dumps(best_model)

        # NOTE(review): the file name carries the enumerate position, not the
        # cluster label itself. If the prediction side looks models up by
        # cluster label these can differ -- confirm against the consumer.
        self.aws.Upload_To_S3_obj(serialized_model,
                                  best_model_name + str(index) + '.sav',
                                  bucket_prefix='wafer-model')
class KMeansClustering:
    """
    This class shall be used to divide the data into clusters before training.

    Written By: Rajat Bisoi
    Version: 1.0
    Revisions: None
    """

    def __init__(self, db_name, logger_object):
        self.db_name = db_name  # first argument of every logger_object.log call
        self.logger_object = logger_object
        self.aws = Aws_Bucket_operation()

    def elbow_plot(self, data):
        """
        Method Name: elbow_plot
        Description: Fits KMeans for 1 to 10 clusters, plots the resulting
                     WCSS (inertia) curve, saves it to
                     'preprocessing_data/K-Means_Elbow.PNG' and locates the
                     knee of the curve with KneeLocator.
        Output: The value of KneeLocator.knee (also kept on self.kn).
        On Failure: Logs the error and raises Exception chained to the cause.

        Written By: Rajat Bisoi
        Version: 1.0
        Revisions: None
        """
        self.logger_object.log(
            self.db_name,
            'Entered the elbow_plot method of the KMeansClustering class')
        cluster_counts = range(1, 11)
        wcss = []  # within-cluster sum of squares for each cluster count

        try:
            for n_clusters in cluster_counts:
                model = KMeans(n_clusters=n_clusters,
                               init='k-means++',
                               random_state=42)
                model.fit(data)
                wcss.append(model.inertia_)

            # Draw on a dedicated figure and always close it, so repeated
            # calls neither overlay earlier curves nor leak open figures.
            figure = plt.figure()
            try:
                plt.plot(cluster_counts, wcss)
                plt.title('The Elbow Method')
                plt.xlabel('Number of clusters')
                plt.ylabel('WCSS')
                plt.savefig('preprocessing_data/K-Means_Elbow.PNG')
            finally:
                plt.close(figure)

            # Find the optimum number of clusters programmatically.
            # NOTE(review): KneeLocator.knee is presumably None when no knee
            # is detected -- confirm and decide how callers should handle it.
            self.kn = KneeLocator(cluster_counts,
                                  wcss,
                                  curve='convex',
                                  direction='decreasing')
            self.logger_object.log(
                self.db_name,
                'The optimum number of clusters is: ' + str(self.kn.knee) +
                ' . Exited the elbow_plot method of the KMeansClustering class'
            )
            return self.kn.knee

        except Exception as e:
            self.logger_object.log(
                self.db_name,
                'Exception occured in elbow_plot method of the KMeansClustering class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.db_name,
                'Finding the number of clusters failed. Exited the elbow_plot method of the KMeansClustering class'
            )
            # Keep the message and the cause so callers can see what failed.
            raise Exception('Finding the number of clusters failed: ' +
                            str(e)) from e

    def create_clusters(self, data, number_of_clusters):
        """
        Method Name: create_clusters
        Description: Fits KMeans with the given number of clusters, uploads
                     the pickled model as 'kmeans.sav' and adds a 'Cluster'
                     column with each row's cluster assignment. The column is
                     added to the passed-in object itself (no copy is made).
        Output: The same data object with the extra 'Cluster' column.
        On Failure: Logs the error and raises Exception chained to the cause.

        Written By: Rajat Bisoi
        Version: 1.0
        Revisions: None
        """
        self.logger_object.log(
            self.db_name,
            'Entered the create_clusters method of the KMeansClustering class')
        self.data = data
        try:
            self.kmeans = KMeans(n_clusters=number_of_clusters,
                                 init='k-means++',
                                 random_state=42)
            # Fit before the 'Cluster' column exists so it is not a feature.
            self.y_kmeans = self.kmeans.fit_predict(data)

            # Serialize into a local so self.kmeans stays the fitted model.
            serialized_model = pickle.dumps(self.kmeans)
            self.aws.Upload_To_S3_obj(object=serialized_model,
                                      file_name='kmeans.sav',
                                      bucket_prefix='wafer-model')

            self.data['Cluster'] = self.y_kmeans
            # Report the count actually used; self.kn only exists when
            # elbow_plot was called on this instance beforehand.
            self.logger_object.log(
                self.db_name,
                'succesfully created ' + str(number_of_clusters) +
                ' clusters. Exited the create_clusters method of the KMeansClustering class'
            )
            return self.data
        except Exception as e:
            self.logger_object.log(
                self.db_name,
                'Exception occured in create_clusters method of the KMeansClustering class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.db_name,
                'Fitting the data to clusters failed. Exited the create_clusters method of the KMeansClustering class'
            )
            # Keep the message and the cause so callers can see what failed.
            raise Exception('Fitting the data to clusters failed: ' +
                            str(e)) from e