Code Example #1
 def __init__(self):
     # self.Batch_Directory = path
     self.schema_path = 'schema_prediction.json'
     self.logger = App_Logger()
     self.mongo = To_mongo_db('wafer')
     self.aws = Aws_Bucket_operation(
         local_file_name_address='config/bucket_name')
Code Example #2
 def __init__(self):
     self.log_writer = logger.App_Logger()
     self.mongo = To_mongo_db('wafer')
     self.aws = Aws_Bucket_operation()
Code Example #3
# Imports needed to make this example self-contained. The stdlib and sklearn
# imports are certain; the project-local module paths for clustering, tuner
# and To_mongo_db are assumptions based on the repository layout.
import pickle

import numpy as np
from sklearn.model_selection import train_test_split

from application_logging import logger
from data_preprocessing import preprocessing, clustering
from best_model_finder import tuner  # assumed module path
from mongo_db.mongo_db_connection import To_mongo_db  # assumed module path
from awss3_updown.aws_s3_operations import Aws_Bucket_operation


class trainModel:
    def __init__(self):
        self.log_writer = logger.App_Logger()
        self.mongo = To_mongo_db('wafer')
        self.aws = Aws_Bucket_operation()

        # self.file_object = open("Training_Logs/ModelTrainingLog.txt", 'a+')

    def trainingModel(self):
        # Logging the start of Training
        self.log_writer.log('wafer_log', 'Start of Training')
        try:
            # Getting the data from the source
            # data_getter=data_loader.Data_Getter(self.file_object,self.log_writer)
            data = self.mongo.downlaod_all_from_mongo('wafer_good_data',
                                                      'temp_db')
            """doing the data preprocessing"""

            preprocessor = preprocessing.Preprocessor('wafer_log',
                                                      self.log_writer)
            data = preprocessor.remove_columns(
                data, ['Wafer']
            )  # remove the wafer column as it doesn't contribute to prediction.

            # create separate features and labels
            X, Y = preprocessor.separate_label_feature(
                data, label_column_name='Good/Bad')

            # check if missing values are present in the dataset

            # if missing values are there, replace them appropriately.
            X.replace(to_replace='NULL', value=np.nan,
                      inplace=True)  # consumes 4 sec to compute
            is_null_present = preprocessor.is_null_present(X)
            if is_null_present:
                X = preprocessor.impute_missing_values(
                    X)  # missing value imputation

            # check further which columns do not contribute to predictions
            # if the standard deviation for a column is zero, it means that the column has constant values
            # and they are giving the same output both for good and bad sensors
            # prepare the list of such columns to drop
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(
                X)  # consumes a lot of time
            # drop the columns obtained above
            X = preprocessor.remove_columns(X, cols_to_drop)
            """ Applying the clustering approach"""

            kmeans = clustering.KMeansClustering(
                'wafer_log', self.log_writer)  # object initialization
            number_of_clusters = kmeans.elbow_plot(
                X)  # use the elbow plot to find the optimum number of clusters

            # Divide the data into clusters
            X = kmeans.create_clusters(X, number_of_clusters)

            # create a new column in the dataset consisting of the corresponding cluster assignments.

            # X=pd.DataFrame.join(X,Y)
            X['Labels'] = Y.values

            # getting the unique clusters from our dataset
            list_of_clusters = X['Cluster'].unique()
            """parsing all the clusters and looking for the best ML algorithm to fit on individual cluster"""

            for index, i in enumerate(list_of_clusters):
                cluster_data = X[X['Cluster'] ==
                                 i]  # filter the data for one cluster
                # Prepare the feature and Label columns
                cluster_features = cluster_data.drop(['Labels', 'Cluster'],
                                                     axis=1)
                cluster_label = cluster_data['Labels']

                # splitting the data into training and test set for each cluster one by one
                x_train, x_test, y_train, y_test = train_test_split(
                    cluster_features,
                    cluster_label,
                    test_size=1 / 3,
                    random_state=355)

                model_finder = tuner.Model_Finder(
                    'wafer_log', self.log_writer)  # object initialization

                # getting the best model for each of the clusters
                best_model_name, best_model = model_finder.get_best_model(
                    x_train, y_train, x_test, y_test)

                # saving the best model to S3
                # file_op = file_methods.File_Operation('wafer_log',self.log_writer)
                # save_model=file_op.save_model(best_model,best_model_name+str(i))
                print(best_model)
                best_model = pickle.dumps(best_model)
                self.aws.Upload_To_S3_obj(best_model,
                                          best_model_name + str(index) +
                                          '.sav',
                                          bucket_prefix='wafer-model')

            # logging the successful Training
            self.log_writer.log('wafer_log', 'Successful End of Training')
            # self.file_object.close()

        except Exception as err:
            # logging the unsuccessful Training
            self.log_writer.log('wafer_log', 'Unsuccessful End of Training')
            # self.file_object.close()
            print(str(err))
            raise err
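
The class above is the entire training entry point, so driving it takes only two lines. A minimal sketch, assuming trainModel is importable from a module named trainingModel (the module path is a guess, not confirmed by the examples here):

# hypothetical driver for the training pipeline
from trainingModel import trainModel

trainer = trainModel()    # wires up the logger, MongoDB and S3 clients
trainer.trainingModel()   # preprocessing, clustering, per-cluster training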
Code Example #4
# Imports needed to make this example self-contained (the S3 helper path
# follows the import shown in the last example).
import os
import pickle
import shutil
from io import BytesIO

from awss3_updown.aws_s3_operations import Aws_Bucket_operation


class File_Operation:
    """
                This class shall be used to save the model after training
                and load the saved model for prediction.

                Written By: Rajat Bisoi
                Version: 1.0
                Revisions: None

                """
    def __init__(self, file_object, logger_object):
        self.file_object = file_object
        self.logger_object = logger_object
        # self.model_directory='models/'
        self.aws = Aws_Bucket_operation()
        self.bytesIO = BytesIO()

    # not used
    def save_model(self, model, filename):
        """
            Method Name: save_model
            Description: Save the model file to directory
            Outcome: File gets saved
            On Failure: Raise Exception

            Written By: Rajat Bisoi
            Version: 1.0
            Revisions: None
"""
        self.logger_object.log(
            self.file_object,
            'Entered the save_model method of the File_Operation class')
        try:
            path = os.path.join(
                self.model_directory,
                filename)  # create a separate directory for each cluster
            if os.path.isdir(path):
                # remove previously existing models for each cluster
                shutil.rmtree(self.model_directory)
            os.makedirs(path)
            with open(path + '/' + filename + '.sav', 'wb') as f:
                pickle.dump(model, f)  # save the model to file
                # self.aws.Upload_To_S3(f, filename, bucket_prefix="model")

            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' saved. Exited the save_model method of the File_Operation class'
            )

            return 'success'
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occurred in save_model method of the File_Operation class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' could not be saved. Exited the save_model method of the File_Operation class'
            )
            raise Exception()

    def load_model(self, filename):
        """
                    Method Name: load_model
                    Description: load the model file to memory
                    Output: The Model file loaded in memory
                    On Failure: Raise Exception

                    Written By: Rajat Bisoi
                    Version: 1.0
                    Revisions: None
        """
        self.logger_object.log(
            self.file_object,
            'Entered the load_model method of the File_Operation class')
        try:
            # with open(self.model_directory + filename + '/' + filename + '.sav',
            #           'rb') as f:

            f = self.aws.Download_From_S3_raw(filename,
                                              bucket_prefix='wafer-model')
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' loaded. Exited the load_model method of the File_Operation class'
            )
            return pickle.loads(f)
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occurred in load_model method of the File_Operation class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' could not be loaded. Exited the load_model method of the File_Operation class'
            )
            raise Exception()

    # TODO: completely rewrite the find_correct_model_file method...

    def find_correct_model_file(self, cluster_number):
        """
                            Method Name: find_correct_model_file
                            Description: Select the correct model based on cluster number
                            Output: The Model file
                            On Failure: Raise Exception

                            Written By: Rajat Bisoi
                            Version: 1.0
                            Revisions: None
                """
        self.logger_object.log(
            self.file_object,
            'Entered the find_correct_model_file method of the File_Operation class'
        )
        try:
            self.cluster_number = cluster_number
            print('cluster no.:', cluster_number)
            # self.folder_name=self.model_directory
            self.list_of_model_files = []
            self.list_of_files = self.aws.Create_S3_Bucket_Instance(
                bucket_prefix="wafer-model").objects.all()
            for self.file in self.list_of_files:
                try:
                    # pick the model whose S3 key contains this cluster number
                    if str(self.cluster_number) in self.file.key:
                        self.model_name = self.file.key
                except Exception:
                    continue
            # self.model_name=self.model_name.split('.')[0]
            self.logger_object.log(
                self.file_object,
                'Exited the find_correct_model_file method of the File_Operation class.'
            )
            return self.model_name
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occurred in find_correct_model_file method of the File_Operation class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object,
                'Exited the find_correct_model_file method of the File_Operation class with Failure'
            )
            raise Exception()
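
At prediction time the two methods still in use work as a pair: find_correct_model_file resolves the S3 key whose name contains the record's cluster number, and load_model downloads and unpickles that model. A minimal sketch (the 'wafer_log' handle and App_Logger wiring are assumed to mirror the other examples):

file_op = File_Operation('wafer_log', logger.App_Logger())
model_name = file_op.find_correct_model_file(cluster_number=0)  # e.g. '<best_model_name>0.sav'
model = file_op.load_model(model_name)  # the unpickled estimator
# predictions = model.predict(features)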
Code Example #5
# Imports needed to make this example self-contained. The stdlib and pandas
# imports are certain; the module path for To_mongo_db is an assumption based
# on the repository layout.
import json
import os
import re
import shutil
from datetime import datetime

import pandas as pd

from application_logging.logger import App_Logger
from mongo_db.mongo_db_connection import To_mongo_db  # assumed module path
from awss3_updown.aws_s3_operations import Aws_Bucket_operation


class Prediction_Data_validation:
    """
               This class shall be used for handling all the validation done on the Raw Prediction Data!!.

               Written By: Rajat Bisoi
               Version: 1.0
               Revisions: None

               """
    def __init__(self):
        # self.Batch_Directory = path
        self.schema_path = 'schema_prediction.json'
        self.logger = App_Logger()
        self.mongo = To_mongo_db('wafer')
        self.aws = Aws_Bucket_operation(
            local_file_name_address='config/bucket_name')

    def valuesFromSchema(self):
        """
                                Method Name: valuesFromSchema
                                Description: This method extracts all the relevant information from the pre-defined "Schema" file.
                                Output: LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, Number of Columns
                                On Failure: Raise ValueError,KeyError,Exception

                                 Written By: Rajat Bisoi
                                Version: 1.0
                                Revisions: None

                                        """
        try:
            # with open(self.schema_path, 'r') as f:
            #     dic = json.load(f)
            #     f.close()

            ids = self.mongo.Get_ID('schema_wafer_prediction', 'temp_db')
            dic = self.mongo.downlaod_from_mongo_raw('schema_wafer_prediction',
                                                     'temp_db', ids[0])
            pattern = dic['SampleFileName']
            LengthOfDateStampInFile = dic['LengthOfDateStampInFile']
            LengthOfTimeStampInFile = dic['LengthOfTimeStampInFile']
            column_names = dic['ColName']
            NumberofColumns = dic['NumberofColumns']

            # file = open("Training_Logs/valuesfromSchemaValidationLog.txt", 'a+')
            message = ("LengthOfDateStampInFile:: %s" % LengthOfDateStampInFile + "\t" +
                       "LengthOfTimeStampInFile:: %s" % LengthOfTimeStampInFile + "\t " +
                       "NumberofColumns:: %s" % NumberofColumns + "\n")
            self.logger.log('wafer_log', message)

            # file.close()

        except ValueError:
            # 'wafer_log' = open("Prediction_Logs/valuesfromSchemaValidationLog.txt", 'a+')
            self.logger.log(
                'wafer_log',
                "ValueError: Value not found inside schema_prediction.json")
            # 'wafer_log'.close()
            raise ValueError

        except KeyError:
            # 'wafer_log' = open("Prediction_Logs/valuesfromSchemaValidationLog.txt", 'a+')
            self.logger.log('wafer_log',
                            "KeyError: incorrect key passed")
            # 'wafer_log'.close()
            raise KeyError

        except Exception as e:
            # 'wafer_log' = open("Prediction_Logs/valuesfromSchemaValidationLog.txt", 'a+')
            self.logger.log('wafer_log', str(e))
            # 'wafer_log'.close()
            raise e

        return LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, NumberofColumns
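
    # For reference, the schema document fetched above is assumed to look
    # roughly like this (field values are illustrative, not copied from the
    # repository):
    #
    # {
    #     "SampleFileName": "wafer_28011998_041011.csv",
    #     "LengthOfDateStampInFile": 8,
    #     "LengthOfTimeStampInFile": 6,
    #     "NumberofColumns": 592,
    #     "ColName": {"Wafer": "varchar", "Sensor-1": "float", ...}
    # }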

    def manualRegexCreation(self):
        """
                                      Method Name: manualRegexCreation
                                      Description: This method contains a manually defined regex based on the "FileName" given in "Schema" file.
                                                  This Regex is used to validate the filename of the prediction data.
                                      Output: Regex pattern
                                      On Failure: None

                                       Written By: Rajat Bisoi
                                      Version: 1.0
                                      Revisions: None

                                              """
        regex = r"['wafer']+['\_'']+[\d_]+[\d]+\.csv"
        return regex
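
    # Quick sanity check of the pattern above (illustrative file names):
    #   re.match(regex, "wafer_08012020_120000.csv")   -> match
    #   re.match(regex, "sensor_08012020_120000.csv")  -> no match
    # Note that ['wafer'] is a regex character class (any of w, a, f, e, r or
    # the quote), so the prefix test is looser than a literal "wafer" match.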

    # Not used
    def createDirectoryForGoodBadRawData(self):
        """
                                        Method Name: createDirectoryForGoodBadRawData
                                        Description: This method creates directories to store the Good Data and Bad Data
                                                      after validating the prediction data.

                                        Output: None
                                        On Failure: OSError

                                         Written By: Rajat Bisoi
                                        Version: 1.0
                                        Revisions: None

                                                """
        try:
            path = os.path.join("Prediction_Raw_Files_Validated/", "Good_Raw/")
            if not os.path.isdir(path):
                os.makedirs(path)
            path = os.path.join("Prediction_Raw_Files_Validated/", "Bad_Raw/")
            if not os.path.isdir(path):
                os.makedirs(path)

        except OSError as ex:
            file = open("Prediction_Logs/GeneralLog.txt", 'a+')
            self.logger.log(file, "Error while creating Directory %s:" % ex)
            file.close()
            raise OSError

    # Not used
    def deleteExistingGoodDataTrainingFolder(self):
        """
                                            Method Name: deleteExistingGoodDataTrainingFolder
                                            Description: This method deletes the directory made to store the Good Data
                                                          after loading the data in the table. Once the good files are
                                                          loaded in the DB,deleting the directory ensures space optimization.
                                            Output: None
                                            On Failure: OSError

                                             Written By: Rajat Bisoi
                                            Version: 1.0
                                            Revisions: None

                                                    """
        try:
            path = 'Prediction_Raw_Files_Validated/'
            # if os.path.isdir("ids/" + userName):
            # if os.path.isdir(path + 'Bad_Raw/'):
            #     shutil.rmtree(path + 'Bad_Raw/')
            if os.path.isdir(path + 'Good_Raw/'):
                shutil.rmtree(path + 'Good_Raw/')
                file = open("Prediction_Logs/GeneralLog.txt", 'a+')
                self.logger.log(file,
                                "GoodRaw directory deleted successfully!!!")
                file.close()
        except OSError as s:
            file = open("Prediction_Logs/GeneralLog.txt", 'a+')
            self.logger.log(file, "Error while Deleting Directory : %s" % s)
            file.close()
            raise OSError
    # Not used
    def deleteExistingBadDataTrainingFolder(self):
        """
                                            Method Name: deleteExistingBadDataTrainingFolder
                                            Description: This method deletes the directory made to store the bad Data.
                                            Output: None
                                            On Failure: OSError

                                             Written By: Rajat Bisoi
                                            Version: 1.0
                                            Revisions: None

                                                    """

        try:
            path = 'Prediction_Raw_Files_Validated/'
            if os.path.isdir(path + 'Bad_Raw/'):
                shutil.rmtree(path + 'Bad_Raw/')
                file = open("Prediction_Logs/GeneralLog.txt", 'a+')
                self.logger.log(
                    file,
                    "BadRaw directory deleted before starting validation!!!")
                file.close()
        except OSError as s:
            file = open("Prediction_Logs/GeneralLog.txt", 'a+')
            self.logger.log(file, "Error while Deleting Directory : %s" % s)
            file.close()
            raise OSError
    # Not used
    def moveBadFilesToArchiveBad(self):
        """
                                            Method Name: moveBadFilesToArchiveBad
                                            Description: This method deletes the directory made  to store the Bad Data
                                                          after moving the data in an archive folder. We archive the bad
                                                          files to send them back to the client for invalid data issue.
                                            Output: None
                                            On Failure: OSError

                                             Written By: Rajat Bisoi
                                            Version: 1.0
                                            Revisions: None

                                                    """
        now = datetime.now()
        date = now.date()
        time = now.strftime("%H%M%S")
        try:
            path = "PredictionArchivedBadData"
            if not os.path.isdir(path):
                os.makedirs(path)
            source = 'Prediction_Raw_Files_Validated/Bad_Raw/'
            dest = 'PredictionArchivedBadData/BadData_' + str(
                date) + "_" + str(time)
            if not os.path.isdir(dest):
                os.makedirs(dest)
            files = os.listdir(source)
            for f in files:
                if f not in os.listdir(dest):
                    shutil.move(source + f, dest)
            file = open("Prediction_Logs/GeneralLog.txt", 'a+')
            self.logger.log(file, "Bad files moved to archive")
            path = 'Prediction_Raw_Files_Validated/'
            if os.path.isdir(path + 'Bad_Raw/'):
                shutil.rmtree(path + 'Bad_Raw/')
            self.logger.log(file, "Bad Raw Data Folder Deleted successfully!!")
            file.close()
        except OSError as e:
            file = open("Prediction_Logs/GeneralLog.txt", 'a+')
            self.logger.log(file,
                            "Error while moving bad files to archive:: %s" % e)
            file.close()
            raise OSError

    def validationFileNameRaw(self, regex, LengthOfDateStampInFile,
                              LengthOfTimeStampInFile):
        """
            Method Name: validationFileNameRaw
            Description: This function validates the name of the prediction csv file as per given name in the schema!
                         Regex pattern is used to do the validation.If name format do not match the file is moved
                         to Bad Raw Data folder else in Good raw data.
            Output: None
            On Failure: Exception

             Written By: Rajat Bisoi
            Version: 1.0
            Revisions: None

        """
        # delete the directories for good and bad data in case last run was unsuccessful and folders were not deleted.
        # self.deleteExistingBadDataTrainingFolder()
        # self.deleteExistingGoodDataTrainingFolder()
        # self.createDirectoryForGoodBadRawData()
        self.mongo.Delete_collection('temp_db', 'wafer_bad_data_prediction')
        self.mongo.Delete_collection('temp_db', 'wafer_good_data_prediction')

        # onlyfiles = [f for f in listdir(self.Batch_Directory)]
        '''
        try:
            f = open("Prediction_Logs/nameValidationLog.txt", 'a+')
            for filename in onlyfiles:
                if (re.match(regex, filename)):
                    splitAtDot = re.split('.csv', filename)
                    splitAtDot = (re.split('_', splitAtDot[0]))
                    if len(splitAtDot[1]) == LengthOfDateStampInFile:
                        if len(splitAtDot[2]) == LengthOfTimeStampInFile:
                            shutil.copy("Prediction_Batch_files/" + filename, "Prediction_Raw_Files_Validated/Good_Raw")
                            self.logger.log(f,"Valid File name!! File moved to GoodRaw Folder :: %s" % filename)

                        else:
                            shutil.copy("Prediction_Batch_files/" + filename, "Prediction_Raw_Files_Validated/Bad_Raw")
                            self.logger.log(f,"Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
                    else:
                        shutil.copy("Prediction_Batch_files/" + filename, "Prediction_Raw_Files_Validated/Bad_Raw")
                        self.logger.log(f,"Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
                else:
                    shutil.copy("Prediction_Batch_files/" + filename, "Prediction_Raw_Files_Validated/Bad_Raw")
                    self.logger.log(f, "Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)

            f.close()
            
            '''

        bucket_inst = self.aws.Create_S3_Bucket_Instance(
            bucket_prefix='wafer-prediction')
        try:
            # f = open("Training_Logs/nameValidationLog.txt", 'a+')
            for obj in bucket_inst.objects.all():
                data = self.aws.Download_From_S3(obj.key)
                if re.match(regex, obj.key):
                    splitAtDot = re.split(r'\.csv', obj.key)
                    splitAtDot = re.split('_', splitAtDot[0])
                    if len(splitAtDot[1]) == LengthOfDateStampInFile:
                        if len(splitAtDot[2]) == LengthOfTimeStampInFile:
                            # shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Good_Raw")
                            self.mongo.send_to_mongo(
                                'wafer_good_data_prediction', 'temp_db', data)
                            self.logger.log(
                                'wafer_log',
                                f'file {obj.key} uploaded to collection wafer_good_data'
                            )

                        else:
                            # shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Bad_Raw")
                            data = data.to_json()
                            data = json.loads(data)
                            self.mongo.send_to_mongo_raw(
                                'wafer_bad_data_prediction', 'temp_db', data)
                            self.logger.log(
                                'wafer_log',
                                f'invalid file name  {obj.key} uploaded to collection wafer_bad_data'
                            )
                    else:
                        # shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Bad_Raw")
                        data = data.to_json()
                        data = json.loads(data)
                        self.mongo.send_to_mongo_raw(
                            'wafer_bad_data_prediction', 'temp_db', data)
                        self.logger.log(
                            'wafer_log',
                            f'invalid file name  {obj.key} uploaded to collection wafer_bad_data'
                        )
                else:
                    # shutil.copy("Training_Batch_Files/" + filename, "Training_Raw_files_validated/Bad_Raw")
                    data = data.to_json()
                    data = json.loads(data)
                    self.mongo.send_to_mongo_raw('wafer_bad_data_prediction',
                                                 'temp_db', data)
                    self.logger.log(
                        'wafer_log',
                        f'invalid file name  {obj.key} uploaded to collection wafer_bad_data'
                    )

        except Exception as e:
            # f = open("Prediction_Logs/nameValidationLog.txt", 'a+')
            self.logger.log('wafer_log',
                            "Error occurred while validating FileName %s" % e)
            # f.close()
            raise e

    def validateColumnLength(self, NumberofColumns):
        """
                    Method Name: validateColumnLength
                    Description: This function validates the number of columns in the csv files.
                                 It is should be same as given in the schema file.
                                 If not same file is not suitable for processing and thus is moved to Bad Raw Data folder.
                                 If the column number matches, file is kept in Good Raw Data for processing.
                                The csv file is missing the first column name, this function changes the missing name to "Wafer".
                    Output: None
                    On Failure: Exception

                     Written By: Rajat Bisoi
                    Version: 1.0
                    Revisions: None

             """
        '''
        try:
            f = open("Prediction_Logs/columnValidationLog.txt", 'a+')
            self.logger.log(f,"Column Length Validation Started!!")
            for file in listdir('Prediction_Raw_Files_Validated/Good_Raw/'):
                csv = pd.read_csv("Prediction_Raw_Files_Validated/Good_Raw/" + file)
                if csv.shape[1] == NumberofColumns:
                    csv.rename(columns={"Unnamed: 0": "Wafer"}, inplace=True)
                    csv.to_csv("Prediction_Raw_Files_Validated/Good_Raw/" + file, index=None, header=True)
                else:
                    shutil.move("Prediction_Raw_Files_Validated/Good_Raw/" + file, "Prediction_Raw_Files_Validated/Bad_Raw")
                    self.logger.log(f, "Invalid Column Length for the file!! File moved to Bad Raw Folder :: %s" % file)

            self.logger.log(f, "Column Length Validation Completed!!")
        except OSError:
            f = open("Prediction_Logs/columnValidationLog.txt", 'a+')
            self.logger.log(f, "Error Occured while moving the file :: %s" % OSError)
            f.close()
            raise OSError
        except Exception as e:
            f = open("Prediction_Logs/columnValidationLog.txt", 'a+')
            self.logger.log(f, "Error Occured:: %s" % e)
            f.close()
            raise e

        f.close()
        '''
        try:
            # f = open("Training_Logs/columnValidationLog.txt", 'a+')
            self.logger.log('wafer_log', "Column Length Validation Started!!")
            idx = self.mongo.Get_ID('wafer_good_data_prediction', 'temp_db')
            for file in idx:
                # csv = pd.read_csv("Training_Raw_files_validated/Good_Raw/" + file)
                try:
                    testfile = self.mongo.downlaod_one_from_mongo(
                        'wafer_good_data_prediction', 'temp_db', file)
                except Exception as err:
                    try:
                        testfile = self.mongo.downlaod_from_mongo_raw(
                            'wafer_good_raw_prediction', 'temp_db', file)
                    except Exception as err1:
                        self.mongo.Move_data_in_collections(
                            'wafer_good_data_prediction',
                            'wafer_bad_data_prediction', 'temp_db', file)
                        self.logger.log(
                            'wafer_log',
                            "Invalid Column Length for the file !! File moved to "
                            "wafer_bad_data_prediction collection ")
                        raise err1 from err
                testfile = pd.DataFrame(testfile)
                if testfile.shape[1] != NumberofColumns:
                    # shutil.move("Training_Raw_files_validated/Good_Raw/" + file, "Training_Raw_files_validated/Bad_Raw")
                    self.mongo.Move_data_in_collections(
                        'wafer_good_data_prediction',
                        'wafer_bad_data_prediction', 'temp_db', file)
                    self.logger.log(
                        'wafer_log',
                        "Invalid Column Length for the file !! File moved to "
                        "wafer_bad_data_prediction collection ")
            self.logger.log('wafer_log',
                            "Column Length Validation Completed!!")
        except OSError as os_err:
            # f = open("Training_Logs/columnValidationLog.txt", 'a+')
            self.logger.log('wafer_log',
                            f"Error occurred while moving the file {os_err}")
            # f.close()
            raise os_err
        except Exception as e:
            # f = open("Training_Logs/columnValidationLog.txt", 'a+')
            self.logger.log("wafer_log", f"Error Occured {e}")
            # f.close()
            raise e

    def deletePredictionFile(self):
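        """
        Method Name: deletePredictionFile
        Description: Drops the previous prediction output collection so a fresh
                     run starts clean (the MongoDB equivalent of removing the
                     old Predictions.csv, per the commented-out lines below).
        Output: None
        On Failure: Exception
        """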

        # if os.path.exists('Prediction_Output_File/Predictions.csv'):
        #     os.remove('Prediction_Output_File/Predictions.csv')
        self.mongo.Delete_collection('temp_db', 'prediction_output')

    def validateMissingValuesInWholeColumn(self):
        """
                                  Method Name: validateMissingValuesInWholeColumn
                                  Description: This function validates if any column in the csv file has all values missing.
                                               If all the values are missing, the file is not suitable for processing.
                                               SUch files are moved to bad raw data.
                                  Output: None
                                  On Failure: Exception

                                   Written By: Rajat Bisoi
                                  Version: 1.0
                                  Revisions: None

                              """
        '''
        
        try:
            f = open("Prediction_Logs/missingValuesInColumn.txt", 'a+')
            self.logger.log(f, "Missing Values Validation Started!!")

            for file in listdir('Prediction_Raw_Files_Validated/Good_Raw/'):
                csv = pd.read_csv("Prediction_Raw_Files_Validated/Good_Raw/" + file)
                count = 0
                for columns in csv:
                    if (len(csv[columns]) - csv[columns].count()) == len(csv[columns]):
                        count+=1
                        shutil.move("Prediction_Raw_Files_Validated/Good_Raw/" + file,
                                    "Prediction_Raw_Files_Validated/Bad_Raw")
                        self.logger.log(f,"Invalid Column Length for the file!! File moved to Bad Raw Folder :: %s" % file)
                        break
                if count==0:
                    csv.rename(columns={"Unnamed: 0": "Wafer"}, inplace=True)
                    csv.to_csv("Prediction_Raw_Files_Validated/Good_Raw/" + file, index=None, header=True)
        except OSError:
            f = open("Prediction_Logs/missingValuesInColumn.txt", 'a+')
            self.logger.log(f, "Error Occured while moving the file :: %s" % OSError)
            f.close()
            raise OSError
        except Exception as e:
            f = open("Prediction_Logs/missingValuesInColumn.txt", 'a+')
            self.logger.log(f, "Error Occured:: %s" % e)
            f.close()
            raise e
        f.close()
        '''
        try:
            # f = open("Training_Logs/missingValuesInColumn.txt", 'a+')
            self.logger.log('wafer_log', "Missing Values Validation Started!!")

            idx = self.mongo.Get_ID('wafer_good_data_prediction', 'temp_db')
            for file in idx:
                # csv = pd.read_csv("Training_Raw_files_validated/Good_Raw/" + file)
                testfile = self.mongo.downlaod_one_from_mongo(
                    'wafer_good_data_prediction', 'temp_db', file)
                testfile = pd.DataFrame(testfile)
                count = 0
                for columns in testfile:
                    if (len(testfile[columns]) -
                            testfile[columns].count()) == len(
                                testfile[columns]):
                        count += 1
                        # shutil.move("Training_Raw_files_validated/Good_Raw/" + file, "Training_Raw_files_validated/Bad_Raw")
                        self.mongo.Move_data_in_collections(
                            'wafer_good_data_prediction',
                            'wafer_bad_data_prediction', 'temp_db', file)
                        self.logger.log(
                            'wafer_log',
                            f"All values missing in a column!! File moved to wafer_bad_data_prediction :: {file}"
                        )
                        break
                if count == 0:
                    # testfile.rename(columns={"Unnamed: 0": "Wafer"}, inplace=True)
                    # testfile.to_csv("Training_Raw_files_validated/Good_Raw/" + file, index=None, header=True)
                    self.mongo.send_to_mongo('wafer_good_data_prediction',
                                             'temp_db', testfile)
        except OSError as os_err:
            # f = open("Training_Logs/missingValuesInColumn.txt", 'a+')
            self.logger.log(
                'wafer_log',
                "Error occurred while moving the file :: %s" % os_err)
            # f.close()
            raise os_err
        except Exception as e:
            # f = open("Training_Logs/missingValuesInColumn.txt", 'a+')
            self.logger.log('wafer_log', "Error Occured:: %s" % e)
            # f.close()
            raise e
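
Taken together, the methods above form the prediction-side ingestion pipeline. A minimal sketch of the calling order (error handling omitted; the ordering is an assumption based on how the methods depend on each other, not code from the examples):

validator = Prediction_Data_validation()
date_len, time_len, column_names, n_cols = validator.valuesFromSchema()
regex = validator.manualRegexCreation()
validator.validationFileNameRaw(regex, date_len, time_len)  # S3 -> good/bad collections
validator.validateColumnLength(n_cols)          # demote files with the wrong column count
validator.validateMissingValuesInWholeColumn()  # demote files with an all-missing column
validator.deletePredictionFile()                # clear the previous prediction output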
Code Example #6
 def __init__(self, file_object, logger_object):
     self.file_object = file_object
     self.logger_object = logger_object
     # self.model_directory='models/'
     self.aws = Aws_Bucket_operation()
     self.bytesIO = BytesIO()
Code Example #7
# Imports needed to make this example self-contained (matplotlib, kneed and
# scikit-learn are certain; the S3 helper path follows the other examples).
import pickle

import matplotlib.pyplot as plt
from kneed import KneeLocator
from sklearn.cluster import KMeans

from awss3_updown.aws_s3_operations import Aws_Bucket_operation


class KMeansClustering:
    """
            This class shall  be used to divide the data into clusters before training.

            Written By: Rajat Bisoi
            Version: 1.0
            Revisions: None

            """
    def __init__(self, db_name, logger_object):
        self.db_name = db_name
        self.logger_object = logger_object
        self.aws = Aws_Bucket_operation()

    def elbow_plot(self, data):
        """
                        Method Name: elbow_plot
                        Description: This method saves the plot to decide the optimum number of clusters to the file.
                        Output: A picture saved to the directory
                        On Failure: Raise Exception

                        Written By: Rajat Bisoi
                        Version: 1.0
                        Revisions: None

                """
        self.logger_object.log(
            self.db_name,
            'Entered the elbow_plot method of the KMeansClustering class')
        wcss = []  # within-cluster sum of squares (WCSS) for each candidate k

        try:
            for i in range(1, 11):
                kmeans = KMeans(
                    n_clusters=i, init='k-means++',
                    random_state=42)  # initializing the KMeans object
                kmeans.fit(data)  # fitting the data to the KMeans Algorithm
                wcss.append(kmeans.inertia_)
            plt.plot(range(1, 11),
                     wcss)  # plot WCSS against the number of clusters
            plt.title('The Elbow Method')
            plt.xlabel('Number of clusters')
            plt.ylabel('WCSS')
            # plt.show()
            plt.savefig(
                'preprocessing_data/K-Means_Elbow.PNG')  # save the elbow plot locally
            # finding the value of the optimum cluster programmatically
            self.kn = KneeLocator(range(1, 11),
                                  wcss,
                                  curve='convex',
                                  direction='decreasing')
            self.logger_object.log(
                self.db_name,
                'The optimum number of clusters is: ' + str(self.kn.knee) +
                ' . Exited the elbow_plot method of the KMeansClustering class'
            )
            return self.kn.knee

        except Exception as e:
            self.logger_object.log(
                self.db_name,
                'Exception occurred in elbow_plot method of the KMeansClustering class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.db_name,
                'Finding the number of clusters failed. Exited the elbow_plot method of the KMeansClustering class'
            )
            raise Exception()

    def create_clusters(self, data, number_of_clusters):
        """
                                Method Name: create_clusters
                                Description: Create a new dataframe consisting of the cluster information.
                                Output: A datframe with cluster column
                                On Failure: Raise Exception

                                Written By: Rajat Bisoi
                                Version: 1.0
                                Revisions: None

                        """
        self.logger_object.log(
            self.db_name,
            'Entered the create_clusters method of the KMeansClustering class')
        self.data = data
        try:
            self.kmeans = KMeans(n_clusters=number_of_clusters,
                                 init='k-means++',
                                 random_state=42)
            #self.data = self.data[~self.data.isin([np.nan, np.inf, -np.inf]).any(1)]
            self.y_kmeans = self.kmeans.fit_predict(
                data)  #  divide data into clusters

            # self.file_op = file_methods.File_Operation(self.db_name, self.logger_object)
            # self.save_model = self.file_op.save_model(self.kmeans, 'KMeans') # saving the KMeans model to directory
            self.kmeans = pickle.dumps(self.kmeans)  # serialize the fitted model
            self.aws.Upload_To_S3_obj(
                object=self.kmeans,
                file_name='kmeans.sav',
                bucket_prefix='wafer-model')  # upload the pickled model to S3

            self.data[
                'Cluster'] = self.y_kmeans  # new column storing the cluster assignment
            self.logger_object.log(
                self.db_name,
                'successfully created ' + str(number_of_clusters) +
                ' clusters. Exited the create_clusters method of the KMeansClustering class'
            )
            return self.data
        except Exception as e:
            self.logger_object.log(
                self.db_name,
                'Exception occurred in create_clusters method of the KMeansClustering class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.db_name,
                'Fitting the data to clusters failed. Exited the create_clusters method of the KMeansClustering class'
            )
            raise Exception()
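
In the training flow these two methods run back to back: elbow_plot computes the WCSS for k = 1..10, locates the knee with KneeLocator and returns it, and create_clusters refits KMeans with that k, uploads the pickled model to S3 and returns the data with an added 'Cluster' column. A minimal sketch (X is a preprocessed feature dataframe, as in Code Example #3):

kmeans = KMeansClustering('wafer_log', logger.App_Logger())
k = kmeans.elbow_plot(X)          # saves the elbow plot, returns the knee point
X = kmeans.create_clusters(X, k)  # X now carries a 'Cluster' column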
Code Example #8
 def __init__(self, db_name, logger_object):
     self.db_name = db_name
     self.logger_object = logger_object
     self.aws = Aws_Bucket_operation()
Code Example #9
 def __init__(self):
     # self.Batch_Directory = path
     # self.schema_path = 'schema_training.json'
     self.logger = App_Logger('wafer')
     self.aws = Aws_Bucket_operation()
     self.mongo = To_mongo_db('wafer')
Code Example #10
from sklearn.cluster import KMeans
from data_preprocessing import preprocessing
import numpy as np
import csv
from awss3_updown.aws_s3_operations import Aws_Bucket_operation
import joblib
from datetime import datetime
import re
import json
from application_logging.logger import App_Logger
from DataTransform_Training.DataTransformation import dataTransform
from application_logging import logger
from io import BytesIO

# bytesIO = BytesIO()
awsopp = Aws_Bucket_operation(local_file_name_address='config/bucket_name')
import pickle
from data_ingestion import data_loader
# b=open('schema_training.json')

# data = json.loads(b.read())

# awsopp.Create_Bucket()
# print(open('config/bucket_name').read())
# a='15'
# X = [[0.5, 1.], [-1., -1.5], [0., -2.]]
# y = [1, -1, -1]
# from sklearn.svm import LinearSVC
# linear_svc = LinearSVC()
# model = linear_svc.fit(X, y)
# model_bin = pickle.dumps(model)