Example #1
class prediction:
    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            # check if missing values are present in the dataset
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)

            # if missing values are there, replace them appropriately.
            if (is_null_present):
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)  # missing value imputation

            # Proceeding with more data pre-processing steps
            X = preprocessor.scale_numerical_columns(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            clusters = kmeans.predict(
                X)  # assign each record to a cluster
            X['clusters'] = clusters
            clusters = X['clusters'].unique()
            predictions = []
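            # score each cluster's records with the model trained for that cluster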
            for i in clusters:
                cluster_data = X[X['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = (model.predict(cluster_data))
                predictions = predictions + list(result)

            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv",
                         header=True,
                         mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
Example #2
 def __init__(self, path):
     self.file_object = 'Prediction_Log'
     self.log_writer = logger.App_Logger()
     self.awsObj = AwsStorageManagement()
     self.emailObj = email()
     if path is not None:
         self.pred_data_val = Prediction_Data_validation(path)
Example #3
class prediction:

    def __init__(self,path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile() #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object,'Start of Prediction')
            data_getter=data_loader_prediction.Data_getter(self.file_object,self.log_writer)
            data=data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor=preprocessing.Preprocessor(self.file_object,self.log_writer)

            is_null_present,cols_with_missing_values=preprocessor.is_null_present(data)
            if(is_null_present):
                data=preprocessor.impute_missing_values(data)

            data  = preprocessor.logTransformation(data)

            #scale the prediction data
            data_scaled = pandas.DataFrame(preprocessor.standardScalingData(data),columns=data.columns)

            #data=data.to_numpy()
            file_loader=file_methods.File_Operation(self.file_object,self.log_writer)
            kmeans=file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters=kmeans.predict(data_scaled) # assign each record to a cluster
            data_scaled['clusters']=clusters
            clusters=data_scaled['clusters'].unique()
            result=[] # initialize blank list for storing predictions
            # with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #     encoder = pickle.load(file)

            for i in clusters:
                cluster_data= data_scaled[data_scaled['clusters']==i]
                cluster_data = cluster_data.drop(['clusters'],axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result,columns=['Predictions'])
            json_data = result.to_json(orient='values')
            print(json_data)
            path="Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",header=True) #appends result to prediction file
            self.log_writer.log(self.file_object,'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path,json_data,result
Example #4

class prediction:

    def __init__(self,path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile() #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object,'Start of Prediction')
            data_getter=data_loader_prediction.Data_Getter_Pred(self.file_object,self.log_writer)
            data=data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor=preprocessing.Preprocessor(self.file_object,self.log_writer)
            is_null_present=preprocessor.is_null_present(data)
            if(is_null_present):
                data=preprocessor.impute_missing_values(data)

            cols_to_drop=preprocessor.get_columns_with_zero_std_deviation(data)
            data=preprocessor.remove_columns(data,cols_to_drop)
            #data=data.to_numpy()
            file_loader=file_methods.File_Operation(self.file_object,self.log_writer)
            kmeans=file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters=kmeans.predict(data.drop(['Wafer'],axis=1))#drops the first column for cluster prediction
            data['clusters']=clusters
            clusters=data['clusters'].unique()
            for i in clusters:
                #selecting all the records of a particular cluster type
                cluster_data= data[data['clusters']==i]
                #getting all the wafer names
                wafer_names = list(cluster_data['Wafer'])
                #dropping wafer and clusters columns
                cluster_data = cluster_data.drop(['Wafer','clusters'],axis=1)
                #finding the model name for that cluster
                model_name = file_loader.find_correct_model_file(i)
                #loading the model using the model name
                model = file_loader.load_model(model_name)
                #these are the predicted values 
                pred_values = list(model.predict(cluster_data))
                #creating a dataframe with wafernames and predictions
                result = pandas.DataFrame(list(zip(wafer_names,pred_values)),columns=['Wafer','Prediction'])
                #path to save the dataframe as csv file
                path = "Prediction_Output_File/Predictions.csv"
                #writing to csv files
                result.to_csv(path,header=True,mode='a+') #appends result to prediction file
            self.log_writer.log(self.file_object,'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
Example #5

class prediction:
    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            data = preprocessor.dropUnnecessaryColumns(data, [
                'id', 'region', 'url', 'region_url', 'image_url', 'state',
                'type', 'dogs_allowed'
            ])

            # get encoded values for categorical data

            data = preprocessor.encodeCategoricalValuesPrediction(data)

            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed
            clusters = kmeans.predict(
                data_scaled)  # assign each record to a cluster
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions
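            # run each cluster's records through the model trained for that cluster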
            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True,
                          mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
Example #6
class prediction:

    def __init__(self,path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile() #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object,'Start of Prediction')
            data_getter=data_loader_prediction.Data_Getter_Pred(self.file_object,self.log_writer)
            data=data_getter.get_data()



            preprocessor=preprocessing.Preprocessor(self.file_object,self.log_writer)
            data = preprocessor.dropUnnecessaryColumns(data,["serial","rate","listed_in(type)","listed_in(city)"])


            is_null_present,cols_with_missing_values=preprocessor.is_null_present(data)
            if(is_null_present):
                data=data.dropna(how='any')


            # get encoded values for categorical data

            data = preprocessor.encodeCategoricalValues(data)
            #scale the prediction data
            data_scaled = pandas.DataFrame(preprocessor.standardScalingData(data),columns=data.columns)

            #data=data.to_numpy()
            file_loader=file_methods.File_Operation(self.file_object,self.log_writer)
            kmeans=file_loader.load_model('KMeans')


            clusters=kmeans.predict(data_scaled) # assign each record to a cluster
            data_scaled['clusters']=clusters
            clusters=data_scaled['clusters'].unique()
            result=[] # initialize blank list for storing predictions


            for i in clusters:
                cluster_data= data_scaled[data_scaled['clusters']==i]
                cluster_data = cluster_data.drop(['clusters'],axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result,columns=['Predictions'])
            path="Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",header=True) #appends result to prediction file
            self.log_writer.log(self.file_object,'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
Example #7

 def __init__(self, path):
     #self.rootProjPath=rootProjPath
     self.raw_data = Prediction_Data_validation(path)
     self.dataTransform = dataTransformPredict()
     self.dBOperation = dBOperation()
     #my_file = rootProjPath+'\\Prediction_Logs\\Prediction_Log.txt'
     #self.file_object = open(my_file, 'a+')
     self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
     self.log_writer = logger.App_Logger()
Example #8

 def __init__(self, path, execution_id):
     self.execution_id = execution_id
     #self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
     #self.log_writer = logger.App_Logger()
     self.log_database = "wafer_prediction_log"
     self.log_collection = "prediction_log"
     self.log_db_writer = App_LoggerDB(execution_id)
     self.az_blob_mgt = AzureBlobManagement()
     if path is not None:
         self.pred_data_val = Prediction_Data_validation(path, execution_id)
Example #9
class prediction:
    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

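            # replace -1 values with 1, then keep the ten features presumably selected during training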
            data.replace(-1, 1, inplace=True)
            new_data = data[[
                'H18', 'F76', 'F46', 'G57', 'C13', 'A71', 'E115', 'F56', 'I59',
                'A91'
            ]]

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            clusters = kmeans.predict(
                new_data)  # assign each record to a cluster
            new_data['clusters'] = clusters
            clusters = new_data['clusters'].unique()
            result = []  # initialize blank list for storing predictions

            for i in clusters:
                cluster_data = new_data[new_data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True,
                          mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
Example #10

 def __init__(self, path, execution_id):
     self.raw_data = Prediction_Data_validation(path, execution_id)
     self.dataTransform = dataTransformPredict(execution_id)
     self.dBOperationMongoDB = DbOperationMongoDB(execution_id)
     #self.dBOperation = dBOperation(execution_id)
     self.log_database = "wafer_prediction_log"
     self.log_collection = "prediction_main_log"
     self.execution_id = execution_id
     #self.log_writer = logger.App_Logger()
     self.logDB_write = App_LoggerDB(execution_id=execution_id)
     self.az_blob_mgt = AzureBlobManagement()
Example #11
class prediction:
    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        try:
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(
                data)
            data = preprocessor.remove_columns(data, cols_to_drop)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            clusters = kmeans.predict(data.drop(['Wafer'], axis=1))
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
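            # predict cluster by cluster, pairing each wafer name with its predicted label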
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                wafer_names = list(cluster_data['Wafer'])
                cluster_data = cluster_data.drop(labels=['Wafer'], axis=1)
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = list(model.predict(cluster_data))
                result = pandas.DataFrame(list(zip(wafer_names, result)),
                                          columns=['Wafer', 'Prediction'])
                path = "Prediction_Output_File/Predictions.csv"
                result.to_csv("Prediction_Output_File/Predictions.csv",
                              header=True,
                              mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
Example #12
 def __init__(self, path, execution_id):
     self.raw_data = Prediction_Data_validation(path, execution_id)
     self.dataTransform = dataTransformPredict(execution_id)
     #self.dBOperation = dBOperation()
     #self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
     #self.log_writer = logger.App_Logger()
     self.dBOperationMongoDB = DbOperationMongoDB(execution_id)
     self.log_database = "strength_prediction_log"
     self.log_collection = "stg-prediction_main_log"
     self.execution_id = execution_id
     self.logDB_write = App_LoggerDB(execution_id=execution_id)
     self.az_blob_mgt = AzureBlobManagement()
Example #13

class pred_validation:
    def __init__(self, path):
        self.raw_data = Prediction_Data_validation(path)
        self.dataTransform = dataTransformPredict()
        self.dBOperation = dBOperation()
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):

        try:

            self.log_writer.log(
                self.file_object,
                'Start of Validation on files for prediction!!')
            #extracting values from prediction schema
            column_names, noofcolumns = self.raw_data.valuesFromSchema()

            #validating column length in the file
            self.raw_data.validateColumnLength(noofcolumns)
            #validating if any column has all values missing
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object,
                                "Raw Data Validation Complete!!")

            self.log_writer.log(
                self.file_object,
                "Creating Prediction_Database and tables on the basis of given schema!!!"
            )
            #create database with given name, if present open the connection! Create table with columns given in schema
            self.dBOperation.createTableDb('Prediction', column_names)
            self.log_writer.log(self.file_object, "Table creation Completed!!")
            self.log_writer.log(self.file_object,
                                "Insertion of Data into Table started!!!!")
            #insert csv files in the table
            self.dBOperation.insertIntoTableGoodData('Prediction')
            self.log_writer.log(self.file_object,
                                "Insertion in Table completed!!!")
            self.log_writer.log(self.file_object,
                                "Deleting Good Data Folder!!!")
            #Delete the good data folder after loading files in table
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object,
                                "Good_Data folder deleted!!!")
            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            #Move the bad files to archive folder
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object,
                                "Validation Operation completed!!")
            self.log_writer.log(self.file_object,
                                "Extracting csv file from table")
            #export data in table to csvfile
            self.dBOperation.selectingDatafromtableintocsv('Prediction')

        except Exception as e:
            raise e
Example #14
class pred_validation:
    def __init__(self, path):
        self.raw_data = Prediction_Data_validation(path)
        self.dataTransform = dataTransformPredict()
        self.dBOperation = dBOperation()
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):
        try:
            self.log_writer.log(
                self.file_object,
                'Start of Validation on files for prediction!!')
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema(
            )

            regex = self.raw_data.manualRegexCreation()

            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)

            self.raw_data.validateColumnLength(noofcolumns)

            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object,
                                "Raw Data validation complete!!")

            self.log_writer.log(self.file_object,
                                "Starting Data transformation!!")
            self.dataTransform.replaceSingleQuotesToDouble()
            self.log_writer.log(self.file_object,
                                "DataTransformation Completed!!!")

            self.log_writer.log(
                self.file_object,
                "Creating Prediction_Database and tables on the basis of given schema!!!"
            )
            self.dBOperation.createTableDb('Prediction', column_names)
            self.log_writer.log(self.file_object, "Table creation Completed!!")

            self.log_writer.log(self.file_object,
                                "Insertion of Data into Table started!!!!")
            self.dBOperation.insertIntoTableGoodData('Prediction')
            self.log_writer.log(self.file_object,
                                "Insertion in Table completed!!!")

            self.log_writer.log(self.file_object, "Deleting Good Data Folder")
            self.raw_data.deleteExistingGoodDataPredictingFolder()
            self.log_writer.log(self.file_object,
                                "Good_Data folder deleted!!!")

            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object,
                                "Validation Operation completed!!")

            self.log_writer.log(self.file_object,
                                "Extracting csv file from table")
            self.dBOperation.selectingDatafromtableintocsv('Prediction')

        except Exception as e:
            raise e
Example #15

class prediction:
    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            # drop the 'Unnamed: 0' index column if present
            if 'Unnamed: 0' in data.columns:
                data.drop('Unnamed: 0', axis=1, inplace=True)

            # Dropping column after performing EDA
            preprocessor_cus = preprocess_cus.Preprocessor_cus(
                self.file_object, self.log_writer)
            data = preprocessor_cus.drop_column(data)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            # replacing '?' values with np.nan as discussed in the EDA part
            data = preprocessor.replaceInvalidValuesWithNull(data)

            # get encoded values for categorical data
            data = preprocessor_cus.test_data_encode(data)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)

            result = []  # initialize blank list for storing predictions

            model = file_loader.load_model('CatBoost')
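            # a single CatBoost model is used here instead of per-cluster models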
            for val in (model.predict(data)):
                result.append(val)

            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result['Predictions'].replace({0: "no", 1: "yes"}, inplace=True)
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True)  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
Example #16
class pred_validation:
    def __init__(self):
        self.raw_data = Prediction_Data_validation()
        self.dataTransform = dataTransformPredict()
        self.dBOperation = dBOperation()
        self.file_object = 'wafer_log'  # open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):

        try:

            self.log_writer.log(
                self.file_object,
                'Start of Validation on files for prediction!!')
            # extracting values from prediction schema
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema(
            )
            # getting the regex defined to validate filename
            regex = self.raw_data.manualRegexCreation()
            # validating filename of prediction files
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            # validating column length in the file
            self.raw_data.validateColumnLength(noofcolumns)
            # validating if any column has all values missing
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object,
                                "Prediction Raw Data Validation Complete!!")

            self.log_writer.log(self.file_object,
                                "Starting Data Transformation!!")
            # replacing blanks in the csv file with "Null" values to insert in table
            self.dataTransform.replaceMissingWithNull()

            self.log_writer.log(self.file_object,
                                "DataTransformation Completed!!!")

            # self.log_writer.log(self.file_object,
            #                     "Creating Prediction_Database and tables on the basis of given schema!!!")
            # # create database with given name, if present open the connection! Create table with columns given in schema
            # self.dBOperation.createTableDb('Prediction', column_names)
            # self.log_writer.log(self.file_object, "Table creation Completed!!")
            # self.log_writer.log(self.file_object, "Insertion of Data into Table started!!!!")
            # # insert csv files in the table
            # self.dBOperation.insertIntoTableGoodData('Prediction')
            # self.log_writer.log(self.file_object, "Insertion in Table completed!!!")
            # self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!")
            # # Delete the good data folder after loading files in table
            # self.raw_data.deleteExistingGoodDataTrainingFolder()
            # self.log_writer.log(self.file_object, "Good_Data folder deleted!!!")
            # self.log_writer.log(self.file_object, "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # # Move the bad files to archive folder
            # self.raw_data.moveBadFilesToArchiveBad()
            # self.log_writer.log(self.file_object, "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object,
                                "Validation Operation completed!!")
            # self.log_writer.log(self.file_object, "Extracting csv file from table")
            # # export data in table to csvfile
            # self.dBOperation.selectingDatafromtableintocsv('Prediction')

        except Exception as e:
            raise e
Example #17

class prediction:
    def __init__(self, path, execution_id):
        #self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        #self.log_writer = logger.App_Logger()
        #self.pred_data_val = Prediction_Data_validation(path)
        self.execution_id = execution_id
        self.log_database = "strength_prediction_log"
        self.log_collection = "prediction_log"
        self.log_db_writer = App_LoggerDB(execution_id)
        self.az_blob_mgt = AzureBlobManagement()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path, execution_id)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'Start of Prediction')
            print("start of prediction")
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.log_database, self.log_collection, self.execution_id)
            data = data_getter.get_data()

            path = ""
            if len(data) == 0:
                self.log_db_writer.log(
                    self.log_database, self.log_collection,
                    "No data was present to perform prediction; exiting prediction method"
                )
                return path, "No data was present to perform prediction"

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            data = preprocessor.logTransformation(data)
            print("after log Transformation")
            print(data)

            #scale the prediction data
            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)

            print("standard scaling for data completed")
            print(data_scaled)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            kmeans = file_loader.load_model('kkmeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(
                data_scaled)  # assign each record to a cluster
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            # with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #     encoder = pickle.load(file)

            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                print(model_name)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)

            result = pandas.DataFrame(result, columns=['strength-Predictions'])

            #result = list(model.predict(cluster_data))
            #self.result = pandas.DataFrame(list(zip(result)), columns=['Prediction'])
            #for val in (model.predict(cluster_data.values)):
            #    result.append(val)
            #print(self.result.shape)
            print("results after prediction with prediction columns")
            print(result)

            path = "Prediction-Output-File"
            #result.to_csv("Prediction_Output_File/Predictions.csv",header=True) #appends result to prediction file
            self.az_blob_mgt.saveDataFrametoCSV(
                path,
                "cement-strength-prediction.csv",
                result,
                header=True,
                mode="a+")

            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'End of Prediction')
        except Exception as ex:
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
Example #18
class prediction:
    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.scaleData(data)

            #data = preprocessor.enocdeCategoricalvalues(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(
                data)  # assign each record to a cluster
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            result = []
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
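                # map numeric predictions back to forest cover type names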
                for val in (model.predict(cluster_data)):
                    if val == 0:
                        result.append("Lodgepole_Pine")
                    elif val == 1:
                        result.append("Spruce_Fir")
                    elif val == 2:
                        result.append("Douglas_fir")
                    elif val == 3:
                        result.append("Krummholz")
                    elif val == 4:
                        result.append("Ponderosa_Pine")
                    elif val == 5:
                        result.append("Aspen")
                    elif val == 6:
                        result.append("Cottonwood_Willow")
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True,
                          mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')

        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
Example #19

class prediction:
    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", "a+")
        self.log_writer = logger.App_Logger()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, "Start of Prediction")
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer
            )
            data = data_getter.get_data()

            # code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)

            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
            data = preprocessor.remove_columns(data, cols_to_drop)
            # data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model("KMeans")

            ##Code changed
            # pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(
                data.drop(["Wafer"], axis=1)
            )  # drops the first column for cluster prediction
            data["clusters"] = clusters
            clusters = data["clusters"].unique()
            for i in clusters:
                cluster_data = data[data["clusters"] == i]
                wafer_names = list(cluster_data["Wafer"])
                cluster_data = data.drop(labels=["Wafer"], axis=1)
                cluster_data = cluster_data.drop(["clusters"], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = list(model.predict(cluster_data))
                result = pandas.DataFrame(
                    list(zip(wafer_names, result)), columns=["Wafer", "Prediction"]
                )
                path = "Prediction_Output_File/Predictions.csv"
                result.to_csv(
                    path, header=True, mode="a+"
                )  # appends result to prediction file
            self.log_writer.log(self.file_object, "End of Prediction")
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                "Error occured while running the prediction!! Error:: %s" % ex,
            )
            raise ex
        return path, result.head().to_json(orient="records")
Example #20

class prediction:
    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.dropUnnecessaryColumns(data, ['veiltype'])

            # replacing '?' values with np.nan as discussed in the EDA part

            data = preprocessor.replaceInvalidValuesWithNull(data)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)

            # get encoded values for categorical data
            data = preprocessor.encodeCategoricalValuesPrediction(data)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(data)
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            # with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #     encoder = pickle.load(file)

            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True)  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
Example #21
class prediction:
    def __init__(self, path):
        self.file_object = 'Prediction_Log'
        self.log_writer = logger.App_Logger()
        self.awsObj = AwsStorageManagement()
        self.emailObj = email()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.dropUnnecessaryColumns(data, ['veil-type'])

            # replacing '?' values with np.nan as discussed in the EDA part

            data = preprocessor.replaceInvalidValuesWithNull(data)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)

            # get encoded values for categorical data
            data = preprocessor.encodeCategoricalValuesPrediction(data)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            print(kmeans.labels_)
            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(data)
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            # with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #     encoder = pickle.load(file)

            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            self.awsObj.saveDataframeToCsv('Prediction_Output_File',
                                           'Predictions.csv', result)
            self.log_writer.log(self.file_object, 'End of Prediction')

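            # notify by email that the prediction run finished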
            msg = MIMEMultipart()
            msg['Subject'] = 'MushroomTypeClassifier - Prediction Done | ' + str(
                datetime.now())
            body = 'Model Prediction Done Successfully... <br><br> Thanks and Regards, <br> Rahul Garg'
            msg.attach(MIMEText(body, 'html'))
            to_addr = ['*****@*****.**']
            self.emailObj.trigger_mail(to_addr, [], msg)
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
Example #22

class prediction:

    def __init__(self,path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile() #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object,'Start of Prediction')
            data_getter=data_loader_prediction.Data_Getter_Pred(self.file_object,self.log_writer)
            data=data_getter.get_data()


            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            data = preprocessor.remove_columns(data, [])  # no extra columns need removing for this dataset
            data.replace('?', np.NaN, inplace=True)  # replacing '?' with NaN values for imputation

            # check if missing values are present in the dataset
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)

            # if missing values are there, replace them appropriately.
            if (is_null_present):
                data = preprocessor.impute_missing_values(data, cols_with_missing_values)  # missing value imputation
            # encode categorical data
            #data = preprocessor.encode_categorical_columns(data)
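            # cluster on a copy without the 'Sex' column; categorical encoding is applied after cluster assignment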
            df=data.copy()
            df.drop(labels=['Sex'],axis=1,inplace=True)

            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed

            clusters=kmeans.predict(df)
            data['clusters']=clusters
            data = preprocessor.encode_categorical_columns(data)
            clusters=data['clusters'].unique()
            predictions=[]
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'],axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = (model.predict(np.array(cluster_data)))
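                # translate encoded classes into ring-count buckets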
                for res in result:
                    if res == 0:
                        predictions.append('1-8 Rings')
                    elif res == 1:
                        predictions.append('11+ Rings')
                    else:
                        predictions.append('9-10 Rings')

            final = pd.DataFrame(predictions, columns=['Predictions'])
            path="Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv",header=True,mode='a+') #appends result to prediction file
            self.log_writer.log(self.file_object,'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, final
Example #23
 def __init__(self):
     self.file_object = 'wafer_log'
     self.log_writer = logger.App_Logger()
     self.pred_data_val = Prediction_Data_validation()
Example #24

class pred_validation:
    def __init__(self, path, execution_id):
        self.raw_data = Prediction_Data_validation(path, execution_id)
        self.dataTransform = dataTransformPredict(execution_id)
        self.dBOperationMongoDB = DbOperationMongoDB(execution_id)
        #self.dBOperation = dBOperation(execution_id)
        self.log_database = "wafer_prediction_log"
        self.log_collection = "prediction_main_log"
        self.execution_id = execution_id
        #self.log_writer = logger.App_Logger()
        self.logDB_write = App_LoggerDB(execution_id=execution_id)
        self.az_blob_mgt = AzureBlobManagement()

        #self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        #self.log_writer = logger.App_Logger()

    def prediction_validation(self):

        try:
            self.logDB_write.log(
                self.log_database, self.log_collection,
                'Start of Validation on files for prediction!!')
            #extracting values from prediction schema
            LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema(
            )
            #getting the regex defined to validate filename
            regex = self.raw_data.manualRegexCreation()
            #validating filename of prediction files
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            #validating column length in the file
            self.raw_data.validateColumnLength(noofcolumns)
            #validating if any column has all values missing
            self.raw_data.validateMissingValuesInWholeColumn()
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Raw Data Validation Complete!!")

            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Starting Data Transformation!!")
            #replacing blanks in the csv file with "Null" values to insert in table
            self.dataTransform.replaceMissingWithNull()

            self.logDB_write.log(self.log_database, self.log_collection,
                                 "DataTransformation Completed!!!")
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Creating Prediction_Database and tables on the basis of given schema!!!"
            )
            #create database with given name, if present open the connection! Create table with columns given in schema
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Creating database and collection if not exist then insert record"
            )
            #insert csv files in the table
            self.dBOperationMongoDB.insertIntoTableGoodData(column_names)
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Insertion in Table completed!!!")
            #self.logDB_write.log(self.log_database, self.log_collection,"Deleting Good Data Folder!!!")
            #Delete the good data folder after loading files in table
            #self.raw_data.deleteExistingGoodDataTrainingFolder()
            #self.logDB_write.log(self.log_database, self.log_collection,"Good_Data folder deleted!!!")
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")
            #Move the bad files to archive folder
            print("moving bad files to archieve")
            self.raw_data.moveBadFilesToArchiveBad()
            self.logDB_write.log(
                self.log_database, self.log_collection,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Validation Operation completed!!")
            self.logDB_write.log(self.log_database, self.log_collection,
                                 "Extracting csv file from table")
            #export data in table to csvfile
            self.dBOperationMongoDB.selectingDatafromtableintocsv()

        except Exception as e:
            raise e
def __init__(self, path):
    self.raw_data = Prediction_Data_validation(path)
    self.dataTransform = dataTransformPredict()
    self.dBOperation = dBOperation()
    self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
    self.log_writer = logger.App_Logger()
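A hypothetical driver for the MongoDB/Azure-backed validator above, assuming its module is importable and that execution ids are plain strings (neither the module path nor the entry point is shown in the source):

import uuid

# assumed module path -- the snippets never show their own imports
from prediction_Validation_Insertion import pred_validation

execution_id = str(uuid.uuid4())  # one id per run, threaded through logs and storage
pred_validation("Prediction_Batch_Files", execution_id).prediction_validation()  # folder name assumed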
Example no. 26
class pred_validation:
    def __init__(self, path):
        self.raw_data = Prediction_Data_validation(path)

        self.dBOperation = dBOperation()
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):

        try:

            self.log_writer.log(
                self.file_object,
                'Start of Validation on files for prediction!!')
            #extracting values from prediction schema
            column_names, noofcolumns = self.raw_data.valuesFromSchema()

            #getting the regex defined to validate filename
            #regex = self.raw_data.manualRegexCreation()

            #validating filename of prediction files
            #self.raw_data.validationFileNameRaw(regex)

            #validating number of columns
            self.raw_data.validateColumnLength(noofcolumns)

            #validating if any column has all values missing
            #self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object,
                                "Raw Data Validation Complete!!")

            self.log_writer.log(
                self.file_object,
                "Creating Prediction_Database and collection on the basis of given schema!!!"
            )
            # create database with given name, if present open the connection! Create collection on the basis of given schema
            collection = self.dBOperation.createCollection()
            self.log_writer.log(self.file_object,
                                "Collection creation completed!!")
            self.log_writer.log(
                self.file_object,
                "Insertion of Data into collection started!!!!")

            # insert csv files in the collection
            self.dBOperation.GoodDatainsertIntoCollection(collection)
            self.log_writer.log(self.file_object,
                                "Insertion in collection completed!!!")
            self.log_writer.log(self.file_object,
                                "Deleting Good Data Folder!!!")

            # Delete the good data folder after loading files in table
            self.raw_data.deleteExistingGoodDataPredictionFolder()
            self.log_writer.log(self.file_object,
                                "Good_Data folder deleted!!!")
            self.log_writer.log(
                self.file_object,
                "Moving bad files to Archive and deleting Bad_Data folder!!!")

            # Move the bad files to archive folder
            self.raw_data.moveBadFilesToArchive()
            self.log_writer.log(
                self.file_object,
                "Bad files moved to archive!! Bad folder Deleted!!")
            self.log_writer.log(self.file_object,
                                "Validation Operation completed!!")
            self.log_writer.log(self.file_object,
                                "Extracting csv file from collection")

            # export data in table to csvfile
            self.dBOperation.selectingDatafromCollectionintocsv(collection)
            self.file_object.close()

        except Exception as e:
            raise e
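The dBOperation helper is not shown above; as a rough sketch of the pattern GoodDatainsertIntoCollection presumably follows (connection string, database, collection and folder names are all assumptions, and a reachable MongoDB instance is required), validated CSV rows can be bulk-inserted with pymongo:

import os
import pandas as pd
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017/")  # assumed connection string
collection = client["Prediction"]["Good_Data"]      # assumed database/collection names

good_data_dir = "Prediction_Raw_Files_Validated/Good_Raw"  # assumed folder layout
for csv_file in os.listdir(good_data_dir):
    df = pd.read_csv(os.path.join(good_data_dir, csv_file))
    # one document per row; to_dict('records') keeps column names as keys
    collection.insert_many(df.to_dict("records"))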
Example no. 27
class prediction:
    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        try:
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.remove_columns(data, [
                'policy_number', 'policy_bind_date', 'policy_state',
                'insured_zip', 'incident_location', 'incident_date',
                'incident_state', 'incident_city', 'insured_hobbies',
                'auto_make', 'auto_model', 'auto_year', 'age',
                'total_claim_amount'
            ])
            data.replace('?', np.nan, inplace=True)  # np.nan works on all NumPy versions; np.NaN was removed in NumPy 2.0

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)

            if (is_null_present):
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)

            data = preprocessor.encode_categorical_columns(data)
            data = preprocessor.scale_numerical_columns(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            clusters = kmeans.predict(data)
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            predictions = []
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = model.predict(cluster_data)
                for res in result:
                    if res == 0:
                        predictions.append('N')
                    else:
                        predictions.append('Y')
            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv(path, header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
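Every prediction class in these examples uses the same dispatch pattern: assign rows to KMeans clusters, then route each group to the model trained for that cluster. A minimal self-contained sketch of that pattern with scikit-learn (the data, target, and per-cluster models are purely illustrative):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 4))      # illustrative feature matrix
y = rng.integers(0, 2, size=200)   # illustrative binary target

# segment the data, then fit one model per cluster (the examples above
# instead load each cluster's model from disk via file_methods.File_Operation)
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0).fit(X)
models = {}
for c in np.unique(kmeans.labels_):
    mask = kmeans.labels_ == c
    models[c] = LogisticRegression().fit(X[mask], y[mask])

# at prediction time, route every row to the model of its predicted cluster
clusters = kmeans.predict(X)
preds = np.empty(len(X), dtype=int)
for c, model in models.items():
    preds[clusters == c] = model.predict(X[clusters == c])

Routing row-wise, as above, keeps the output aligned with the input order; the examples in this document instead append predictions cluster by cluster, so their Predictions column is ordered by cluster grouping rather than by input row.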
Example no. 28
class prediction:
    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.remove_columns(data, [
                'education'
            ])  # remove the column as it doesn't contribute to prediction.
            data = preprocessor.remove_unwanted_spaces(
                data)  # remove unwanted spaces from the dataframe
            data.replace(
                '?', np.nan,
                inplace=True)  # replacing '?' with NaN values for imputation

            # check if missing values are present in the dataset
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)

            # if missing values are there, replace them appropriately.
            if (is_null_present):
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)  # missing value imputation

            # Proceeding with more data pre-processing steps
            scaled_num_df = preprocessor.scale_numerical_columns(data)
            cat_df = preprocessor.encode_categorical_columns(data)
            X = pd.concat([scaled_num_df, cat_df], axis=1)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(
                X)  # predict a cluster for every row
            X['clusters'] = clusters
            clusters = X['clusters'].unique()
            predictions = []
            for i in clusters:
                cluster_data = X[X['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = model.predict(cluster_data)
                for res in result:
                    if res == 0:
                        predictions.append('<=50K')
                    else:
                        predictions.append('>50K')

            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv(path, header=True, mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
def __init__(self, path):
    self.file_object = open("Prediction_Logs/Prediction_Log.txt", "a+")
    self.log_writer = logger.App_Logger()
    if path is not None:
        self.pred_data_val = Prediction_Data_validation(path)
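Every prediction snippet here writes with to_csv(..., header=True, mode='a+'), so a second run appends another header row into Predictions.csv. A small sketch of an idempotent alternative (the file name comes from the examples; the existence guard is an addition):

import os
import pandas as pd

final = pd.DataFrame({'Predictions': ['<=50K', '>50K']})  # illustrative output
path = "Prediction_Output_File/Predictions.csv"

# write the header only when the file does not exist yet, so repeated
# runs append rows without injecting duplicate header lines
final.to_csv(path, mode='a', header=not os.path.exists(path), index=False)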
Example no. 30
class pred_validation:
    def __init__(self,path):
        self.raw_data = Prediction_Data_validation(path)
        self.dataTransform = dataTransformPredict()
        self.dBOperation = dBOperation()
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def prediction_validation(self):

        try:

            self.log_writer.log(self.file_object,'Start of Validation on files for prediction!!')
            #extracting values from prediction schema
            column_names,noofcolumns = self.raw_data.valuesFromSchema()
            #getting the regex defined to validate filename
            regex = self.raw_data.manualRegexCreation()
            #validating filename of prediction files
            self.raw_data.validationFileNameRaw(regex)
            #validating column length in the file
            self.raw_data.validateColumnLength(noofcolumns)
            #validating if any column has all values missing
            self.raw_data.validateMissingValuesInWholeColumn()
            self.log_writer.log(self.file_object,"Raw Data Validation Complete!!")
            self.log_writer.log(self.file_object,"Affan here")
            
            self.log_writer.log(self.file_object,("Starting Data Transforamtion!!"))
            
            
            #self.dataTransform.addQuotesToStringValuesInColumn()
            #replacing blanks in the csv file with "Null" values to insert in table
            self.dataTransform.EDA()
            self.log_writer.log(self.file_object,"EDA Completed!!!")
            self.log_writer.log(self.file_object,"DataTransformation Completed!!!")
            column_names_Updated,noofcolumns = self.raw_data.valuesFromSchema_updated()
            
            self.log_writer.log(self.file_object,"Creating Prediction_Database and tables on the basis of given schema!!!")
            #create database with given name, if present open the connection! Create table with columns given in schema
            #self.dBOperation.dataBaseConnection('Prediction')
            #column_names_updated_after_EDA = ['PassengerId','Pclass','Sex','Age','SibSp','Parch','Fare','Embarked','Title']
            #self.log_writer.log(self.file_object,"DB Conn Chk")
            self.dBOperation.createTableDb('Prediction',column_names_Updated)
            self.log_writer.log(self.file_object,"Table creation Completed!!")
            self.log_writer.log(self.file_object,"Insertion of Data into Table started!!!!")
            #insert csv files in the table
            
            self.dBOperation.insertIntoTableGoodData('Prediction')
            self.log_writer.log(self.file_object,"Insertion in Table completed!!!")
            
            self.log_writer.log(self.file_object,"Deleting Good Data Folder!!!")
            #Delete the good data folder after loading files in table
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self.log_writer.log(self.file_object,"Good_Data folder deleted!!!")
            self.log_writer.log(self.file_object,"Moving bad files to Archive and deleting Bad_Data folder!!!")
            #Move the bad files to archive folder
            self.raw_data.moveBadFilesToArchiveBad()
            self.log_writer.log(self.file_object,"Bad files moved to archive!! Bad folder Deletedaffan1!!")
            self.log_writer.log(self.file_object,"Validation Operation completedaffan2!!")
            self.log_writer.log(self.file_object,"Extracting csv file from table affan3")
            #export data in table to csvfile
            self.dBOperation.selectingDatafromtableintocsv('Prediction')
            self.log_writer.log(self.file_object,"Selected data from table into csv affan")
            
            

            

        except Exception as e:
            raise e
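Several validators above build a filename regex via manualRegexCreation and check date/time-stamp lengths taken from the schema. A minimal sketch of that check, with the naming convention and both lengths assumed rather than taken from the source:

import re

LengthOfDateStampInFile = 8   # returned by valuesFromSchema() above; value assumed
LengthOfTimeStampInFile = 6   # value assumed

# assumed convention: <prefix>_<datestamp>_<timestamp>.csv
pattern = re.compile(r"^wafer_(\d+)_(\d+)\.csv$")  # the 'wafer' prefix is an assumption

def filename_is_valid(name: str) -> bool:
    m = pattern.match(name)
    if not m:
        return False
    date_part, time_part = m.groups()
    return (len(date_part) == LengthOfDateStampInFile
            and len(time_part) == LengthOfTimeStampInFile)

print(filename_is_valid("wafer_20240101_120000.csv"))  # True
print(filename_is_valid("wafer_2024_12.csv"))          # False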