def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(
                data)
            data = preprocessor.remove_columns(data, cols_to_drop)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(data.drop(
                ['Wafer'],
                axis=1))  #drops the first column for cluster prediction
            data['clusters'] = clusters
            data.to_csv(
                'Prediction_Raw_Files_Validated/finalpredictiondata.csv',
                index=False)
            clusters = data['clusters'].unique()
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                wafer_names = list(cluster_data['Wafer'])
                cluster_data = cluster_data.drop(labels=['Wafer'], axis=1)
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = list(model.predict(cluster_data))
                result = pandas.DataFrame(list(zip(wafer_names, result)),
                                          columns=['Wafer', 'Prediction'])
                path = "Prediction_Output_File/Predictions.csv"
                result.to_csv("Prediction_Output_File/Predictions.csv",
                              header=True,
                              mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
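
Most of the examples in this collection repeat the same cluster-then-route prediction pattern: assign each record to a KMeans cluster, then score it with the model trained for that cluster. The helper below is a minimal standalone sketch of that pattern, not taken from any of the projects above; `kmeans` is any fitted clusterer with a `predict` method and `load_model` is a hypothetical callable that maps a cluster id to a fitted estimator (the projects use their own `file_methods.File_Operation` for this).

import pandas as pd

def predict_by_cluster(features: pd.DataFrame, kmeans, load_model) -> pd.DataFrame:
    # Assign every record to a cluster with the fitted clusterer.
    labels = kmeans.predict(features)
    pieces = []
    for cluster_id in pd.unique(labels):
        # Select the rows belonging to this cluster and load its dedicated model.
        rows = features[labels == cluster_id]
        model = load_model(cluster_id)
        preds = model.predict(rows)
        pieces.append(pd.DataFrame({'Prediction': preds}, index=rows.index))
    # Restore the original row order so predictions line up with the input records.
    return pd.concat(pieces).sort_index()
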
# Example #2
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile() #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object,'Start of Prediction')
            data_getter=data_loader_prediction.Data_Getter_Pred(self.file_object,self.log_writer)
            data=data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            data = preprocessor.remove_columns(data, [
                'education'])  # remove the column as it doesn't contribute to prediction.
            data = preprocessor.remove_unwanted_spaces(data)  # remove unwanted spaces from the dataframe
            data.replace('?', np.NaN, inplace=True)  # replacing '?' with NaN values for imputation

            # check if missing values are present in the dataset
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)

            # if missing values are there, replace them appropriately.
            if (is_null_present):
                data = preprocessor.impute_missing_values(data, cols_with_missing_values)  # missing value imputation

            # Proceeding with more data pre-processing steps
            scaled_num_df = preprocessor.scale_numerical_columns(data)
            cat_df = preprocessor.encode_categorical_columns(data)
            X = pd.concat([scaled_num_df, cat_df], axis=1)


            file_loader=file_methods.File_Operation(self.file_object,self.log_writer)
            kmeans=file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters=kmeans.predict(X) # assign each record to a KMeans cluster
            X['clusters']=clusters
            clusters=X['clusters'].unique()
            predictions=[]
            for i in clusters:
                cluster_data= X[X['clusters']==i]
                cluster_data = cluster_data.drop(['clusters'],axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result=(model.predict(cluster_data))
                for res in result:
                    if res==0:
                        predictions.append('<=50K')
                    else:
                        predictions.append('>50K')

            final= pd.DataFrame(list(zip(predictions)),columns=['Predictions'])
            path="Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv",header=True,mode='a+') #appends result to prediction file
            self.log_writer.log(self.file_object,'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
# Example #3
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.scaleData(data)

            #data = preprocessor.enocdeCategoricalvalues(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(
                data)  # assign each record to a KMeans cluster
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            result = []
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data)):
                    if val == 0:
                        result.append("Lodgepole_Pine")
                    elif val == 1:
                        result.append("Spruce_Fir")
                    elif val == 2:
                        result.append("Douglas_fir")
                    elif val == 3:
                        result.append("Krummholz")
                    elif val == 4:
                        result.append("Ponderosa_Pine")
                    elif val == 5:
                        result.append("Aspen")
                    elif val == 6:
                        result.append("Cottonwood_Willow")
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True,
                          mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')

        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
# Example #4
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)

            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            data = preprocessor.logTransformation(data)
            # print(data)

            # #scale the prediction data
            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)

            # #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            clusters = kmeans.predict(
                data_scaled)  # assign each record to a KMeans cluster
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions

            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True)  #writes the results to the prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')

        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex

        return path
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile() #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object,'Start of Prediction')
            data_getter=data_loader_prediction.Data_Getter_Pred(self.file_object,self.log_writer)
            data=data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor=preprocessing.Preprocessor(self.file_object,self.log_writer)
            data = preprocessor.dropUnnecessaryColumns(data,
                                                       ['TSH_measured', 'T3_measured', 'TT4_measured', 'T4U_measured',
                                                        'FTI_measured', 'TBG_measured', 'TBG', 'TSH'])

            # replacing '?' values with np.nan as discussed in the EDA part

            data = preprocessor.replaceInvalidValuesWithNull(data)

            # get encoded values for categorical data

            data = preprocessor.encodeCategoricalValuesPrediction(data)
            is_null_present=preprocessor.is_null_present(data)
            if(is_null_present):
                data=preprocessor.impute_missing_values(data)

            #data=data.to_numpy()
            file_loader=file_methods.File_Operation(self.file_object,self.log_writer)
            kmeans=file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters=kmeans.predict(data) # assign each record to a KMeans cluster
            data['clusters']=clusters
            clusters=data['clusters'].unique()
            result=[] # initialize blank list for storing predictions
            with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
                encoder = pickle.load(file)

            for i in clusters:
                cluster_data= data[data['clusters']==i]
                cluster_data = cluster_data.drop(['clusters'],axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (encoder.inverse_transform(model.predict(cluster_data))):
                    result.append(val)
            result = pandas.DataFrame(result,columns=['Predictions'])
            path="Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",header=True) #appends result to prediction file
            self.log_writer.log(self.file_object,'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
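
The method above decodes the numeric class outputs with a pickled encoder loaded from 'EncoderPickle/enc.pickle'. That file is produced on the training side; the snippet below is a minimal sketch of how such an encoder is typically fitted and persisted, assuming scikit-learn's LabelEncoder and placeholder label values (the project's actual training code is not shown here).

import pickle
from sklearn.preprocessing import LabelEncoder

# Placeholder target labels; the real project fits on its training target column.
labels = ['negative', 'hypothyroid', 'negative', 'hypothyroid']

encoder = LabelEncoder()
encoded = encoder.fit_transform(labels)  # e.g. array([1, 0, 1, 0])

with open('EncoderPickle/enc.pickle', 'wb') as f:
    pickle.dump(encoder, f)

# At prediction time the method above performs the reverse mapping:
# encoder.inverse_transform(model.predict(data)) -> original label strings
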
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            # drop the 'Unnamed: 0' index column if it is present
            if 'Unnamed: 0' in data.columns:
                data.drop('Unnamed: 0', axis=1, inplace=True)

            # Dropping column after performing EDA
            preprocessor_cus = preprocess_cus.Preprocessor_cus(
                self.file_object, self.log_writer)
            data = preprocessor_cus.drop_column(data)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            # replacing '?' values with np.nan as discussed in the EDA part
            data = preprocessor.replaceInvalidValuesWithNull(data)

            # get encoded values for categorical data
            data = preprocessor_cus.test_data_encode(data)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)

            result = []  # initialize blank list for storing predictions

            model = file_loader.load_model('CatBoost')
            for val in (model.predict(data)):
                result.append(val)

            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result['Predictions'].replace({0: "no", 1: "yes"}, inplace=True)
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True)  #writes the results to the prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile() #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object,'Start of Prediction')
            data_getter=data_loader_prediction.Data_Getter_Pred(self.file_object,self.log_writer)
            data=data_getter.get_data()


            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            data = preprocessor.remove_columns(data, [])  # no columns flagged for removal at prediction time
            data.replace('?', np.NaN, inplace=True)  # replacing '?' with NaN values for imputation

            # check if missing values are present in the dataset
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)

            # if missing values are there, replace them appropriately.
            if (is_null_present):
                data = preprocessor.impute_missing_values(data, cols_with_missing_values)  # missing value imputation
            # encode categorical data
            #data = preprocessor.encode_categorical_columns(data)
            df=data.copy()
            df.drop(labels=['Sex'],axis=1,inplace=True)

            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed

            clusters=kmeans.predict(df)
            data['clusters']=clusters
            data = preprocessor.encode_categorical_columns(data)
            clusters=data['clusters'].unique()
            predictions=[]
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'],axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = (model.predict(np.array(cluster_data)))
                for res in result:
                    if res == 0:
                        predictions.append('1-8 Rings')
                    elif res == 1:
                        predictions.append('11+ Rings')
                    else:
                        predictions.append('9-10 Rings')

            final= pd.DataFrame(list(zip(predictions)),columns=['Predictions'])
            path="Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv",header=True,mode='a+') #appends result to prediction file
            self.log_writer.log(self.file_object,'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, final
# Example #8
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile() #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object,'Start of Prediction')
            data_getter=data_loader_prediction.Data_Getter_Pred(self.file_object,self.log_writer)
            data=data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor=preprocessing.Preprocessor(self.file_object,self.log_writer)
            data = preprocessor.dropUnnecessaryColumns(data,["serial","rate","listed_in(type)","listed_in(city)"])


            is_null_present,cols_with_missing_values=preprocessor.is_null_present(data)
            if(is_null_present):
                data=data.dropna(how='any')


            # get encoded values for categorical data

            data = preprocessor.encodeCategoricalValues(data)
            #scale the prediction data
            data_scaled = pandas.DataFrame(preprocessor.standardScalingData(data),columns=data.columns)

            #data=data.to_numpy()
            file_loader=file_methods.File_Operation(self.file_object,self.log_writer)
            kmeans=file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters=kmeans.predict(data_scaled) # assign each record to a KMeans cluster
            data_scaled['clusters']=clusters
            clusters=data_scaled['clusters'].unique()
            result=[] # initialize blank list for storing predictions
            # with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #     encoder = pickle.load(file)

            for i in clusters:
                cluster_data= data_scaled[data_scaled['clusters']==i]
                cluster_data = cluster_data.drop(['clusters'],axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result,columns=['Predictions'])
            path="Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",header=True) #appends result to prediction file
            self.log_writer.log(self.file_object,'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
# Example #9
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            # check if missing values are present in the dataset
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)

            # if missing values are there, replace them appropriately.
            if (is_null_present):
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)  # missing value imputation

            # Proceeding with more data pre-processing steps
            X = preprocessor.scale_numerical_columns(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            clusters = kmeans.predict(
                X)  # assign each record to a KMeans cluster
            X['clusters'] = clusters
            clusters = X['clusters'].unique()
            predictions = []
            for i in clusters:
                cluster_data = X[X['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = (model.predict(cluster_data))
                predictions = predictions + list(result)

            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv",
                         header=True,
                         mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            data = preprocessor.dropUnnecessaryColumns(data, [
                'id', 'region', 'url', 'region_url', 'image_url', 'state',
                'type', 'dogs_allowed'
            ])

            # get encoded values for categorical data

            data = preprocessor.encodeCategoricalValuesPrediction(data)

            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed
            clusters = kmeans.predict(
                data_scaled)  # assign each record to a KMeans cluster
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True,
                          mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
    def predictionFromModel(self, singlerecdata=None):

        try:
            self.loggerObj.logger_log('Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.loggerObj)
            if singlerecdata is None:
                data = data_getter.get_data()
            else:
                data = data_getter.get_data_for_rec(singlerecdata)

            preprocessor = preprocessing.Preprocessor(self.loggerObj)

            # replacing '?' values with np.nan as discussed in the EDA part
            data = preprocessor.replaceInvalidValuesWithNull(data)

            data = preprocessor.dropUnnecessaryColumnsForPrediction(data)

            # check if missing values are present in the dataset
            is_null_present = preprocessor.is_null_present(data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            data = preprocessor.encodeCategoricalValuesPrediction(data)

            data = preprocessor.prediction_data_standardisation(data)

            file_loader = file_methods.File_Operation(self.loggerObj)
            model_name = file_loader.find_model_file()
            model = file_loader.load_model(model_name)

            result = []  # initialize blank list for storing predictions
            with open(
                    'EncoderPickle/enc.pickle', 'rb'
            ) as file:  # let's load the encoder pickle file to decode the values
                encoder = pickle.load(file)
            if singlerecdata is None:
                for val in (encoder.inverse_transform(model.predict(data))):
                    result.append(val)
                result = pandas.DataFrame(result, columns=['Predictions'])
                path = "Prediction_Output_File/Predictions.csv"
                result.to_csv("Prediction_Output_File/Predictions.csv",
                              header=True)  # writes the results to the prediction file
                self.loggerObj.logger_log('End of Prediction')
                return path
            else:
                val = encoder.inverse_transform(model.predict(data))
                self.loggerObj.logger_log('End of Prediction')
                return val

        except Exception as ex:
            self.loggerObj.logger_log(
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile() #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object,'Start of Prediction')
            data_getter=data_loader_prediction.Data_Getter_Pred(self.file_object,self.log_writer)
            data=data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor=preprocessing.Preprocessor(self.file_object,self.log_writer)
            is_null_present=preprocessor.is_null_present(data)
            if(is_null_present):
                data=preprocessor.impute_missing_values(data)

            cols_to_drop=preprocessor.get_columns_with_zero_std_deviation(data)
            data=preprocessor.remove_columns(data,cols_to_drop)
            #data=data.to_numpy()
            file_loader=file_methods.File_Operation(self.file_object,self.log_writer)
            kmeans=file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters=kmeans.predict(data.drop(['Wafer'],axis=1))#drops the first column for cluster prediction
            data['clusters']=clusters
            clusters=data['clusters'].unique()
            for i in clusters:
                #selecting all the records of a particular cluster type
                cluster_data= data[data['clusters']==i]
                #getting all the wafer names
                wafer_names = list(cluster_data['Wafer'])
                #dropping wafer and clusters columns
                cluster_data = cluster_data.drop(['Wafer','clusters'],axis=1)
                #finding the model name for that cluster
                model_name = file_loader.find_correct_model_file(i)
                #loading the model using the model name
                model = file_loader.load_model(model_name)
                #these are the predicted values 
                pred_values = list(model.predict(cluster_data))
                #creating a dataframe with wafernames and predictions
                result = pandas.DataFrame(list(zip(wafer_names,pred_values)),columns=['Wafer','Prediction'])
                #path to save the dataframe as csv file
                path = "Prediction_Output_File/Predictions.csv"
                #writing to csv files
                result.to_csv(path,header=True,mode='a+') #appends result to prediction file
            self.log_writer.log(self.file_object,'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
# Example #13
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            data.replace(-1, 1, inplace=True)
            new_data = data[[
                'H18', 'F76', 'F46', 'G57', 'C13', 'A71', 'E115', 'F56', 'I59',
                'A91'
            ]].copy()  # copy so the cluster column can be added without a chained-assignment warning

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            clusters = kmeans.predict(
                new_data)  # assign each record to a KMeans cluster
            new_data['clusters'] = clusters
            clusters = new_data['clusters'].unique()
            result = []  # initialize blank list for storing predictions

            for i in clusters:
                cluster_data = new_data[new_data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True,
                          mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
# Example #14
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            #data  = preprocessor.logTransformation(data)

            #encode the prediction data
            data_scaled = preprocessor.encodeCategoricalValuesPrediction(data)
            ###Time features
            data = preprocessor.create_timefeatures(data)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)

            model = file_loader.load_model('XGBOOST')
            result = model.predict(data)

            result = pandas.DataFrame(result, columns=['Predictions'])
            result['Item_Identifier'] = data["Item_Identifier"]
            result["Outlet_Identifier"] = data["Outlet_Identifier"]
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True)  #writes the results to the prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            #data  = preprocessor.logTransformation(data)
            cols_to_drop = ["Item_Identifier", "Outlet_Identifier"]
            data_useful = preprocessor.remove_columns(data, cols_to_drop)
            #scale the prediction data
            data_scaled = preprocessor.scale_numerical_columns(data_useful)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(
                data_scaled)  # assign each record to a KMeans cluster
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            # with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #     encoder = pickle.load(file)

            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Predictions'])
            result['Item_Identifier'] = data["Item_Identifier"]
            result["Outlet_Identifier"] = data["Outlet_Identifier"]
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True)  #writes the results to the prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
# Example #16
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.remove_columns(data, [
                'policy_number', 'policy_bind_date', 'policy_state',
                'insured_zip', 'incident_location', 'incident_date',
                'incident_state', 'incident_city', 'insured_hobbies',
                'auto_make', 'auto_model', 'auto_year', 'age',
                'total_claim_amount'
            ])  # remove the column as it doesn't contribute to prediction.
            data.replace(
                '?', np.NaN,
                inplace=True)  # replacing '?' with NaN values for imputation

            # check if missing values are present in the dataset
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)

            # if missing values are there, replace them appropriately.
            if (is_null_present):
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)  # missing value imputation
            # encode categorical data
            data = preprocessor.encode_categorical_columns(data)
            data = preprocessor.scale_numerical_columns(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            ##Code changed

            clusters = kmeans.predict(data)
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            predictions = []
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = (model.predict(cluster_data))
                for res in result:
                    if res == 0:
                        predictions.append('N')
                    else:
                        predictions.append('Y')

            final = pd.DataFrame(list(zip(predictions)),
                                 columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv",
                         header=True,
                         mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()
            self.log_writer.log(self.file_object, 'Checking a sample of the prediction data')
            print(data.head())
            #self.log_writer.log(self.file_object,data.head())
            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            #data=preprocessor.remove_columns(data,['Wafer']) # remove the unnamed column as it doesn't contribute to prediction.
            data = preprocessor.binning(data)
            #removing unwanted columns as discussed in the EDA part in ipynb file
            data = preprocessor.dropUnnecessaryColumns(data, ['Ageband'])
            #print(data.isnull().sum())
            data = preprocessor.combiningfornewfeature(data)
            data = preprocessor.dropUnnecessaryColumns(
                data, ['Parch', 'Sibsp', 'FamilySize'])

            data = preprocessor.convertCategoricalfeatureIntonumeric(data)

            data = preprocessor.binningfare(data)
            data = preprocessor.dropUnnecessaryColumns(
                data, ['FareBand', 'PassengerId'])
            print(data.head())
            #print(data.isnull().sum())

            # check if missing values are present in the dataset
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)

            # if missing values are there, replace them appropriately.
            if (is_null_present):
                data = preprocessor.impute_missing_values(
                    data)  # missing value imputation

            self.log_writer.log(self.file_object, 'Missing value check and imputation completed')

            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)

            #data_scaled = pandas.DataFrame(data,columns=data.columns)
            self.log_writer.log(self.file_object, 'Standard scaling of the prediction data completed')

            # run the trained classifier on the scaled prediction data

            data = data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            RfClassifier = file_loader.load_model('RandomForestClassifier')
            self.log_writer.log(self.file_object, 'RandomForestClassifier model loaded')
            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            classifier = RfClassifier.predict(
                data_scaled)  # predict survival classes with the RandomForest model
            self.log_writer.log(self.file_object, 'Prediction with the RandomForestClassifier completed')
            #data_scaled['clusters']=clusters
            #self.log_writer.log(self.file_object,'--Fati-07--')
            #clusters=data_scaled['clusters'].unique()
            self.log_writer.log(self.file_object, 'Preparing the prediction output')
            print(classifier)
            #result=[] # initialize blank list for storing predicitons
            #with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #   encoder = pickle.load(file)

            #for i in clusters:
            #   cluster_data= data_scaled[data_scaled['clusters']==i]
            #  cluster_data = cluster_data.drop(['clusters'],axis=1)
            # model_name = file_loader.find_correct_model_file(i)
            #model = file_loader.load_model(model_name)
            #   result.append(val)
            result = pandas.DataFrame(classifier, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True)  #writes the results to the prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
# Example #18
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.remove_columns(
                data,
                ["Index_Product", "sku", "oe_constraint"
                 ])  #removing oe_constraint as it was removed in training

            data = preprocessor.encodeCategoricalValuesPred(data)
            is_null_present = preprocessor.is_null_present(data)
            if (is_null_present):
                #data=preprocessor.impute_missing_values(data)
                data = data.dropna()

            #cols_to_drop=preprocessor.get_columns_with_zero_std_deviation(data)
            #data=preprocessor.remove_columns(data,cols_to_drop)

            data = preprocessor.scale_numerical_columns(data)

            data = preprocessor.pcaTransformation(data)
            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            # kmeans=file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            #clusters=kmeans.predict(data.drop(['Wafer'],axis=1))#drops the first column for cluster prediction
            #data['clusters']=clusters
            ##for i in clusters:
            #cluster_data= data[data['clusters']==i]
            #wafer_names = list(cluster_data['Wafer'])
            ##cluster_data = cluster_data.drop(['clusters'],axis=1)
            model_name = file_loader.find_correct_model_file()

            model = file_loader.load_model(model_name)
            result = list(model.predict(data))
            #result = pandas.DataFrame(list(zip(wafer_names,result)),columns=['Wafer','Prediction'])
            result = pandas.DataFrame(result, columns=['Prediction'])
            result["Prediction"] = result["Prediction"].map({
                0: "Yes",
                1: "No"
            })
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True,
                          mode='a+')  #appends result to prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
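
The Preprocessor's pcaTransformation used above (and in the next example) is project-specific and not shown. For reference, the sketch below illustrates the usual shape of such a step with scikit-learn's PCA; the component count, file name, and the use of pickle for persistence are assumptions, and the key point is that the PCA fitted during training should be reloaded and reused at prediction time rather than refitted.

import pickle
from sklearn.decomposition import PCA

def fit_and_save_pca(train_features, n_components=3, path='models/pca.pickle'):
    # Training side: fit the PCA once and persist it alongside the models.
    pca = PCA(n_components=n_components)
    transformed = pca.fit_transform(train_features)
    with open(path, 'wb') as f:
        pickle.dump(pca, f)
    return transformed

def apply_saved_pca(pred_features, path='models/pca.pickle'):
    # Prediction side: reload the fitted PCA and only transform the new data.
    with open(path, 'rb') as f:
        pca = pickle.load(f)
    return pca.transform(pred_features)
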
# Example #19
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            #data = preprocessor.dropUnnecessaryColumns(data,['veiltype'])

            # replacing '?' values with np.nan as discussed in the EDA part

            data = preprocessor.replaceInvalidValuesWithNull(data)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                #data=preprocessor.impute_missing_values(data,cols_with_missing_values)
                data = preprocessor.handleMissingValues(
                    data)  # missing value imputation by mean

            # we get these columns while training and we dropped them there, so we will drop it from here
            cols_to_drop = ['cd_000', 'ch_000']

            # drop the columns obtained above
            X = preprocessor.remove_columns(data, cols_to_drop)

            X = preprocessor.scale_numerical_columns(X)

            X = preprocessor.pcaTransformation(X)

            # get encoded values for categorical data
            #data = preprocessor.encodeCategoricalValuesPrediction(data)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            #kmeans=file_loader.load_model('KMeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            #clusters=kmeans.predict(data)#drops the first column for cluster prediction
            #data['clusters']=clusters
            #clusters=data['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            # with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #     encoder = pickle.load(file)

            #for i in clusters:
            #cluster_data= data[data['clusters']==i]
            #cluster_data = cluster_data.drop(['clusters'],axis=1)
            model_name = file_loader.find_correct_model_file()
            model = file_loader.load_model(model_name)
            for val in (model.predict(X)):
                result.append(val)
            result = pandas.DataFrame(result, columns=['Predictions'])
            result['Predictions'] = result['Predictions'].map({
                0: 'neg',
                1: 'pos'
            })
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv("Prediction_Output_File/Predictions.csv",
                          header=True)  #writes the results to the prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'Start of Prediction')
            print("start of prediction")
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.log_database, self.log_collection, self.execution_id)
            data = data_getter.get_data()

            path = ""
            if len(data) == 0:
                self.log_db_writer.log(
                    self.log_database, self.log_collection,
                    "No data was present to perform prediction existing prediction method"
                )
                return path, "No data was present to perform prediction"

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            data = preprocessor.logTransformation(data)
            print("after log Transformation")
            print(data)

            #scale the prediction data
            data_scaled = pandas.DataFrame(
                preprocessor.standardScalingData(data), columns=data.columns)

            print("standard scaling for data completed")
            print(data_scaled)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            kmeans = file_loader.load_model('kkmeans')

            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(
                data_scaled)  # assign each record to a KMeans cluster
            data_scaled['clusters'] = clusters
            clusters = data_scaled['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            # with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #     encoder = pickle.load(file)

            for i in clusters:
                cluster_data = data_scaled[data_scaled['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                print(model_name)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data.values)):
                    result.append(val)

            result = pandas.DataFrame(result, columns=['strength-Predictions'])

            #result = list(model.predict(cluster_data))
            #self.result = pandas.DataFrame(list(zip(result)), columns=['Prediction'])
            #for val in (model.predict(cluster_data.values)):
            #    result.append(val)
            #print(self.result.shape)
            print("results after prediction with prediction columns")
            print(result)

            path = "Prediction-Output-File"
            #result.to_csv("Prediction_Output_File/Predictions.csv",header=True) #appends result to prediction file
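            # upload the combined predictions as a CSV to Azure Blob Storage via the project's blob helper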
            self.az_blob_mgt.saveDataFrametoCSV(
                path,
                "cement-strength-prediction.csv",
                result,
                header=True,
                mode="a+")

            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'End of Prediction')
        except Exception as ex:
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
Example #21
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.dropUnnecessaryColumns(data, ['veil-type'])

            # replacing '?' values with np.nan as discussed in the EDA part

            data = preprocessor.replaceInvalidValuesWithNull(data)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)

            # get encoded values for categorical data
            data = preprocessor.encodeCategoricalValuesPrediction(data)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            print(kmeans.labels_)
            ##Code changed
            #pred_data = data.drop(['Wafer'],axis=1)
            clusters = kmeans.predict(data)
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            result = []  # initialize blank list for storing predictions
            # with open('EncoderPickle/enc.pickle', 'rb') as file: #let's load the encoder pickle file to decode the values
            #     encoder = pickle.load(file)

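            # predict cluster by cluster using the classifier trained for each cluster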
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in (model.predict(cluster_data)):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            self.awsObj.saveDataframeToCsv('Prediction_Output_File',
                                           'Predictions.csv', result)
            self.log_writer.log(self.file_object, 'End of Prediction')

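            # send a notification email once the prediction file has been written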
            msg = MIMEMultipart()
            msg['Subject'] = 'MushroomTypeClassifier - Prediction Done | ' + str(
                datetime.now())
            body = 'Model Prediction Done Successfully... <br><br> Thanks and Regards, <br> Rahul Garg'
            msg.attach(MIMEText(body, 'html'))
            to_addr = ['*****@*****.**']
            self.emailObj.trigger_mail(to_addr, [], msg)
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
Example #22
    def predictionFromModel(self):
        try:
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.remove_columns(data, [
                'policy_number', 'policy_bind_date', 'policy_state',
                'insured_zip', 'incident_location', 'incident_date',
                'incident_state', 'incident_city', 'insured_hobbies',
                'auto_make', 'auto_model', 'auto_year', 'age',
                'total_claim_amount'
            ])
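            # '?' marks missing values in this dataset; convert it to NaN so imputation can handle it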
            data.replace('?', np.nan, inplace=True)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)

            if (is_null_present):
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)

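            # encode categorical features and scale numerical features before clustering and prediction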
            data = preprocessor.encode_categorical_columns(data)
            data = preprocessor.scale_numerical_columns(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            clusters = kmeans.predict(data)
            data['clusters'] = clusters
            clusters = data['clusters'].unique()
            predictions = []
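            # predict cluster by cluster and map the binary output back to 'N'/'Y'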
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = (model.predict(cluster_data))
                for res in result:
                    if res == 0:
                        predictions.append('N')
                    else:
                        predictions.append('Y')
            final = pd.DataFrame(list(zip(predictions)),
                                 columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv",
                         header=True,
                         mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
    def predictionFromModel(self):

        try:
            self.pred_data_val.deletePredictionFile(
            )  #deletes the existing prediction file from last run!
            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.log_database, self.log_collection, self.execution_id)
            data = data_getter.get_data()
            path = ""
            if len(data) == 0:
                self.log_db_writer.log(
                    self.log_database, self.log_collection,
                    "No data was present to perform prediction existing prediction method"
                )
                return path, "No data was present to perform prediction"

            #code change
            # wafer_names=data['Wafer']
            # data=data.drop(labels=['Wafer'],axis=1)

            preprocessor = preprocessing.Preprocessor(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            is_null_present = preprocessor.is_null_present(data)
            if (is_null_present):
                data = preprocessor.impute_missing_values(data)

            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(
                data)
            data = preprocessor.remove_columns(data, cols_to_drop)

            #data=data.to_numpy()
            file_loader = file_methods.File_Operation(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            kmeans = file_loader.load_model('KMeans')
            #print(kmeans)

            ##Code changed
            pred_data = data.drop(['Wafer'], axis=1)
            clusters = kmeans.predict(
                pred_data)  #the 'Wafer' column was dropped above before cluster prediction
            data['clusters'] = clusters
            clust = data['clusters'].unique()
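            # run predictions cluster by cluster and append each batch to the output CSV in blob storage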
            for i in clust:
                cluster_data = data[
                    data['clusters'] ==
                    i]  # filtering the rows that belong to cluster i (0, 1, 2, etc.)
                wafer_names = list(cluster_data['Wafer'])
                cluster_data = cluster_data.drop(labels=['Wafer'], axis=1)
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = list(model.predict(cluster_data))
                self.result = pandas.DataFrame(list(zip(wafer_names, result)),
                                               columns=['Wafer', 'Prediction'])
                print(self.result.shape)
                print(self.result)
                #path="Prediction_Output_File/Predictions.csv"
                path = "prediction-output-file"
                self.az_blob_mgt.saveDataFrametoCSV(path,
                                                    "prediction.csv",
                                                    self.result,
                                                    header=True,
                                                    mode="a+")

                #result.to_csv("Prediction_Output_File/Predictions.csv",header=True,mode='a+') #appends result to prediction file
            #self.log_writer.log(self.file_object,'End of Prediction')
            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'End of prediction')
        except Exception as ex:

            #self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Error occurred while running the prediction!! Error:: %s' % ex)
            raise ex

        return path, self.result.head().to_json(orient="records")
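Every snippet above leans on a File_Operation helper (find_correct_model_file / load_model) that is not reproduced on this page. The following is only a minimal sketch of what such a helper might look like, assuming the per-cluster models are pickled as '<model_name><cluster_number>.sav' inside a models/ directory; the directory name, the '.sav' extension, and the module-level function form are assumptions, not code taken from the projects above.

import os
import pickle


def find_correct_model_file(cluster_number, model_directory='models/'):
    """Return the base name of the saved model whose filename ends with the cluster number.

    Hypothetical sketch: mirrors how the loops above call
    file_loader.find_correct_model_file(i) and then load_model(model_name).
    """
    for file_name in os.listdir(model_directory):
        base_name = os.path.splitext(file_name)[0]  # e.g. 'RandomForest2'
        if base_name.endswith(str(cluster_number)):
            return base_name  # name only; load_model re-attaches the path and extension
    raise FileNotFoundError('No model file found for cluster %s in %s' %
                            (cluster_number, model_directory))


def load_model(model_name, model_directory='models/'):
    """Unpickle and return the model saved under model_name (assumed '.sav' extension)."""
    with open(os.path.join(model_directory, model_name + '.sav'), 'rb') as f:
        return pickle.load(f)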