class KMeansClustering:
    """
    This class shall be used to divide the data into clusters before training.

    Besides the constructor arguments, the methods store intermediate results
    on the instance: ``kn`` (the KneeLocator built by ``elbow_plot``) and
    ``kmeans``, ``y_kmeans``, ``file_op``, ``save_model`` and ``data``
    (all set by ``create_clusters``).
    """

    def __init__(self, file_object, logger_object):
        """
        Method Name: __init__
        Description: Store the log target and logger and create the storage
                     helper used to upload the elbow plot.
        """
        self.file_object = file_object
        self.logger_object = logger_object
        self.awsObj = AwsStorageManagement()

    def elbow_plot(self, data, max_clusters=10):
        """
        Method Name: elbow_plot
        Description: Fit KMeans for 1..max_clusters clusters, plot the WCSS
                     (inertia) against the number of clusters, upload the plot
                     through the storage helper and locate the knee of the
                     curve programmatically.
        Parameters:  data - the samples passed to KMeans.fit
                     max_clusters - largest cluster count to try (default 10,
                     which is the range the method always used before)
        Output: The knee reported by KneeLocator (the optimum number of
                clusters). NOTE(review): KneeLocator may report no knee, in
                which case this presumably returns None - confirm with callers.
        On Failure: Raise Exception (the original error is attached as cause)
        """
        self.logger_object.log(
            self.file_object,
            'Entered the elbow_plot method of the KMeansClustering class')
        wcss = []  # within-cluster sum of squares for every cluster count
        cluster_counts = range(1, max_clusters + 1)
        try:
            for i in cluster_counts:
                kmeans = KMeans(n_clusters=i,
                                init='k-means++',
                                random_state=42)
                kmeans.fit(data)
                wcss.append(kmeans.inertia_)

            # Draw on a dedicated figure and always close it: pyplot keeps
            # global state, so plotting on the implicit current figure would
            # overlay the curves of repeated calls and leak the figure.
            fig = plt.figure()
            try:
                plt.plot(cluster_counts, wcss)
                plt.title('The Elbow Method')
                plt.xlabel('Number of clusters')
                plt.ylabel('WCSS')
                img_buffer = io.BytesIO()
                plt.savefig(img_buffer, format='png')
            finally:
                plt.close(fig)
            img_buffer.seek(0)  # rewind so the upload reads from the start

            # The 'models' and 'preprocessing_data' locations are cleared
            # through the storage helper before the new plot is stored.
            self.awsObj.deleteFile('models')
            self.awsObj.deleteFile('preprocessing_data')
            self.awsObj.saveObject('preprocessing_data', 'K-Means_Elbow.PNG',
                                   img_buffer, 'image/png')
            print('Saving kneeplot to aws')

            # finding the value of the optimum cluster programmatically
            self.kn = KneeLocator(cluster_counts,
                                  wcss,
                                  curve='convex',
                                  direction='decreasing')
            self.logger_object.log(
                self.file_object,
                'The optimum number of clusters is: ' + str(self.kn.knee) +
                ' . Exited the elbow_plot method of the KMeansClustering class'
            )
            return self.kn.knee
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occured in elbow_plot method of the KMeansClustering class. Exception message: '
                + str(e))
            self.logger_object.log(
                self.file_object,
                'Finding the number of clusters failed. Exited the elbow_plot method of the KMeansClustering class'
            )
            # Same exception type as before; chaining keeps the root cause.
            raise Exception() from e

    def create_clusters(self, data, number_of_clusters):
        """
        Method Name: create_clusters
        Description: Fit KMeans with the requested number of clusters, save
                     the fitted model under the name 'KMeans' and add a
                     'Cluster' column holding each row's cluster label.
        Parameters:  data - the dataset to cluster; it is modified in place
                     (the 'Cluster' column is added to the passed object)
                     number_of_clusters - number of clusters to create
        Output: The same data object with the additional 'Cluster' column
        On Failure: Raise Exception (the original error is attached as cause)
        """
        self.logger_object.log(
            self.file_object,
            'Entered the create_clusters method of the KMeansClustering class')
        self.data = data
        try:
            self.kmeans = KMeans(n_clusters=number_of_clusters,
                                 init='k-means++',
                                 random_state=42)
            # Labels are computed before the 'Cluster' column is added, so the
            # column itself never takes part in the fit.
            self.y_kmeans = self.kmeans.fit_predict(data)

            self.file_op = file_methods.File_Operation(self.file_object,
                                                       self.logger_object)
            print('Saving Kmeans Model')
            self.save_model = self.file_op.save_model(self.kmeans, 'KMeans')

            self.data['Cluster'] = self.y_kmeans
            # Report the count that was actually used. The previous message
            # read self.kn.knee, which only exists after elbow_plot() has run
            # on this very instance and made the method fail otherwise.
            self.logger_object.log(
                self.file_object,
                'successfully created ' + str(number_of_clusters) +
                ' clusters. Exited the create_clusters method of the KMeansClustering class'
            )
            return self.data
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occurred in create_clusters method of the KMeansClustering class. Exception message: '
                + str(e))
            self.logger_object.log(
                self.file_object,
                'Fitting the data to clusters failed. Exited the create_clusters method of the KMeansClustering class'
            )
            # Same exception type as before; chaining keeps the root cause.
            raise Exception() from e
class File_Operation:
    """
    This class shall be used to save the model after training
    and load the saved model for prediction.

    Models are stored through the storage helper ``awsObj`` under the
    ``model_directory`` location ('models'), one '<name>.sav' object each.
    """

    def __init__(self, file_object, logger_object):
        """
        Method Name: __init__
        Description: Store the log target and logger, set the model location
                     and create the storage helper.
        """
        self.file_object = file_object
        self.logger_object = logger_object
        self.model_directory = 'models'
        self.awsObj = AwsStorageManagement()

    def save_model(self, model, filename):
        """
        Method Name: save_model
        Description: Pickle the model into an in-memory buffer and store it
                     as '<filename>.sav' in the model directory.
        Parameters:  model - the object to pickle
                     filename - object name without the '.sav' extension
        Outcome: File gets saved; the string 'success' is returned
        On Failure: Raise Exception (the original error is attached as cause)
        """
        self.logger_object.log(
            self.file_object,
            'Entered the save_model method of the File_Operation class')
        try:
            print('Start Saving Model')
            with io.BytesIO() as f:
                pickle.dump(model, f)
                f.seek(0)  # rewind so the upload reads from the start
                # NOTE(review): 'text/html' is an odd content type for a
                # pickle payload; kept unchanged because stored metadata may
                # be relied upon elsewhere - confirm before changing.
                self.awsObj.saveObject(self.model_directory,
                                       filename + '.sav', f, 'text/html')
            print('Model Saved')
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' saved. Exited the save_model method of the Model_Finder class'
            )
            return 'success'
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occured in save_model method of the Model_Finder class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' could not be saved. Exited the save_model method of the Model_Finder class'
            )
            # Same exception type as before; chaining keeps the root cause.
            raise Exception() from e

    def load_model(self, filename):
        """
        Method Name: load_model
        Description: Load '<filename>.sav' from the model directory through
                     the storage helper.
        Parameters:  filename - object name without the '.sav' extension
        Output: Whatever the storage helper's loadObject returns.
                NOTE(review): presumably the unpickled model - confirm, and
                only load objects from a trusted location if pickle is used.
        On Failure: Raise Exception (the original error is attached as cause)
        """
        self.logger_object.log(
            self.file_object,
            'Entered the load_model method of the File_Operation class')
        try:
            f = self.awsObj.loadObject(self.model_directory,
                                       filename + '.sav')
            print('Model load done')
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' loaded. Exited the load_model method of the Model_Finder class'
            )
            return f
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occured in load_model method of the Model_Finder class. Exception message:  '
                + str(e))
            # This is the load path, so report a load failure (the message
            # previously claimed the file "could not be saved").
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' could not be loaded. Exited the load_model method of the Model_Finder class'
            )
            # Same exception type as before; chaining keeps the root cause.
            raise Exception() from e

    def find_correct_model_file(self, cluster_number):
        """
        Method Name: find_correct_model_file
        Description: Select the model whose file name contains the cluster
                     number. When several names match, the last one listed
                     wins. Matching is a plain substring test, so cluster 1
                     also matches a name containing '10'.
        Parameters:  cluster_number - cluster identifier; its str() form is
                     searched for in every listed file name
        Output: The matching file name up to its first '.'
        On Failure: Raise Exception (also when no file matches; the original
                    error is attached as cause)
        """
        self.logger_object.log(
            self.file_object,
            'Entered the find_correct_model_file method of the File_Operation class'
        )
        try:
            self.cluster_number = cluster_number
            self.folder_name = self.model_directory
            self.list_of_model_files = []
            self.list_of_files = self.awsObj.listDirFiles(self.folder_name)

            needle = str(cluster_number)
            matched_file = None
            for file_name in self.list_of_files:
                # Entries that are not strings are skipped, as before.
                if isinstance(file_name, str) and needle in file_name:
                    matched_file = file_name

            # Fail explicitly instead of silently returning the name found
            # by an earlier call when nothing matches this cluster.
            if matched_file is None:
                raise ValueError('No model file found for cluster ' + needle)

            self.model_name = matched_file.split('.')[0]
            self.logger_object.log(
                self.file_object,
                'Exited the find_correct_model_file method of the Model_Finder class.'
            )
            return self.model_name
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occured in find_correct_model_file method of the Model_Finder class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object,
                'Exited the find_correct_model_file method of the Model_Finder class with Failure'
            )
            # Same exception type as before; chaining keeps the root cause.
            raise Exception() from e