def classify_new_data(x_new):
    """
    Predict labels for new data using the persisted model artifacts.

    The classifier and the tfidf vectorizer are unpickled from the paths
    registered under CONSTANTS.TRAINED_MODEL and CONSTANTS.TFIDF_VECTORIZER in
    the dict returned by MyUtils.get_file_path_dict(). The new data is run
    through the vectorizer's ``transform`` and the result is handed to the
    classifier's ``predict``.

    :param x_new: New data that needs to be classified; passed as-is to the
        vectorizer's ``transform``
    :return: Whatever the classifier's ``predict`` returns for the transformed
        data
    """
    paths = MyUtils.get_file_path_dict()

    # NOTE: unpickling can execute arbitrary code, so these files must come
    # from a trusted location.
    with open(paths[CONSTANTS.TRAINED_MODEL], 'rb') as model_fh:
        model = pickle.load(model_fh)

    with open(paths[CONSTANTS.TFIDF_VECTORIZER], 'rb') as vectorizer_fh:
        vectorizer = pickle.load(vectorizer_fh)

    features = vectorizer.transform(x_new)
    return model.predict(features)
def train_model(X_data, y_data):
    """
    Fit a tfidf vectorizer and a classifier, then persist both to disk.

    The vectorizer comes from get_tfidf_vectorizer() and the classifier from
    get_classifier(). The vectorizer is fitted on X_data, and the classifier
    is fitted on the resulting tfidf features together with y_data. Both
    fitted objects are pickled to the paths registered under
    CONSTANTS.TRAINED_MODEL and CONSTANTS.TFIDF_VECTORIZER in the dict
    returned by MyUtils.get_file_path_dict().

    :param X_data: Training data passed to the vectorizer's ``fit_transform``
    :param y_data: Training targets passed to the classifier's ``fit``
    :return: Tuple of (fitted classifier, fitted tfidf vectorizer)
    """
    paths = MyUtils.get_file_path_dict()
    vectorizer = get_tfidf_vectorizer()
    model = get_classifier()

    # Fit the vectorizer and produce the training features in one pass.
    features = vectorizer.fit_transform(X_data)
    model.fit(features, y_data)

    # Persist both artifacts so new data can be classified without retraining.
    with open(paths[CONSTANTS.TRAINED_MODEL], 'wb') as model_fh:
        pickle.dump(model, model_fh)

    with open(paths[CONSTANTS.TFIDF_VECTORIZER], 'wb') as vectorizer_fh:
        pickle.dump(vectorizer, vectorizer_fh)

    return model, vectorizer