def correct(self, text_data):
    """Grade the not-yet-graded lines of a text with the stored classifier.

    The unigram model and the persisted classifier are loaded from the
    locations described in ``self.config``. When no classifier can be
    loaded, the method returns without modifying ``text_data``. Lines whose
    grade is already a multiple of 5 are left untouched; every other line
    gets its grade replaced by the predicted class multiplied by 5.

    Args:
        text_data (`Text`): Text data
    """
    config = self.config

    unigrams_path = join(
        config["root"],
        config["dirs"]["models_root"],
        config["dirs"]["models"]["inline"],
        config["models"]["inline"]["unigrams"],
    )
    unigrams = Unigrams(unigrams_path)

    # Same location `train` writes the classifier to (no "root" prefix).
    classifier_path = join(
        config["dirs"]["models_root"],
        config["dirs"]["models"]["learning"],
        config["models"]["learning"]["classifier"],
    )
    ml_classifier = load(classifier_path)

    if ml_classifier is None:
        # Nothing to classify with; leave the text as it is.
        return

    algo = self.model["algo"]
    algo.set_classifier(ml_classifier)

    for paragraph in text_data.text:
        for line in paragraph:
            # A grade that is a multiple of 5 marks a line that is
            # already classified; only the remaining lines are graded.
            if line.grade % 5 != 0:
                extractor = MachineLearningFeatures()
                features = extractor.extract_features(
                    line, unigrams.ngrams, text_data.stats
                )
                line.grade = algo.classify(features) * 5
def __init__(self, app_config):
    """Initialise the base class and create the model components.

    Args:
        app_config: Application configuration, passed unchanged to the
            parent constructor.
    """
    super(MachineLearningModel, self).__init__(app_config)

    # Instantiate in the same order as the dictionary entries below.
    algorithm = MachineLearningAlgorithm()
    feature_extractor = MachineLearningFeatures()

    self.model = {
        "algo": algorithm,
        "features": feature_extractor,
    }
def train(self, dataset):
    """Extend the training set with a dataset and refit the classifier.

    Features are extracted from every line whose grade is a multiple of 5
    and appended, together with ``int(grade / 5)`` as the expected result,
    to the algorithm's existing training set. The enlarged training set is
    saved, a new ``SGDClassifier`` is trained on it, and the trained
    classifier is saved as well.

    Args:
        dataset (list): List of training files
    """
    algo = self.model["algo"]

    # Start from the training set the algorithm already holds; the new
    # samples are appended to it in place.
    training_set = algo.training_set
    config = self.config

    for text in dataset:
        self.logger.debug("Processing " + text.filename + "...")

        # NOTE(review): this path does not depend on `text`, yet the
        # unigram model is rebuilt for each file; it could probably be
        # created once before the loop -- confirm Unigrams has no side
        # effects first.
        unigrams_path = join(
            config["root"],
            config["dirs"]["models_root"],
            config["dirs"]["models"]["inline"],
            config["models"]["inline"]["unigrams"],
        )
        unigrams = Unigrams(unigrams_path)

        for paragraph in text.text:
            for line in paragraph:
                # Unclassified lines are useless for the training, so only
                # grades that are a multiple of 5 are used.
                if line.grade % 5 == 0:
                    extractor = MachineLearningFeatures()
                    features = extractor.extract_features(
                        line, unigrams.ngrams, text.stats
                    )
                    result = int(line.grade / 5)

                    training_set["features"].append(features)
                    training_set["results"].append(result)

    learning_dir = join(
        config["dirs"]["models_root"],
        config["dirs"]["models"]["learning"],
        config["models"]["learning"]["training_set"],
    )
    self.logger.debug("Saving training set...")
    save(training_set, learning_dir)

    self.logger.debug("Training model...")

    # NOTE(review): if this is scikit-learn's SGDClassifier, newer releases
    # spell these options "log_loss" and "balanced" -- confirm against the
    # version this project pins before upgrading.
    ml_classifier = SGDClassifier(loss="log", class_weight="auto")

    algo.set_classifier(ml_classifier)
    algo.set_training_set(training_set["features"], training_set["results"])
    algo.train()

    classifier_path = join(
        config["dirs"]["models_root"],
        config["dirs"]["models"]["learning"],
        config["models"]["learning"]["classifier"],
    )
    save(algo.classifier, classifier_path)