def classify_category(self, category_path, model_func):
    """Classify every article under *category_path* and record the tally.

    Each directory entry is parsed with ``parse_article`` and then scored
    against every entry of ``self.categories`` by calling
    ``model_func(category, article_words)``.  The category name with the
    strictly highest score wins (the first one seen wins ties).  The
    per-category counts are stored in ``self.confusion_matrix`` under the
    basename of *category_path*.

    Args:
        category_path: Directory whose entries are the articles to classify.
        model_func: Callable taking ``(category, article_words)`` and
            returning a score comparable with ``>``; higher is better.

    Note:
        If no category produces a score greater than negative infinity
        (for example when ``self.categories`` is empty), the article is
        counted under the key ``None``.
    """
    confusion_row = {}
    for article in os.listdir(category_path):
        article_words = {}
        parse_article(category_path, article, article_words, self.stop_list)

        # float("-inf") rather than -sys.maxint - 1: sys.maxint does not
        # exist on Python 3, and an integer bound is not a true lower bound
        # for float scores.
        best_probability = float("-inf")
        best_category = None
        for category_name in self.categories:
            probability = model_func(
                self.categories[category_name], article_words)
            if probability > best_probability:
                best_probability = probability
                best_category = category_name

        confusion_row[best_category] = confusion_row.get(best_category, 0) + 1

    self.confusion_matrix[os.path.basename(category_path)] = confusion_row
def classify_category(self, category_path, model_func):
    """Classify every article under *category_path* and record the tally.

    Each directory entry is parsed with ``parse_article`` and then scored
    against every entry of ``self.categories`` by calling
    ``model_func(category, article_words)``.  The category name with the
    strictly highest score wins (the first one seen wins ties).  The
    per-category counts are stored in ``self.confusion_matrix`` under the
    basename of *category_path*.

    Args:
        category_path: Directory whose entries are the articles to classify.
        model_func: Callable taking ``(category, article_words)`` and
            returning a score comparable with ``>``; higher is better.

    Note:
        If no category produces a score greater than negative infinity
        (for example when ``self.categories`` is empty), the article is
        counted under the key ``None``.
    """
    confusion_row = {}
    for article in os.listdir(category_path):
        article_words = {}
        parse_article(category_path, article, article_words, self.stop_list)

        # float("-inf") rather than -sys.maxint - 1: sys.maxint does not
        # exist on Python 3, and an integer bound is not a true lower bound
        # for float scores.
        best_probability = float("-inf")
        best_category = None
        for category_name in self.categories:
            probability = model_func(
                self.categories[category_name], article_words)
            if probability > best_probability:
                best_probability = probability
                best_category = category_name

        confusion_row[best_category] = confusion_row.get(best_category, 0) + 1

    self.confusion_matrix[os.path.basename(category_path)] = confusion_row
def categorize(self, path):
    """Parse every entry of directory *path* into this object's word counts.

    For each name returned by ``os.listdir(path)``, ``self.article_count``
    is incremented and ``parse_article`` is called with the directory, the
    entry name, ``self.word_counts`` and ``self.stop_words``.

    Args:
        path: Directory whose entries are handed to ``parse_article``.
    """
    entry_names = os.listdir(path)
    for entry_name in entry_names:
        # The counter is bumped before parsing, so an entry whose parse
        # raises has already been counted.
        self.article_count += 1
        # TODO add exception handling
        parse_article(path, entry_name, self.word_counts, self.stop_words)