def main(imap_folder_name):
    """Classify every message in an IMAP folder and print one line each.

    For each message yielded by each_imap_message() the classifier's
    category name and probability are printed, followed by the message's
    Subject header ('(none)' when the header is absent).

    NOTE: mostly copy-pasted from drymail-train; probably needs to be
    cleaned up.
    """
    with config.IMAP_connection() as connection:
        for _msg_num, message in each_imap_message(connection, imap_folder_name):
            category, probability = Category.classify(str(message))
            # Newlines in the subject are flattened to spaces so that each
            # message's report stays on a single output line.
            report = (
                category.name,
                probability,
                message.get('Subject', '(none)').replace('\n', ' '),
            )
            print("%s(%f): %s" % report)
def do_supertrain(self, cat):
    """Train the current message on `cat` until the classifier agrees.

    The current message is classified; while the result is not equal to
    `cat`, a progress line is printed, `cat` is trained on the message and
    the message is classified again. Once the classifier answers `cat`,
    self._classify() is called to refresh the stored classification.

    NOTE(review): there is no upper bound on the number of rounds, so this
    only terminates if repeated training eventually changes the verdict.
    NOTE(review): unlike _classify(), nothing here checks that
    self.current_message is set -- presumably callers guarantee it.
    """
    rounds = 1
    guess, confidence = Category.classify(str(self.current_message))
    # `not (a == b)` rather than `a != b`: only equality is relied upon.
    while not (guess == cat):
        print("Training %d... (%s %.2f%%)" % (rounds, guess.name, confidence * 100))
        cat.train(str(self.current_message))
        rounds += 1
        guess, confidence = Category.classify(str(self.current_message))
    self._classify()
def _classify(self):
    """Classify the current message and cache the result on the instance.

    self.current_category and self.current_probability are always reset to
    None first. They are then filled in from Category.classify() only if
    Category.all() is non-empty and there is a current message.

    Returns the resulting category, or None when nothing was classified.
    """
    # Clear any stale result before deciding whether we can classify at all.
    self.current_category = None
    self.current_probability = None

    if not Category.all():
        print("Can't classify because I have no categories")
        return None

    if not self.current_message:
        return None

    self.current_category, self.current_probability = Category.classify(
        str(self.current_message)
    )
    return self.current_category