Exemple #1
0
def train_classifier():
    """Train the document classifier and save it when training updated it.

    A previously saved model is reloaded first.  If reloading fails with
    ``OSError``, ``EOFError`` or ``IncompatibleClassifierVersionError``,
    training starts from a brand-new ``DocumentClassifier`` instead; any
    other error raised by ``reload()`` propagates to the caller.

    ``Exception`` subclasses raised while training or saving are logged at
    error level and not re-raised.
    """
    log = logging.getLogger(__name__)

    classifier = DocumentClassifier()
    try:
        # Start from the stored model, since we might not have to train
        # it again.
        classifier.reload()
    except (OSError, EOFError, IncompatibleClassifierVersionError):
        # The stored model could not be loaded: drop the instance whose
        # reload failed and continue with a fresh one.
        classifier = DocumentClassifier()

    try:
        # A truthy result from train() is treated as "the model changed".
        model_changed = classifier.train()
        if not model_changed:
            log.debug("Training data unchanged.")
        else:
            log.info(
                "Saving updated classifier model to {}...".format(
                    settings.MODEL_FILE)
            )
            classifier.save_classifier()
    except Exception as e:
        log.error("Classifier error: " + str(e))
    def testSaveClassifier(self):
        # Train and save a model, then load it into a second classifier.
        # Training that second classifier is expected to return a falsy
        # value.
        self.generate_test_data()
        self.classifier.train()
        self.classifier.save_classifier()

        restored = DocumentClassifier()
        restored.reload()

        retrained = restored.train()
        self.assertFalse(retrained)
    def testReload(self):
        # A second classifier instance must pick up a model that was saved
        # again after its first reload: its classifier_version has to
        # differ between the two reloads.
        self.generate_test_data()
        trained = self.classifier.train()
        self.assertTrue(trained)
        self.classifier.save_classifier()

        reloaded = DocumentClassifier()
        reloaded.reload()
        version_before = reloaded.classifier_version

        # Let some time pass before saving again.
        # NOTE(review): presumably classifier_version is time-derived, which
        # is why the pause is needed -- confirm against DocumentClassifier.
        sleep(1)
        self.classifier.save_classifier()

        reloaded.reload()
        version_after = reloaded.classifier_version
        self.assertNotEqual(version_before, version_after)
Exemple #4
0
    def handle(self, *args, **options):
        """Run the enabled assignment steps on every selected document.

        Options read: ``verbosity`` (stored on ``self``), ``inbox_only``
        (restricts the queryset to documents having a tag with
        ``is_inbox_tag=True``), ``correspondent`` / ``document_type`` /
        ``tags`` (enable the matching ``set_*`` call), ``overwrite``
        (passed as ``replace``) and ``use_first`` (passed to the
        correspondent and document type steps only).

        If the saved classifier cannot be reloaded, a warning is logged
        and the steps are called with ``classifier=None``.
        """
        self.verbosity = options["verbosity"]
        log = logging.getLogger(__name__)

        queryset = (
            Document.objects.filter(tags__is_inbox_tag=True)
            if options["inbox_only"]
            else Document.objects.all()
        )
        # distinct() is applied to both variants of the queryset.
        documents = queryset.distinct()

        classifier = DocumentClassifier()
        try:
            classifier.reload()
        except (OSError, EOFError, IncompatibleClassifierVersionError) as e:
            log.warning(
                f"Cannot classify documents: {e}.")
            classifier = None

        # (option name, assignment function, accepts ``use_first``), in the
        # order in which the steps run for each document.
        assignments = (
            ('correspondent', set_correspondent, True),
            ('document_type', set_document_type, True),
            ('tags', set_tags, False),
        )

        for document in documents:
            log.info(
                f"Processing document {document.title}")

            for option_name, assign, takes_use_first in assignments:
                if not options[option_name]:
                    continue

                # Option values are looked up only for enabled steps.
                call_kwargs = {
                    "sender": None,
                    "document": document,
                    "classifier": classifier,
                    "replace": options['overwrite'],
                }
                if takes_use_first:
                    call_kwargs["use_first"] = options['use_first']
                assign(**call_kwargs)
    def testVersionIncreased(self):
        # A model saved under one FORMAT_VERSION must be rejected once the
        # format version is bumped, and accepted again after it has been
        # re-saved under the bumped version.
        self.generate_test_data()
        first_run = self.classifier.train()
        self.assertTrue(first_run)
        second_run = self.classifier.train()
        self.assertFalse(second_run)

        self.classifier.save_classifier()

        reader = DocumentClassifier()

        bumped_version = DocumentClassifier.FORMAT_VERSION + 1
        bumped_format = mock.patch(
            "documents.classifier.DocumentClassifier.FORMAT_VERSION",
            bumped_version)
        with bumped_format:
            # assure that we won't load old classifiers.
            with self.assertRaises(IncompatibleClassifierVersionError):
                reader.reload()

            self.classifier.save_classifier()

            # assure that we can load the classifier after saving it.
            reader.reload()