def test_load_classifier_cached(self):
        """Check that a repeated load_classifier() call skips DocumentClassifier.load."""
        # First call: must produce a classifier object.
        first = load_classifier()
        self.assertIsNotNone(first)

        # Second call runs with DocumentClassifier.load patched, so any
        # invocation of it would be recorded on the mock.
        load_patch = mock.patch("documents.classifier.DocumentClassifier.load")
        with load_patch as load:
            load_classifier()
            load.assert_not_called()
Exemple #2
0
    def handle(self, *args, **options):
        """Apply the classifier-driven setters to the selected documents.

        Documents are restricted to those carrying a tag with
        ``is_inbox_tag=True`` when ``options["inbox_only"]`` is truthy;
        otherwise every document is processed. For each document,
        ``set_correspondent``, ``set_document_type`` and ``set_tags`` are
        called only when their respective option is truthy.
        """
        queryset = (
            Document.objects.filter(tags__is_inbox_tag=True)
            if options["inbox_only"]
            else Document.objects.all()
        )
        documents = queryset.distinct()

        classifier = load_classifier()

        for doc in tqdm.tqdm(documents):
            # Keyword arguments common to all three setter calls.
            shared = {
                "sender": None,
                "document": doc,
                "classifier": classifier,
            }

            if options['correspondent']:
                set_correspondent(
                    **shared,
                    replace=options['overwrite'],
                    use_first=options['use_first'],
                )

            if options['document_type']:
                set_document_type(
                    **shared,
                    replace=options['overwrite'],
                    use_first=options['use_first'],
                )

            if options['tags']:
                set_tags(**shared, replace=options['overwrite'])
Exemple #3
0
def train_classifier():
    """Train the document classifier and save it when ``train()`` is truthy.

    Returns immediately when no Tag, DocumentType or Correspondent row uses
    the ``Tag.MATCH_AUTO`` matching algorithm. Any exception raised while
    training or saving is logged as a warning instead of being propagated.
    """
    auto_matched_models = (Tag, DocumentType, Correspondent)
    if not any(
        model.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
        for model in auto_matched_models
    ):
        return

    # Use whatever load_classifier() yields; fall back to a new instance
    # when the result is falsy.
    classifier = load_classifier() or DocumentClassifier()

    try:
        if not classifier.train():
            logger.debug("Training data unchanged.")
        else:
            model_path = settings.MODEL_FILE
            logger.info(
                "Saving updated classifier model to {}...".format(model_path)
            )
            classifier.save()
    except Exception as e:
        logger.warning("Classifier error: " + str(e))
    def test_load_classifier_os_error(self, load):
        """An OSError from ``load`` yields None and leaves the model file in place."""
        # NOTE(review): ``load`` is presumably a mock injected by a patch
        # decorator outside this view - confirm.
        model_file = settings.MODEL_FILE
        Path(model_file).touch()
        self.assertTrue(os.path.exists(model_file))

        load.side_effect = OSError()
        result = load_classifier()
        self.assertIsNone(result)
        # The file must still exist after the failed load.
        self.assertTrue(os.path.exists(model_file))
    def test_load_classifier_incompatible_version(self, load):
        """An incompatible-version error yields None and the model file is gone."""
        # NOTE(review): ``load`` is presumably a mock injected by a patch
        # decorator outside this view - confirm.
        model_file = settings.MODEL_FILE
        Path(model_file).touch()
        self.assertTrue(os.path.exists(model_file))

        load.side_effect = IncompatibleClassifierVersionError()
        result = load_classifier()
        self.assertIsNone(result)
        # Unlike the OSError case, the file must no longer exist afterwards.
        self.assertFalse(os.path.exists(model_file))
Exemple #6
0
    def handle(self, *args, **options):
        """Apply the classifier-driven setters to the selected documents.

        Documents are restricted to those carrying a tag with
        ``is_inbox_tag=True`` when ``options["inbox_only"]`` is truthy;
        otherwise every document is processed. For each document,
        ``set_correspondent``, ``set_document_type`` and ``set_tags`` are
        called only when their respective option is truthy. The progress bar
        is disabled when ``options['no_progress_bar']`` is truthy.
        """
        # Colour is considered supported when the ERROR style changes the text.
        supports_color = self.style.ERROR("test") != "test"

        queryset = (
            Document.objects.filter(tags__is_inbox_tag=True)
            if options["inbox_only"]
            else Document.objects.all()
        )
        documents = queryset.distinct()

        classifier = load_classifier()

        progress = tqdm.tqdm(documents, disable=options['no_progress_bar'])
        for doc in progress:
            # Keyword arguments common to all three setter calls.
            shared = {
                "sender": None,
                "document": doc,
                "classifier": classifier,
            }

            if options['correspondent']:
                set_correspondent(
                    **shared,
                    replace=options['overwrite'],
                    use_first=options['use_first'],
                    suggest=options['suggest'],
                    base_url=options['base_url'],
                    color=supports_color,
                )

            if options['document_type']:
                set_document_type(
                    **shared,
                    replace=options['overwrite'],
                    use_first=options['use_first'],
                    suggest=options['suggest'],
                    base_url=options['base_url'],
                    color=supports_color,
                )

            if options['tags']:
                set_tags(
                    **shared,
                    replace=options['overwrite'],
                    suggest=options['suggest'],
                    base_url=options['base_url'],
                    color=supports_color,
                )
 def test_load_classifier(self, load):
     """With a model file present, load_classifier() is non-None and calls ``load`` once."""
     # NOTE(review): ``load`` is presumably a mock injected by a patch
     # decorator outside this view - confirm.
     Path(settings.MODEL_FILE).touch()
     loaded = load_classifier()
     self.assertIsNotNone(loaded)
     load.assert_called_once()
 def test_load_classifier_not_exists(self):
     """load_classifier() returns None when the model file is absent."""
     # Precondition: no model file exists at the configured path.
     model_present = os.path.exists(settings.MODEL_FILE)
     self.assertFalse(model_present)
     result = load_classifier()
     self.assertIsNone(result)