Esempio n. 1
0
def get_languages(codes):
    """Turn some ISO2 language codes into ISO3 codes.

    The given codes are normalised with ``list_to_alpha3``; only results
    that appear in ``LANGUAGES`` are kept. The kept codes are sorted and
    joined with ``+``. An empty string is returned when nothing matches.
    """
    known = [
        alpha3
        for alpha3 in list_to_alpha3(codes)
        if alpha3 in LANGUAGES
    ]
    known.sort()
    return '+'.join(known)
Esempio n. 2
0
 def get_languages(self, languages):
     """Build the ``+``-joined language string for the requested languages.

     ``languages`` is normalised with ``list_to_alpha3``. Every resulting
     code that is listed in ``self.supported_languages`` is kept, and
     ``'eng'`` is always included. The codes are returned sorted and
     joined with ``+`` (e.g. ``'deu+eng'``).
     """
     if not hasattr(self, 'supported_languages'):
         # Imported on first use and cached on the instance, so the lookup
         # of installed languages only happens once per object.
         from tesserocr import get_languages
         # The first element of the returned pair is discarded; the second
         # is kept as the collection of supported language codes.
         _, self.supported_languages = get_languages()
     # English is always part of the result, even if nothing else matches.
     codes = {'eng'}
     # Normalise the caller's ``languages``. The previous code passed the
     # ``codes`` set itself here, so the argument was silently ignored.
     for lang in list_to_alpha3(languages):
         if lang in self.supported_languages:
             codes.add(lang)
     return '+'.join(sorted(codes))
Esempio n. 3
0
def get_models(entity):
    """Yield the loaded NER model for each language set on the entity.

    The entity's ``registry.language`` values are normalised with
    ``list_to_alpha3`` and looked up in ``settings.NER_MODELS``. Languages
    without a configured model are skipped, and each distinct model is
    loaded (via ``_load_model``) and yielded once.
    """
    codes = list_to_alpha3(entity.get_type_values(registry.language))
    configured = (settings.NER_MODELS.get(code) for code in codes)
    # A set de-duplicates models shared by several language codes.
    unique_models = {name for name in configured if name is not None}
    for name in unique_models:
        yield _load_model(name)
Esempio n. 4
0
    def extract_text(self, data, languages=None):
        """Extract text from a binary string of data.

        ``languages`` is normalised with ``list_to_alpha3``. Codes found in
        ``self.supported_languages`` are combined with ``'eng'`` (always
        included) into a sorted ``+``-joined string, which is used to
        obtain and, if needed, re-initialise the OCR API.

        Returns the recognised text. Returns ``None`` when the image fails
        ``self.image_size_ok`` or when any exception is raised during
        recognition (the failure is logged as a warning).
        """
        # English is always part of the language set.
        codes = {'eng'}
        # Normalise the caller's ``languages``. The previous code passed the
        # ``codes`` set itself here, so the argument was silently ignored.
        # ``or []`` keeps the ``None`` default from reaching the helper.
        for lang in list_to_alpha3(languages or []):
            if lang in self.supported_languages:
                codes.add(lang)
        languages = '+'.join(sorted(codes))
        api = self.get_api(languages)

        # Re-initialise only when the API was set up for different languages.
        if languages != api.GetInitLanguagesAsString():
            api.Init(lang=languages)

        try:
            # TODO: play with contrast and sharpening the images.
            image = Image.open(BytesIO(data))
            if not self.image_size_ok(image):
                return
            api.SetImage(image)
            return api.GetUTF8Text()
        except Exception as ex:
            # Best effort: a failed OCR run is logged and yields None.
            log.warning("Failed to OCR: %s", ex)
        finally:
            # Always clear the API after each attempt, successful or not.
            api.Clear()
Esempio n. 5
0
 def test_list(self):
     """Check synonym expansion and list input for ``list_to_alpha3``."""
     # With synonyms (the default), 'bs' also yields 'srp'.
     expanded = list_to_alpha3('bs')
     assert 'srp' in expanded
     # With synonyms disabled, 'srp' must not be produced for 'bs'.
     strict = list_to_alpha3('bs', synonyms=False)
     assert 'srp' not in strict
     # A list of codes is accepted; 'de' maps to 'deu'.
     combined = list_to_alpha3(['bs', 'de'])
     assert 'deu' in combined
Esempio n. 6
0
 def test_list(self):
     """Check synonym expansion and list input for ``list_to_alpha3``."""
     # With synonyms (the default), "bs" also yields "srp".
     expanded = list_to_alpha3("bs")
     assert "srp" in expanded
     # With synonyms disabled, "srp" must not be produced for "bs".
     strict = list_to_alpha3("bs", synonyms=False)
     assert "srp" not in strict
     # A list of codes is accepted; "de" maps to "deu".
     combined = list_to_alpha3(["bs", "de"])
     assert "deu" in combined