def extract_text_from_image(self, data):
    """Extract text from a binary string of data.

    The result is cached through ``self.manager`` under a SHA-1 key built
    from the image bytes followed by the language string, so the same image
    is only OCR'd once per language set.

    :param data: raw bytes of the image file.
    :returns: the recognised text (freshly extracted or from the cache), or
        ``None`` when the image could not be opened or was rejected.
    """
    tessdata = self.manager.get_env('TESSDATA_PREFIX',
                                    '/usr/share/tesseract-ocr/')
    languages = self.get_languages(self.result.languages)

    # hashlib only accepts bytes; encode a text language string for hashing
    # while still handing the unmodified value to Tesseract below.
    hashed_languages = languages
    if not isinstance(hashed_languages, bytes):
        hashed_languages = hashed_languages.encode('utf-8')
    digest = sha1(data)
    digest.update(hashed_languages)
    key = digest.hexdigest()

    text = self.manager.get_cache(key)
    if text is not None:
        return text

    api = PyTessBaseAPI(lang=languages, path=tessdata)
    try:
        image = Image.open(StringIO(data))
        # TODO: play with contrast and sharpening the images.
        api.SetImage(image)
        text = api.GetUTF8Text()
    except DecompressionBombWarning as dce:
        # NOTE(review): this is a warning class; presumably it only reaches
        # this handler when warnings are escalated to errors - confirm.
        log.warning("Image too large: %r", dce)
        return None
    except IOError as ioe:
        log.warning("Unknown image format: %r", ioe)
        return None
    finally:
        # A new engine is created for every call, so shut it down explicitly
        # instead of leaving the release to garbage collection.
        api.Clear()
        api.End()

    log.debug('[%s] OCR: %s, %s characters extracted',
              self.result, languages, len(text))
    self.manager.set_cache(key, text)
    return text
class OCR(object):
    """Text extraction backed by a single, reusable Tesseract engine.

    The engine is kept on ``self.api`` and is only rebuilt when a call asks
    for a different language combination than the one it was initialised
    with.
    """

    MAX_MODELS = 5
    MIN_WIDTH = 10
    MIN_HEIGHT = 10

    def __init__(self):
        # Tesseract language types:
        _, self.supported = get_languages()
        self.reset_engine('eng')

    def language_list(self, languages):
        """Build the '+'-joined Tesseract language string for ``languages``.

        Codes not present in ``self.supported`` are dropped, at most
        ``MAX_MODELS`` of the remaining ones are kept, and 'eng' is always
        included. The result is de-duplicated and sorted.
        """
        candidates = []
        for code in alpha3(languages):
            if code in self.supported:
                candidates.append(code)

        limit = self.MAX_MODELS
        if len(candidates) > limit:
            log.warning("Too many models, limit: %s", limit)
            candidates = candidates[:limit]

        unique = set(candidates)
        unique.add('eng')
        return '+'.join(sorted(unique))

    def reset_engine(self, languages):
        """Tear down the current engine (if any) and start one for ``languages``."""
        if hasattr(self, 'api'):
            current = self.api
            current.Clear()
            current.End()
        self.api = PyTessBaseAPI(lang=languages, oem=OEM.LSTM_ONLY)

    def extract_text(self, data, languages=None, mode=PSM.AUTO_OSD):
        """Extract text from a binary string of data.

        Returns the recognised text, or ``None`` when the image is not
        larger than ``MIN_WIDTH`` x ``MIN_HEIGHT`` or when any step of the
        OCR run raises (the failure is logged).
        """
        languages = self.language_list(languages)
        if self.api.GetInitLanguagesAsString() != languages:
            self.reset_engine(languages)

        try:
            image = Image.open(BytesIO(data))
            # TODO: play with contrast and sharpening the images.
            too_small = (image.width <= self.MIN_WIDTH
                         or image.height <= self.MIN_HEIGHT)
            if too_small:
                return None

            engine = self.api
            if engine.GetPageSegMode() != mode:
                engine.SetPageSegMode(mode)

            engine.SetImage(image)
            text = engine.GetUTF8Text()
            confidence = engine.MeanTextConf()
            log.info("%s chars (w: %s, h: %s, langs: %s, confidence: %s)",
                     len(text), image.width, image.height,
                     languages, confidence)
            return text
        except Exception:
            log.exception("Failed to OCR: %s", languages)
            return None
        finally:
            # Drop the image and recognition results but keep the engine.
            self.api.Clear()
class TT2Predictor:
    """Hold the trainer/predictor instances and the operations shared by them.

    Two kinds of predictors are kept:

    * ``trainers_predictors_list`` - ``TrainerPredictor`` objects that
      classify a cropped region of the capture.
    * ``text_predictors_list`` - tuples of ``(name, crop box, character
      whitelist, page segmentation mode)`` that are read with Tesseract in
      ``parse_image_text``.
    """

    def __init__(self, **kwargs):
        self.trainers_predictors_list = []
        # (name, crop box, tessedit_char_whitelist, tessedit_pageseg_mode)
        self.text_predictors_list = [
            ("previous_level", (1212, 231, 1230, 280), "0123456789", "8"),
            ("main_level", (1203, 323, 1223, 399), "0123456789", "8"),
            ("next_level", (1212, 445, 1230, 493), "0123456789", "8"),
            ("sub_level", (1177, 625, 1203, 692), "0123456789/", "8"),
            ("gold", (1091, 283, 1126, 471),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "7"),
            ("current_dps_down_no_tab", (389, 562, 423, 709),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "8"),
            ("last_hero", (124, 109, 148, 430),
             "0123456789.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
             "7"),
        ]
        self.api = PyTessBaseAPI()
        self.api.Init()
        print(tesserocr.tesseract_version())
        print(tesserocr.get_languages())
        self.global_image = None
        self.status = CurrentStatus()

        boss_trainer = TrainerPredictor(
            "boss_active_predictor",
            ["boss_active", "boss_inactive", "no_boss"],
            (1224, 555, 1248, 648), 12, 46, 255.0, [200, 30])
        egg_trainer = TrainerPredictor(
            "egg_active_predictor",
            ["egg_active", "egg_inactive"],
            (741, 31, 761, 64), 10, 16, 255.0, [200, 30])
        gold_pet_trainer = TrainerPredictor(
            "gold_pet_predictor",
            ["goldpet", "nopet", "normalpet", "partial pet"],
            (624, 364, 734, 474), 40, 40, 255.0, [200, 30])
        tab_predictor = TrainerPredictor(
            "tab_predictor",
            ["skills_tab", "heroes_tab", "equipment_tab", "pet_tab",
             "relic_tab", "shop_tab", "no_tab"],
            (51, 1, 59, 717), 2, 179, 255.0, [200, 30])
        self.trainers_predictors_list.extend(
            [boss_trainer, egg_trainer, gold_pet_trainer, tab_predictor])

        # Training steps are currently disabled; when needed, run for each
        # trainer: crop_images(), process_images(), read_and_pickle(),
        # train_graph().

        saved_classes_file = glo.DATA_FOLDER + "/dataforclassifier/TrainerPredictor_list.pickle"
        save_pickle(saved_classes_file, self.trainers_predictors_list)

    def parse_raw_image(self):
        """Load the raw capture, feed it to every predictor and archive it.

        The file at ``glo.RAW_FULL_FILE`` is decoded as a 1280x720 RGBA
        frame, each trainer predictor crops its region from it, the frame is
        stored in ``self.global_image`` and a timestamped PNG copy is saved.
        """
        # Local import: time.strftime does not support "%f", so datetime is
        # used to get microseconds into the file name.
        from datetime import datetime

        with open(glo.RAW_FULL_FILE, 'rb') as f:
            image = Image.frombytes('RGBA', (1280, 720), f.read())
        for class_predictor in self.trainers_predictors_list:
            class_predictor.predict_crop(image)
        self.global_image = image
        image.save(glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER + "/fullcapture" +
                   datetime.now().strftime("%Y%m%d-%H%M%S-%f") +
                   ".png")  # save original capture copy

    def parse_image_text(self, predict_map):
        """OCR the text regions whose name appears in ``predict_map``.

        :param predict_map: container of text predictor names to read.
        :returns: dict mapping each requested name to the stripped, UTF-8
            encoded text Tesseract returned for its region.
        """
        return_dict = {}
        for name, box, whitelist, pageseg_mode in self.text_predictors_list:
            if name not in predict_map:
                continue
            img = self.global_image.crop(box)
            img = img.convert('L')
            # NOTE(review): presumably the text is sideways in the capture,
            # hence the 90 degree rotation - confirm.
            img = img.rotate(90, expand=True)
            self.api.SetImage(img)
            self.api.SetVariable("tessedit_char_whitelist", whitelist)
            self.api.SetVariable("tessedit_pageseg_mode", pageseg_mode)
            self.api.SetVariable("language_model_penalty_non_dict_word", "0")
            self.api.SetVariable("doc_dict_enable", "0")
            text_capture = self.api.GetUTF8Text().encode('utf-8').strip()
            return_dict[name] = text_capture
            print("raw text capture ", name, ":", text_capture)
            self.api.Clear()
        return return_dict

    def predict_parsed_all(self):
        """Return ``{predictor name: prediction}`` for every trainer predictor."""
        return {
            class_predictor.name: class_predictor.predict_parsed()
            for class_predictor in self.trainers_predictors_list
        }

    def predict_parsed(self, predict_map, predict_map_text, **kwargs):
        """Run the selected class and text predictors on the parsed capture.

        :param predict_map: names of the trainer predictors to run.
        :param predict_map_text: names of the text predictors to run.
        :param kwargs: pass ``empty_image=False`` to skip the level
            transition check; any other value (or omitting it) runs it.
        :returns: dict with ``"transition_level"`` plus one entry per
            predictor that was run. The same dict is passed to
            ``self.status.update_status``.
        """
        pred_dict = {"transition_level": False}
        # Check whether the image is level transitioning (trivial
        # prediction): the top-left corner is a single flat colour then.
        if kwargs.get("empty_image") is not False:
            img = self.global_image.crop((0, 0, 100, 100))  # black corner
            extrema = img.convert("L").getextrema()
            if extrema[0] == extrema[1]:  # only one color
                print("warning level transitioning")
                pred_dict["transition_level"] = True

        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name in predict_map:
                pred_dict[class_predictor.name] = (
                    class_predictor.predict_parsed())

        pred_dict_text = self.parse_image_text(predict_map_text)
        pred_dict.update(pred_dict_text)
        self.status.update_status(pred_dict, self.trainers_predictors_list)
        return pred_dict

    def predict(self):
        """Parse the raw capture and return the prediction of every trainer."""
        self.parse_raw_image()
        return self.predict_parsed_all()

    def check_predict(self, pred_dict, predictor, classification):
        """Tell whether ``predictor`` predicted the class ``classification``.

        :param pred_dict: predictions keyed by predictor name.
        :param predictor: name of the trainer predictor to check.
        :param classification: class label expected for that predictor.
        :returns: ``True``/``False`` for a known predictor, ``None`` when no
            trainer predictor has that name.
        """
        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name == predictor:
                expected = class_predictor.pred_classes.index(classification)
                return int(pred_dict[predictor]) == expected
        return None