import logging
import time
from io import BytesIO

from PIL import Image
from tesserocr import OEM, PSM, PyTessBaseAPI, get_languages

# `alpha3` is assumed to be a project-local helper that maps language names
# or two-letter codes to the ISO 639-2 codes Tesseract expects.

log = logging.getLogger(__name__)


class OCR(object):
    MAX_MODELS = 5
    DEFAULT_MODE = PSM.AUTO_OSD

    # DEFAULT_MODE = PSM.AUTO

    def __init__(self):
        # Tesseract language types:
        _, self.supported = get_languages()

    def language_list(self, languages):
        models = [c for c in alpha3(languages) if c in self.supported]
        if len(models) > self.MAX_MODELS:
            log.warning("Too many models, limit: %s", self.MAX_MODELS)
            models = models[:self.MAX_MODELS]
        models.append('eng')  # always include English as a fallback
        return '+'.join(sorted(set(models)))

    def configure_engine(self, languages, mode):
        # log.info("Configuring OCR engine (%s)", languages)
        if not hasattr(self, 'api'):
            self.api = PyTessBaseAPI(lang=languages, oem=OEM.LSTM_ONLY)
        if languages != self.api.GetInitLanguagesAsString():
            self.api.Init(lang=languages, oem=OEM.LSTM_ONLY)
        if mode != self.api.GetPageSegMode():
            self.api.SetPageSegMode(mode)
        return self.api

    def extract_text(self, data, languages=None, mode=DEFAULT_MODE):
        """Extract text from a binary string of data."""
        languages = self.language_list(languages)
        api = self.configure_engine(languages, mode)

        try:
            image = Image.open(BytesIO(data))
            # TODO: play with contrast and sharpening the images.
            start_time = time.time()
            api.SetImage(image)
            text = api.GetUTF8Text()
            confidence = api.MeanTextConf()
            end_time = time.time()
            duration = end_time - start_time
            log.info("%s chars (w: %s, h: %s, langs: %s, c: %s), took: %.5f",
                     len(text), image.width, image.height, languages,
                     confidence, duration)
            return text
        except Exception:
            log.exception("Failed to OCR: %s", languages)
        finally:
            api.Clear()
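# A minimal usage sketch for the class above; the input path is a
# placeholder, not part of the original example.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    with open('scanned_page.png', 'rb') as fh:  # hypothetical input image
        data = fh.read()
    ocr = OCR()
    # Request German and Russian models; 'eng' is appended as a fallback.
    print(ocr.extract_text(data, languages=['deu', 'rus']))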
Example #2
import time
from datetime import datetime

import tesserocr
from PIL import Image
from tesserocr import PSM, PyTessBaseAPI

# glo (paths/config), TrainerPredictor, CurrentStatus and save_pickle are
# project-local helpers from the original repository, assumed importable.


class TT2Predictor:
    """holds the several trainer predictor instances and common operations """
    def __init__(self, **kwargs):
        self.trainers_predictors_list = []
        self.text_predictors_list = [
            ("previous_level", (1212, 231, 1230, 280), "0123456789", "8"),
            ("main_level", (1203, 323, 1223, 399), "0123456789", "8"),
            ("next_level", (1212, 445, 1230, 493), "0123456789", "8"),
            ("sub_level", (1177, 625, 1203, 692), "0123456789/", "8"),
            ("gold", (1091, 283, 1126, 471),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "7"),
            ("current_dps_down_no_tab", (389, 562, 423, 709),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "8"),
            ("last_hero", (124, 109, 148, 430),
             "0123456789.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
             "7")
        ]
        self.api = PyTessBaseAPI()  # the constructor already runs Init()
        print(tesserocr.tesseract_version())
        print(tesserocr.get_languages())
        self.global_image = None
        self.status = CurrentStatus()

        boss_trainer = TrainerPredictor(
            "boss_active_predictor",
            ["boss_active", "boss_inactive", "no_boss"],
            (1224, 555, 1248, 648), 12, 46, 255.0, [200, 30])
        egg_trainer = TrainerPredictor("egg_active_predictor",
                                       ["egg_active", "egg_inactive"],
                                       (741, 31, 761, 64), 10, 16, 255.0,
                                       [200, 30])
        gold_pet_trainer = TrainerPredictor(
            "gold_pet_predictor",
            ["goldpet", "nopet", "normalpet", "partial pet"],
            (624, 364, 734, 474), 40, 40, 255.0, [200, 30])
        tab_predictor = TrainerPredictor("tab_predictor", [
            "skills_tab", "heroes_tab", "equipment_tab", "pet_tab",
            "relic_tab", "shop_tab", "no_tab"
        ], (51, 1, 59, 717), 2, 179, 255.0, [200, 30])
        self.trainers_predictors_list.append(boss_trainer)
        self.trainers_predictors_list.append(egg_trainer)
        self.trainers_predictors_list.append(gold_pet_trainer)
        self.trainers_predictors_list.append(tab_predictor)
        # Offline training pipeline; the steps are disabled by default and
        # can be uncommented to retrain the classifiers.
        for trainer in self.trainers_predictors_list:
            pass
            #trainer.crop_images()
            #trainer.process_images()
            #trainer.read_and_pickle()
            #trainer.train_graph()
        saved_classes_file = glo.DATA_FOLDER + "/dataforclassifier/TrainerPredictor_list.pickle"
        save_pickle(saved_classes_file, self.trainers_predictors_list)

    def parse_raw_image(self):
        with open(glo.RAW_FULL_FILE, 'rb') as f:
            image = Image.frombytes('RGBA', (1280, 720), f.read())
        for class_predictor in self.trainers_predictors_list:
            class_predictor.predict_crop(image)
        self.global_image = image
        # time.strftime() does not support %f (microseconds); use datetime.
        image.save(glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER + "/fullcapture" +
                   datetime.now().strftime("%Y%m%d-%H%M%S-%f") +
                   ".png")  # save original capture copy

    def parse_image_text(self, predict_map):
        return_dict = {}
        for name, box, whitelist, psm in self.text_predictors_list:
            if name in predict_map:
                img = self.global_image.crop(box)
                img = img.convert('L')
                img = img.rotate(90, expand=True)
                self.api.SetImage(img)
                self.api.SetVariable("tessedit_char_whitelist", whitelist)
                self.api.SetVariable("tessedit_pageseg_mode", psm)
                self.api.SetVariable("language_model_penalty_non_dict_word",
                                     "0")
                self.api.SetVariable("doc_dict_enable", "0")
                text_capture = self.api.GetUTF8Text().encode('utf-8').strip()
                return_dict[name] = text_capture
                print("raw text capture ", name, ":", text_capture)
                self.api.Clear()
        return return_dict

    def predict_parsed_all(self):
        pred_dict = {}
        for class_predictor in self.trainers_predictors_list:
            pred_dict[class_predictor.name] = class_predictor.predict_parsed()
        return pred_dict

    def predict_parsed(self, predict_map, predict_map_text, **kwargs):
        pred_dict = {"transition_level": False}

        # Check whether the image is mid level-transition (trivial
        # prediction). kwargs is a dict, so use a membership test rather than
        # hasattr(); the check is skipped only when the caller explicitly
        # passes empty_image=False.
        if kwargs.get("empty_image") is not False:
            img = self.global_image.crop((0, 0, 100, 100))  # black corner
            extrema = img.convert("L").getextrema()
            if extrema[0] == extrema[1]:  # only one color: blank corner
                print("warning: level transitioning")
                pred_dict["transition_level"] = True

        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name in predict_map:
                pred_dict[class_predictor.name] = class_predictor.predict_parsed()
        pred_dict_text = self.parse_image_text(predict_map_text)
        pred_dict.update(pred_dict_text)
        self.status.update_status(pred_dict, self.trainers_predictors_list)
        return pred_dict

    def predict(self):
        self.parse_raw_image()
        return self.predict_parsed_all()

    def check_predict(self, pred_dict, predictor, classification):
        # Returns True/False for a known predictor name, None otherwise.
        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name == predictor:
                return (int(pred_dict[predictor]) ==
                        class_predictor.pred_classes.index(classification))
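# The whitelist/page-segmentation pattern from parse_image_text() also works
# standalone; a minimal sketch (the crop path is a placeholder).
with PyTessBaseAPI(psm=PSM.SINGLE_WORD) as ocr_api:  # PSM 8, as in the tuples above
    ocr_api.SetVariable("tessedit_char_whitelist", "0123456789/")
    ocr_api.SetImage(Image.open("sub_level_crop.png"))  # hypothetical crop
    print(ocr_api.GetUTF8Text().strip())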
Example #3
import csv
import datetime as dt
import re
from collections import Counter
from io import StringIO

import cv2 as cv
import numpy as np
import pafy
from PIL import Image
from tesserocr import PyTessBaseAPI

# Donation and db (the SQLAlchemy session) are assumed to come from the
# original project's models.


class DonationRecognizer:
    TESSERACT_THRESHOLD = 75

    def __init__(self):
        self.tesseract_client = PyTessBaseAPI(lang='rus+eng')

    def mask_donator_name_and_amount(self, frame):
        hsv_frame = cv.cvtColor(frame, cv.COLOR_BGR2HSV)

        # Blue color range donator name and donate amount
        lower_val = np.array([90, 155, 155])
        upper_val = np.array([103, 255, 255])
        mask = cv.inRange(hsv_frame, lower_val, upper_val)

        masked_frame = cv.bitwise_not(mask)

        # morphologyEx expects a kernel array, not a bare (3, 3) tuple
        kernel = np.ones((3, 3), np.uint8)
        masked_frame = cv.morphologyEx(masked_frame, cv.MORPH_CLOSE, kernel)
        masked_frame = cv.GaussianBlur(masked_frame, (3, 3), 0)

        return masked_frame

    def bottom_part_of_frame_with_donator_name_and_amount(self, frame):
        return frame[540:720, 180:1100]

    def top_part_of_frame_with_donator_name_and_amount(self, frame):
        return frame[0:80, 180:1100]

    def is_frame_empty(self, frame):
        # Text pixels are black after masking; fewer than 200 means no text.
        return np.sum(frame == 0) < 200

    def find_donator_name_and_amount(self, frame, lang, results):
        # Frame doesn't have donation text
        if self.is_frame_empty(frame):
            return [results, False]

        self.tesseract_client.Init(lang=lang)
        self.tesseract_client.SetImage(Image.fromarray(frame))

        tsv_data = self.tesseract_client.GetTSVText(0)
        parsed_tsv_data = csv.reader(StringIO(tsv_data), delimiter='\t')

        recognized = False

        # TSV columns: level, page_num, block_num, par_num, line_num,
        # word_num, left, top, width, height, conf, text
        for row in parsed_tsv_data:
            text = row[-1]
            conf = int(row[-2])
            left = int(row[-6])
            height = int(row[-3])
            width = int(row[-4])

            if conf >= self.TESSERACT_THRESHOLD:
                recognized = True
                index = None

                # Group words whose left offset lies within 10px of a column
                # seen in an earlier frame.
                for i in range(-10, 10):
                    if left + i in results:
                        index = left + i

                index = index or left

                if index not in results:
                    results[index] = []

                results[index].append([conf, height, width, text])

        # The frame has something color-wise similar to donation text,
        # but it is not a donation.
        if len(results) <= 2:
            return [{}, False]

        return [results, recognized]

    def find_most_accurate(self, results):
        words = []

        for key, result in sorted(results.items()):
            heights = [r[1] for r in result]
            widths = [r[2] for r in result]
            common_height = Counter(heights).most_common(1)[0][0]
            common_width = Counter(widths).most_common(1)[0][0]

            filtered_result = [r for r in result
                               if r[1] == common_height and r[2] == common_width]

            words.append(max(filtered_result, key=lambda x: x[0])[3])

        return words

    def format_donation_data(self, donation):
        raw_donation = ' '.join(donation).strip()
        raw_donation = re.sub(r"((?<=RUB|USD|EUR)(?:.(?!(RUB|USD|EUR)))+)$", "", raw_donation)
        raw_donation = re.sub(r"^\W+", "", raw_donation)
        raw_donation = re.sub(r"\W+$", "", raw_donation)

        regexped = re.search(r"(.+?) (([\d\s]+,?\d*) (RUB|USD|EUR)!?\Z)", raw_donation)

        if not regexped:
            return {
                'raw_donation': raw_donation
            }

        groups = regexped.groups()

        return {
            'raw_donation': raw_donation,
            'author': groups[0].rstrip(' -'),
            'amount': float(groups[2].replace(',', '.').replace(' ', '')),
            'currency': groups[3],
        }
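    # For reference, a worked example with invented words: joining
    # ["Ivan", "-", "1", "000,50", "RUB"] gives 'Ivan - 1 000,50 RUB', and
    # format_donation_data() then returns
    # {'raw_donation': 'Ivan - 1 000,50 RUB', 'author': 'Ivan',
    #  'amount': 1000.5, 'currency': 'RUB'}.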

    def vods_with_donations(self):
        date_format = '%Y-%m-%d %H:%M:%S'

        # Donations were enabled since Sep 30, 2015
        # First stream is https://www.youtube.com/watch?v=snkEcLcIPew
        first_donation_date = dt.datetime.strptime('2015-09-30 00:00:00', date_format)

        channel = pafy.get_channel("https://www.youtube.com/user/SpitefulDick")
        uploads = list(filter(lambda vod: dt.datetime.strptime(vod.published, date_format) >= first_donation_date, channel.uploads))

        return sorted(uploads, key=lambda vod: vod.published)

    def save(self, data):
        donation = Donation(
            raw_donation=data.get('raw_donation'),
            author=data.get('author'),
            amount=data.get('amount'),
            currency=data.get('currency'),
            donated_at=data.get('donated_at'),
            donation_image=memoryview(data.get('donation_image')),
            vod_youtube_id=self.vod.videoid,
            vod_published_at=self.vod.published
        )

        db.session.add(donation)
        db.session.commit()

    def update(self, donation, data):
        donation.updated_at = dt.datetime.now()
        donation.raw_donation = data.get('raw_donation')
        donation.donation_image = data.get('donation_image')
        donation.author = data.get('author')
        donation.amount = data.get('amount')
        donation.currency = data.get('currency')

        db.session.commit()

    def recognize(self, video, vod, frame_start, break_after_recognition, callback):
        self.video = video
        self.vod = vod

        frame_number = 1

        first_extracted_at = None
        first_recognized_frame = None
        results = {}
        recognized = False

        not_recognized_times = 0

        ret, frame = self.video.read()
        lang = "rus"

        while ret:
            # Get every 30th frame since youtube has 30 frames per second
            if frame_number % 30 == 0 and frame_number / 30 > frame_start:
                frame_part = self.bottom_part_of_frame_with_donator_name_and_amount(frame)
                masked_frame = self.mask_donator_name_and_amount(frame_part)

                # Try to find donation at the bottom of frame
                results, recognized = self.find_donator_name_and_amount(masked_frame, lang, results)

                # Try to find donation at the top of frame
                if not recognized:
                    frame_part = self.top_part_of_frame_with_donator_name_and_amount(frame)
                    masked_frame = self.mask_donator_name_and_amount(frame_part)

                    results, recognized = self.find_donator_name_and_amount(masked_frame, lang, results)

                # Alternate Tesseract languages between frames, since the
                # donation author's name can be Russian, English, or mixed.
                lang = "eng" if lang == "rus" else "rus"

                if recognized:
                    first_extracted_at = first_extracted_at or frame_number

                    # Store frame when donation was detected at first. We save this data later
                    if first_recognized_frame is None:
                        first_recognized_frame = frame_part
                elif results and not_recognized_times >= 3:
                    words = self.find_most_accurate(results)
                    formatted_donation_data = self.format_donation_data(words)

                    if formatted_donation_data:
                        formatted_donation_data['donated_at'] = int(first_extracted_at / 30)
                        _, formatted_donation_data['donation_image'] = cv.imencode('.webp', first_recognized_frame, [cv.IMWRITE_WEBP_QUALITY, 20])
                        callback(formatted_donation_data)

                    results = {}
                    first_extracted_at = None
                    first_recognized_frame = None
                    not_recognized_times = 0

                    if break_after_recognition:
                        break
                else:
                    not_recognized_times = not_recognized_times + 1

                    if not_recognized_times >= 4 and break_after_recognition:
                        break

            frame_number = frame_number + 1
            ret, frame = self.video.read()
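# A minimal driver sketch for the class above; the video path and callback
# are placeholders, and vod is only consulted when save() is called.
def print_donation(data):
    print(data['raw_donation'], data.get('amount'), data.get('currency'))

video = cv.VideoCapture('vod.mp4')  # hypothetical local download of the VOD
recognizer = DonationRecognizer()
recognizer.recognize(video, vod=None, frame_start=0,
                     break_after_recognition=True, callback=print_donation)
video.release()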
Example #4
#with PyTessBaseAPI() as api:
#    for img in images:
#        api.SetImageFile(img)
#        print( api.GetUTF8Text())
#        print( api.AllWordConfidences())

from PIL import Image
from tesserocr import PyTessBaseAPI, RIL, iterate_level, PSM
#print(help(tesserocr))

# glo is a project-local config module from the original repository.
img = Image.open(
    glo.DATA_FOLDER + '/number_range_predictorcropped3.png'
)  #glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER + '/fullcapture961 .png')
#img = img.convert('L')

api = PyTessBaseAPI()  # the constructor already runs Init()
api.SetImageFile(glo.DATA_FOLDER + '/number_range_predictorcropped3.png')
api.SetVariable("tessedit_pageseg_mode", "7")
api.SetVariable("language_model_penalty_non_dict_word", "0")
api.SetVariable("doc_dict_enable", "0")
print("recognized txt:", api.GetUTF8Text().encode('utf-8').strip())
#api.Recognize()
"""
ri = api.GetIterator()
level = RIL.SYMBOL
for r in iterate_level(ri, level):
    symbol = r.GetUTF8Text(level)  # r == ri
    conf = r.Confidence(level)
    print(u'symbol {}, conf: {}'.format(symbol, conf).encode('utf-8').strip())
    indent = False
    ci = r.GetChoiceIterator()
"""
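# The commented-out iterator block above is truncated; a minimal working
# sketch of per-symbol confidences under the same setup.
api.Recognize()  # required before a result iterator can be obtained
ri = api.GetIterator()
for r in iterate_level(ri, RIL.SYMBOL):
    symbol = r.GetUTF8Text(RIL.SYMBOL)
    conf = r.Confidence(RIL.SYMBOL)
    print(u'symbol {}, conf: {:.2f}'.format(symbol, conf))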
Example #5
# segment_into_lines() and Blurb are helpers from the original project and
# are assumed importable.
from PIL import Image
from tesserocr import OEM, PSM, PyTessBaseAPI


def ocr_on_bounding_boxes(img, components):

    blurbs = []
    for component in components:
        (aspect, vertical, horizontal) = segment_into_lines(img, component)
        #if len(vertical)<2 and len(horizontal)<2:continue

        #attempt to separately process furigana
        #(furigana, non_furigana) = estimate_furigana(vertical)
        '''
        From http://code.google.com/p/tesseract-ocr/wiki/ControlParams:
        useful parameters for Japanese and Chinese.

        Some Japanese tesseract users found these parameters helpful for
        increasing tesseract-ocr (3.02) accuracy for Japanese:

        Name                           Suggested value  Description
        chop_enable                    T                Chop enable.
        use_new_state_cost             F                Use new state cost heuristics for segmentation state evaluation.
        segment_segcost_rating         F                Incorporate segmentation cost in word rating?
        enable_new_segsearch           0                Enable new segmentation search path.
        language_model_ngram_on        0                Turn on/off the use of character ngram model.
        textord_force_make_prop_words  F                Force proportional word segmentation on all rows.
        '''
        # Now run OCR on this bounding box. The tessdata path is Windows
        # specific; adjust it to the local Tesseract install.
        api = PyTessBaseAPI(path='C:/Program Files/Tesseract-OCR/tessdata',
                            lang='jpn', oem=OEM.DEFAULT)
        # Handle single-column lines as vertically aligned text and use
        # automatic segmentation otherwise.
        if len(vertical) < 2:
            api.SetPageSegMode(PSM.SINGLE_BLOCK_VERT_TEXT)  # PSM 5
        else:
            api.SetPageSegMode(PSM.AUTO)
        api.SetVariable('chop_enable', 'T')
        api.SetVariable('use_new_state_cost', 'F')
        api.SetVariable('segment_segcost_rating', 'F')
        api.SetVariable('enable_new_segsearch', '0')
        api.SetVariable('language_model_ngram_on', '0')
        api.SetVariable('textord_force_make_prop_words', 'F')
        api.SetVariable('tessedit_char_blacklist', '}><L')
        api.SetVariable('textord_debug_tabfind', '0')

        x = component[1].start
        y = component[0].start
        w = component[1].stop - x
        h = component[0].stop - y
        # Crop the region of interest with numpy slicing; the cv2.cv
        # CreateImage/GetSubRect API used originally was removed in OpenCV 3.
        roi = img[y:y + h, x:x + w]
        api.SetImage(Image.fromarray(roi))
        txt = api.GetUTF8Text()
        conf = api.MeanTextConf()
        if conf > 0 and len(txt) > 0:
            blurb = Blurb(x, y, w, h, txt, confidence=conf)
            blurbs.append(blurb)
        '''
    for line in non_furigana:
      x = line[1].start
      y = line[0].start
      w = line[1].stop - x
      h = line[0].stop - y
      roi = img[y:y + h, x:x + w]
      api.SetImage(Image.fromarray(roi))
      txt = api.GetUTF8Text()
      conf = api.MeanTextConf()
      if conf > 0:
        blurb = Blurb(x, y, w, h, txt, confidence=conf)
        blurbs.append(blurb)
    '''
    return blurbs
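# ocr_on_bounding_boxes() expects components to be (row_slice, col_slice)
# pairs; a hedged sketch of producing them with scipy.ndimage (the page
# image path and the Blurb attribute names are assumptions).
import cv2
from scipy import ndimage

page = cv2.imread('manga_page.png', cv2.IMREAD_GRAYSCALE)  # hypothetical page
labels, count = ndimage.label(page < 128)  # dark pixels treated as text
components = ndimage.find_objects(labels)  # list of (row_slice, col_slice)
for blurb in ocr_on_bounding_boxes(page, components):
    print(blurb.confidence, blurb.txt)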