import logging
import time
from io import BytesIO

from PIL import Image
from tesserocr import OEM, PSM, PyTessBaseAPI, get_languages

# `alpha3` is assumed to be a project-local helper that maps language names
# or two-letter codes to the ISO 639-2 codes Tesseract expects.

log = logging.getLogger(__name__)


class OCR(object):
    MAX_MODELS = 5
    DEFAULT_MODE = PSM.AUTO_OSD

    # DEFAULT_MODE = PSM.AUTO

    def __init__(self):
        # Tesseract language types:
        _, self.supported = get_languages()

    def language_list(self, languages):
        models = [c for c in alpha3(languages) if c in self.supported]
        if len(models) > self.MAX_MODELS:
            log.warning("Too many models, limit: %s", self.MAX_MODELS)
            models = models[:self.MAX_MODELS]
        models.append('eng')  # always include English as a fallback
        return '+'.join(sorted(set(models)))

    def configure_engine(self, languages, mode):
        # log.info("Configuring OCR engine (%s)", languages)
        if not hasattr(self, 'api'):
            self.api = PyTessBaseAPI(lang=languages, oem=OEM.LSTM_ONLY)
        if languages != self.api.GetInitLanguagesAsString():
            self.api.Init(lang=languages, oem=OEM.LSTM_ONLY)
        if mode != self.api.GetPageSegMode():
            self.api.SetPageSegMode(mode)
        return self.api

    def extract_text(self, data, languages=None, mode=DEFAULT_MODE):
        """Extract text from a binary string of data."""
        languages = self.language_list(languages)
        api = self.configure_engine(languages, mode)

        try:
            image = Image.open(BytesIO(data))
            # TODO: play with contrast and sharpening the images.
            start_time = time.time()
            api.SetImage(image)
            text = api.GetUTF8Text()
            confidence = api.MeanTextConf()
            end_time = time.time()
            duration = end_time - start_time
            log.info("%s chars (w: %s, h: %s, langs: %s, c: %s), took: %.5f",
                     len(text), image.width, image.height, languages,
                     confidence, duration)
            return text
        except Exception:
            log.exception("Failed to OCR: %s", languages)
        finally:
            api.Clear()
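# A minimal usage sketch for the class above; the input path is a
# placeholder, not part of the original example.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    with open('scanned_page.png', 'rb') as fh:  # hypothetical input image
        data = fh.read()
    ocr = OCR()
    # Request German and Russian models; 'eng' is appended as a fallback.
    print(ocr.extract_text(data, languages=['deu', 'rus']))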
Example #2
import time
from datetime import datetime

import tesserocr
from PIL import Image
from tesserocr import PSM, PyTessBaseAPI

# glo (paths/config), TrainerPredictor, CurrentStatus and save_pickle are
# project-local helpers from the original repository, assumed importable.


class TT2Predictor:
    """holds the several trainer predictor instances and common operations """
    def __init__(self, **kwargs):
        self.trainers_predictors_list = []
        self.text_predictors_list = [
            ("previous_level", (1212, 231, 1230, 280), "0123456789", "8"),
            ("main_level", (1203, 323, 1223, 399), "0123456789", "8"),
            ("next_level", (1212, 445, 1230, 493), "0123456789", "8"),
            ("sub_level", (1177, 625, 1203, 692), "0123456789/", "8"),
            ("gold", (1091, 283, 1126, 471),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "7"),
            ("current_dps_down_no_tab", (389, 562, 423, 709),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "8"),
            ("last_hero", (124, 109, 148, 430),
             "0123456789.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
             "7")
        ]
        self.api = PyTessBaseAPI()  # the constructor already runs Init()
        print(tesserocr.tesseract_version())
        print(tesserocr.get_languages())
        self.global_image = None
        self.status = CurrentStatus()

        boss_trainer = TrainerPredictor(
            "boss_active_predictor",
            ["boss_active", "boss_inactive", "no_boss"],
            (1224, 555, 1248, 648), 12, 46, 255.0, [200, 30])
        egg_trainer = TrainerPredictor("egg_active_predictor",
                                       ["egg_active", "egg_inactive"],
                                       (741, 31, 761, 64), 10, 16, 255.0,
                                       [200, 30])
        gold_pet_trainer = TrainerPredictor(
            "gold_pet_predictor",
            ["goldpet", "nopet", "normalpet", "partial pet"],
            (624, 364, 734, 474), 40, 40, 255.0, [200, 30])
        tab_predictor = TrainerPredictor("tab_predictor", [
            "skills_tab", "heroes_tab", "equipment_tab", "pet_tab",
            "relic_tab", "shop_tab", "no_tab"
        ], (51, 1, 59, 717), 2, 179, 255.0, [200, 30])
        self.trainers_predictors_list.append(boss_trainer)
        self.trainers_predictors_list.append(egg_trainer)
        self.trainers_predictors_list.append(gold_pet_trainer)
        self.trainers_predictors_list.append(tab_predictor)
        # Offline training pipeline; the steps are disabled by default and
        # can be uncommented to retrain the classifiers.
        for trainer in self.trainers_predictors_list:
            pass
            #trainer.crop_images()
            #trainer.process_images()
            #trainer.read_and_pickle()
            #trainer.train_graph()
        saved_classes_file = glo.DATA_FOLDER + "/dataforclassifier/TrainerPredictor_list.pickle"
        save_pickle(saved_classes_file, self.trainers_predictors_list)

    def parse_raw_image(self):
        with open(glo.RAW_FULL_FILE, 'rb') as f:
            image = Image.frombytes('RGBA', (1280, 720), f.read())
        for class_predictor in self.trainers_predictors_list:
            class_predictor.predict_crop(image)
        self.global_image = image
        # time.strftime() does not support %f (microseconds); use datetime.
        image.save(glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER + "/fullcapture" +
                   datetime.now().strftime("%Y%m%d-%H%M%S-%f") +
                   ".png")  # save original capture copy

    def parse_image_text(self, predict_map):
        return_dict = {}
        for name, box, whitelist, psm in self.text_predictors_list:
            if name in predict_map:
                img = self.global_image.crop(box)
                img = img.convert('L')
                img = img.rotate(90, expand=True)
                self.api.SetImage(img)
                self.api.SetVariable("tessedit_char_whitelist", whitelist)
                self.api.SetVariable("tessedit_pageseg_mode", psm)
                self.api.SetVariable("language_model_penalty_non_dict_word",
                                     "0")
                self.api.SetVariable("doc_dict_enable", "0")
                text_capture = self.api.GetUTF8Text().encode('utf-8').strip()
                return_dict[name] = text_capture
                print("raw text capture ", name, ":", text_capture)
                self.api.Clear()
        return return_dict

    def predict_parsed_all(self):
        pred_dict = {}
        for class_predictor in self.trainers_predictors_list:
            pred_dict[class_predictor.name] = class_predictor.predict_parsed()
        return pred_dict

    def predict_parsed(self, predict_map, predict_map_text, **kwargs):
        pred_dict = {"transition_level": False}

        # Check whether the image is mid level-transition (trivial
        # prediction). kwargs is a dict, so use a membership test rather than
        # hasattr(); the check is skipped only when the caller explicitly
        # passes empty_image=False.
        if kwargs.get("empty_image") is not False:
            img = self.global_image.crop((0, 0, 100, 100))  # black corner
            extrema = img.convert("L").getextrema()
            if extrema[0] == extrema[1]:  # only one color: blank corner
                print("warning: level transitioning")
                pred_dict["transition_level"] = True

        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name in predict_map:
                pred_dict[class_predictor.name] = class_predictor.predict_parsed()
        pred_dict_text = self.parse_image_text(predict_map_text)
        pred_dict.update(pred_dict_text)
        self.status.update_status(pred_dict, self.trainers_predictors_list)
        return pred_dict

    def predict(self):
        self.parse_raw_image()
        return self.predict_parsed_all()

    def check_predict(self, pred_dict, predictor, classification):
        # Returns True/False for a known predictor name, None otherwise.
        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name == predictor:
                return (int(pred_dict[predictor]) ==
                        class_predictor.pred_classes.index(classification))
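# The whitelist/page-segmentation pattern from parse_image_text() also works
# standalone; a minimal sketch (the crop path is a placeholder).
with PyTessBaseAPI(psm=PSM.SINGLE_WORD) as ocr_api:  # PSM 8, as in the tuples above
    ocr_api.SetVariable("tessedit_char_whitelist", "0123456789/")
    ocr_api.SetImage(Image.open("sub_level_crop.png"))  # hypothetical crop
    print(ocr_api.GetUTF8Text().strip())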
Example #3
import csv
import datetime as dt
import re
from collections import Counter
from io import StringIO

import cv2 as cv
import numpy as np
import pafy
from PIL import Image
from tesserocr import PyTessBaseAPI

# Donation and db (the SQLAlchemy session) are assumed to come from the
# original project's models.


class DonationRecognizer:
    TESSERACT_THRESHOLD = 75

    def __init__(self):
        self.tesseract_client = PyTessBaseAPI(lang='rus+eng')

    def mask_donator_name_and_amount(self, frame):
        hsv_frame = cv.cvtColor(frame, cv.COLOR_BGR2HSV)

        # Blue color range donator name and donate amount
        lower_val = np.array([90, 155, 155])
        upper_val = np.array([103, 255, 255])
        mask = cv.inRange(hsv_frame, lower_val, upper_val)

        masked_frame = cv.bitwise_not(mask)

        # morphologyEx expects a kernel array, not a bare (3, 3) tuple
        kernel = np.ones((3, 3), np.uint8)
        masked_frame = cv.morphologyEx(masked_frame, cv.MORPH_CLOSE, kernel)
        masked_frame = cv.GaussianBlur(masked_frame, (3, 3), 0)

        return masked_frame

    def bottom_part_of_frame_with_donator_name_and_amount(self, frame):
        return frame[540:720, 180:1100]

    def top_part_of_frame_with_donator_name_and_amount(self, frame):
        return frame[0:80, 180:1100]

    def is_frame_empty(self, frame):
        # Text pixels are black after masking; fewer than 200 means no text.
        return np.sum(frame == 0) < 200

    def find_donator_name_and_amount(self, frame, lang, results):
        # Frame doesn't have donation text
        if self.is_frame_empty(frame):
            return [results, False]

        self.tesseract_client.Init(lang=lang)
        self.tesseract_client.SetImage(Image.fromarray(frame))

        tsv_data = self.tesseract_client.GetTSVText(0)
        parsed_tsv_data = csv.reader(StringIO(tsv_data), delimiter='\t')

        recognized = False

        # TSV columns: level, page_num, block_num, par_num, line_num,
        # word_num, left, top, width, height, conf, text
        for row in parsed_tsv_data:
            text = row[-1]
            conf = int(row[-2])
            left = int(row[-6])
            height = int(row[-3])
            width = int(row[-4])

            if conf >= self.TESSERACT_THRESHOLD:
                recognized = True
                index = None

                # Group words whose left offset lies within 10px of a column
                # seen in an earlier frame.
                for i in range(-10, 10):
                    if left + i in results:
                        index = left + i

                index = index or left

                if index not in results:
                    results[index] = []

                results[index].append([conf, height, width, text])

        # The frame has something color-wise similar to donation text,
        # but it is not a donation.
        if len(results) <= 2:
            return [{}, False]

        return [results, recognized]

    def find_most_accurate(self, results):
        words = []

        for key, result in sorted(results.items()):
            heights = [r[1] for r in result]
            widths = [r[2] for r in result]
            common_height = Counter(heights).most_common(1)[0][0]
            common_width = Counter(widths).most_common(1)[0][0]

            filtered_result = [r for r in result
                               if r[1] == common_height and r[2] == common_width]

            words.append(max(filtered_result, key=lambda x: x[0])[3])

        return words

    def format_donation_data(self, donation):
        raw_donation = ' '.join(donation).strip()
        raw_donation = re.sub(r"((?<=RUB|USD|EUR)(?:.(?!(RUB|USD|EUR)))+)$", "", raw_donation)
        raw_donation = re.sub(r"^\W+", "", raw_donation)
        raw_donation = re.sub(r"\W+$", "", raw_donation)

        regexped = re.search(r"(.+?) (([\d\s]+,?\d*) (RUB|USD|EUR)!?\Z)", raw_donation)

        if not regexped:
            return {
                'raw_donation': raw_donation
            }

        groups = regexped.groups()

        return {
            'raw_donation': raw_donation,
            'author': groups[0].rstrip(' -'),
            'amount': float(groups[2].replace(',', '.').replace(' ', '')),
            'currency': groups[3],
        }
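    # For reference, a worked example with invented words: joining
    # ["Ivan", "-", "1", "000,50", "RUB"] gives 'Ivan - 1 000,50 RUB', and
    # format_donation_data() then returns
    # {'raw_donation': 'Ivan - 1 000,50 RUB', 'author': 'Ivan',
    #  'amount': 1000.5, 'currency': 'RUB'}.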

    def vods_with_donations(self):
        date_format = '%Y-%m-%d %H:%M:%S'

        # Donations were enabled since Sep 30, 2015
        # First stream is https://www.youtube.com/watch?v=snkEcLcIPew
        first_donation_date = dt.datetime.strptime('2015-09-30 00:00:00', date_format)

        channel = pafy.get_channel("https://www.youtube.com/user/SpitefulDick")
        uploads = list(filter(lambda vod: dt.datetime.strptime(vod.published, date_format) >= first_donation_date, channel.uploads))

        return sorted(uploads, key=lambda vod: vod.published)

    def save(self, data):
        donation = Donation(
            raw_donation=data.get('raw_donation'),
            author=data.get('author'),
            amount=data.get('amount'),
            currency=data.get('currency'),
            donated_at=data.get('donated_at'),
            donation_image=memoryview(data.get('donation_image')),
            vod_youtube_id=self.vod.videoid,
            vod_published_at=self.vod.published
        )

        db.session.add(donation)
        db.session.commit()

    def update(self, donation, data):
        donation.updated_at = dt.datetime.now()
        donation.raw_donation = data.get('raw_donation')
        donation.donation_image = data.get('donation_image')
        donation.author = data.get('author')
        donation.amount = data.get('amount')
        donation.currency = data.get('currency')

        db.session.commit()

    def recognize(self, video, vod, frame_start, break_after_recognition, callback):
        self.video = video
        self.vod = vod

        frame_number = 1

        first_extracted_at = None
        first_recognized_frame = None
        results = {}
        recognized = False

        not_recognized_times = 0

        ret, frame = self.video.read()
        lang = "rus"

        while ret:
            # Get every 30th frame since youtube has 30 frames per second
            if frame_number % 30 == 0 and frame_number / 30 > frame_start:
                frame_part = self.bottom_part_of_frame_with_donator_name_and_amount(frame)
                masked_frame = self.mask_donator_name_and_amount(frame_part)

                # Try to find donation at the bottom of frame
                results, recognized = self.find_donator_name_and_amount(masked_frame, lang, results)

                # Try to find donation at the top of frame
                if not recognized:
                    frame_part = self.top_part_of_frame_with_donator_name_and_amount(frame)
                    masked_frame = self.mask_donator_name_and_amount(frame_part)

                    results, recognized = self.find_donator_name_and_amount(masked_frame, lang, results)

                # Alternate Tesseract languages between frames, since the
                # donation author's name can be Russian, English, or mixed.
                lang = "eng" if lang == "rus" else "rus"

                if recognized:
                    first_extracted_at = first_extracted_at or frame_number

                    # Store frame when donation was detected at first. We save this data later
                    if first_recognized_frame is None:
                        first_recognized_frame = frame_part
                elif results and not_recognized_times >= 3:
                    words = self.find_most_accurate(results)
                    formatted_donation_data = self.format_donation_data(words)

                    if formatted_donation_data:
                        formatted_donation_data['donated_at'] = int(first_extracted_at / 30)
                        _, formatted_donation_data['donation_image'] = cv.imencode('.webp', first_recognized_frame, [cv.IMWRITE_WEBP_QUALITY, 20])
                        callback(formatted_donation_data)

                    results = {}
                    first_extracted_at = None
                    first_recognized_frame = None
                    not_recognized_times = 0

                    if break_after_recognition:
                        break
                else:
                    not_recognized_times = not_recognized_times + 1

                    if not_recognized_times >= 4 and break_after_recognition:
                        break

            frame_number = frame_number + 1
            ret, frame = self.video.read()
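# A minimal driver sketch for the class above; the video path and callback
# are placeholders, and vod is only consulted when save() is called.
def print_donation(data):
    print(data['raw_donation'], data.get('amount'), data.get('currency'))

video = cv.VideoCapture('vod.mp4')  # hypothetical local download of the VOD
recognizer = DonationRecognizer()
recognizer.recognize(video, vod=None, frame_start=0,
                     break_after_recognition=True, callback=print_donation)
video.release()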
Example #4
#with PyTessBaseAPI() as api:
#    for img in images:
#        api.SetImageFile(img)
#        print( api.GetUTF8Text())
#        print( api.AllWordConfidences())

from PIL import Image
from tesserocr import PyTessBaseAPI, RIL, iterate_level, PSM
#print(help(tesserocr))

# glo is a project-local config module from the original repository.
img = Image.open(
    glo.DATA_FOLDER + '/number_range_predictorcropped3.png'
)  #glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER + '/fullcapture961 .png')
#img = img.convert('L')

api = PyTessBaseAPI()  # the constructor already runs Init()
api.SetImageFile(glo.DATA_FOLDER + '/number_range_predictorcropped3.png')
api.SetVariable("tessedit_pageseg_mode", "7")
api.SetVariable("language_model_penalty_non_dict_word", "0")
api.SetVariable("doc_dict_enable", "0")
print("recognized txt:", api.GetUTF8Text().encode('utf-8').strip())
#api.Recognize()
"""
ri = api.GetIterator()
level = RIL.SYMBOL
for r in iterate_level(ri, level):
    symbol = r.GetUTF8Text(level)  # r == ri
    conf = r.Confidence(level)
    print(u'symbol {}, conf: {}'.format(symbol, conf).encode('utf-8').strip())
    indent = False
    ci = r.GetChoiceIterator()
"""
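# The commented-out iterator block above is truncated; a minimal working
# sketch of per-symbol confidences under the same setup.
api.Recognize()  # required before a result iterator can be obtained
ri = api.GetIterator()
for r in iterate_level(ri, RIL.SYMBOL):
    symbol = r.GetUTF8Text(RIL.SYMBOL)
    conf = r.Confidence(RIL.SYMBOL)
    print(u'symbol {}, conf: {:.2f}'.format(symbol, conf))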
Example #5
# segment_into_lines() and Blurb are helpers from the original project and
# are assumed importable.
from PIL import Image
from tesserocr import OEM, PSM, PyTessBaseAPI


def ocr_on_bounding_boxes(img, components):

    blurbs = []
    for component in components:
        (aspect, vertical, horizontal) = segment_into_lines(img, component)
        #if len(vertical)<2 and len(horizontal)<2:continue

        #attempt to separately process furigana
        #(furigana, non_furigana) = estimate_furigana(vertical)
        '''
        From http://code.google.com/p/tesseract-ocr/wiki/ControlParams:
        useful parameters for Japanese and Chinese.

        Some Japanese tesseract users found these parameters helpful for
        increasing tesseract-ocr (3.02) accuracy for Japanese:

        Name                           Suggested value  Description
        chop_enable                    T                Chop enable.
        use_new_state_cost             F                Use new state cost heuristics for segmentation state evaluation.
        segment_segcost_rating         F                Incorporate segmentation cost in word rating?
        enable_new_segsearch           0                Enable new segmentation search path.
        language_model_ngram_on        0                Turn on/off the use of character ngram model.
        textord_force_make_prop_words  F                Force proportional word segmentation on all rows.
        '''
        # Now run OCR on this bounding box. The tessdata path is Windows
        # specific; adjust it to the local Tesseract install.
        api = PyTessBaseAPI(path='C:/Program Files/Tesseract-OCR/tessdata',
                            lang='jpn', oem=OEM.DEFAULT)
        # Handle single-column lines as vertically aligned text and use
        # automatic segmentation otherwise.
        if len(vertical) < 2:
            api.SetPageSegMode(PSM.SINGLE_BLOCK_VERT_TEXT)  # PSM 5
        else:
            api.SetPageSegMode(PSM.AUTO)
        api.SetVariable('chop_enable', 'T')
        api.SetVariable('use_new_state_cost', 'F')
        api.SetVariable('segment_segcost_rating', 'F')
        api.SetVariable('enable_new_segsearch', '0')
        api.SetVariable('language_model_ngram_on', '0')
        api.SetVariable('textord_force_make_prop_words', 'F')
        api.SetVariable('tessedit_char_blacklist', '}><L')
        api.SetVariable('textord_debug_tabfind', '0')

        x = component[1].start
        y = component[0].start
        w = component[1].stop - x
        h = component[0].stop - y
        # Crop the region of interest with numpy slicing; the cv2.cv
        # CreateImage/GetSubRect API used originally was removed in OpenCV 3.
        roi = img[y:y + h, x:x + w]
        api.SetImage(Image.fromarray(roi))
        txt = api.GetUTF8Text()
        conf = api.MeanTextConf()
        if conf > 0 and len(txt) > 0:
            blurb = Blurb(x, y, w, h, txt, confidence=conf)
            blurbs.append(blurb)
        '''
    for line in non_furigana:
      x = line[1].start
      y = line[0].start
      w = line[1].stop - x
      h = line[0].stop - y
      roi = img[y:y + h, x:x + w]
      api.SetImage(Image.fromarray(roi))
      txt = api.GetUTF8Text()
      conf = api.MeanTextConf()
      if conf > 0:
        blurb = Blurb(x, y, w, h, txt, confidence=conf)
        blurbs.append(blurb)
    '''
    return blurbs
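# ocr_on_bounding_boxes() expects components to be (row_slice, col_slice)
# pairs; a hedged sketch of producing them with scipy.ndimage (the page
# image path and the Blurb attribute names are assumptions).
import cv2
from scipy import ndimage

page = cv2.imread('manga_page.png', cv2.IMREAD_GRAYSCALE)  # hypothetical page
labels, count = ndimage.label(page < 128)  # dark pixels treated as text
components = ndimage.find_objects(labels)  # list of (row_slice, col_slice)
for blurb in ocr_on_bounding_boxes(page, components):
    print(blurb.confidence, blurb.txt)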