def __init__(self, empty_char=properties.empty_char, is_eval=False):
     """Record the reader options and build the two Tesseract engines.

     Args:
         empty_char: Stored unchanged on the instance as ``empty_char``;
             defaults to ``properties.empty_char``.
         is_eval: Stored unchanged on the instance as ``is_eval``.
     """
     self.empty_char, self.is_eval = empty_char, is_eval

     # Engine for images holding a single text line; restricted to the LSTM
     # recogniser.
     self.api_single_line = tesserocr.PyTessBaseAPI(
         lang='eng',
         psm=tesserocr.PSM.SINGLE_LINE,
         path=properties.tesseract_path,
         oem=tesserocr.OEM.LSTM_ONLY,
     )

     # Engine for images holding one uniform block of text; the engine mode
     # is left at the library default.
     self.api_single_block = tesserocr.PyTessBaseAPI(
         lang='eng',
         psm=tesserocr.PSM.SINGLE_BLOCK,
         path=properties.tesseract_path,
     )
Example #2
0
def name_reader():
    """Generator that OCRs every image passed to it with ``.send()``.

    Prime the generator with ``next()`` (which yields ``None``), then send
    images one at a time; each ``send`` returns the text Tesseract read from
    that image as a single line. Only the recognised text is yielded, not a
    confidence value.
    """
    # Static whitelist used for testing; it intentionally lacks some letters.
    # The earlier approach derived the list from NAME_FORMATS (minus the
    # special "#" placeholder) plus the digits 0-9.
    allowed_chars = " 0123456789MPQTacefhilmnopqrstuy"

    # The first image arrives before the engine is created.
    pending = yield None

    with tesserocr.PyTessBaseAPI(psm=tesserocr.PSM.SINGLE_LINE) as engine:
        engine.SetVariable("tessedit_char_whitelist", allowed_chars)
        while True:
            # Recognise the current image, hand the text back, and wait for
            # the next image.
            engine.SetImage(pending)
            pending = yield engine.GetUTF8Text()
            if ADAPTIVE_CLASSIFIER:
                engine.ClearAdaptiveClassifier()
Example #3
0
def parse(url):
    '''Take url or path to file and return OCR'd text. Flag failure
    with non-200 status code.

    For a URL the image is downloaded and passed to ``ocr``; its result dict
    is returned with a ``'url'`` key added. Any ordinary failure on that path
    (download, decoding or OCR) is reported as a status-500 dict.

    For a local path the file is read directly by Tesseract and a dict with
    ``'status'`` (200, or 415 when no text was found), ``'caption'`` and
    ``'url'`` is returned.
    '''
    if _is_url(url):
        try:
            buffer = cStringIO.StringIO(urllib.urlopen(url).read())
            img = Image.open(buffer)
            res = ocr(img)
            res['url'] = url
            return res
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit still propagate instead of being turned into a
        # "couldn't retrieve" response.
        except Exception:
            return {
                'status': 500,
                'caption': "Couldn't retrieve the image at %s" % url,
                'url': url
            }
    else:  # is path to file
        with tesserocr.PyTessBaseAPI() as api:
            api.SetImageFile(url)
            text = api.GetUTF8Text().strip()
            return {
                'status': 200 if text else 415,
                'caption': text or 'Text not found.',
                'url': url
            }
Example #4
0
def imge(img_url):
    """Outline each detected text line in an image and overwrite the file.

    The image at ``img_url`` is loaded with OpenCV, handed to Tesseract as a
    PIL image, and every text-line component reported by Tesseract is marked
    with a green rectangle and the literal caption ``'text'``. The annotated
    image is written back to ``img_url``.
    """
    canvas = cv2.imread(img_url, cv2.IMREAD_UNCHANGED)
    # tesserocr takes PIL images, so convert the OpenCV (BGR) array to RGB.
    rgb_pixels = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(rgb_pixels)

    # The context manager ends the API on exit, including on error.
    with tr.PyTessBaseAPI() as api:
        api.SetImage(pil_img)
        # Each entry is (image, bounding-box dict with x/y/w/h keys,
        # block id, paragraph id).
        line_components = api.GetComponentImages(tr.RIL.TEXTLINE, True)
        # The recognised text is requested but not used further.
        api.GetUTF8Text()
        for _, bbox, _, _ in line_components:
            left, top = bbox['x'], bbox['y']
            right, bottom = left + bbox['w'], top + bbox['h']
            cv2.rectangle(canvas, (left, top), (right, bottom),
                          color=(0, 255, 0))
            cv2.putText(canvas, 'text', (left, top - 5),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)

    cv2.imwrite(img_url, canvas)
Example #5
0
    def ocr_whole(self):
        """Mark the text blocks Tesseract finds on ``self.img`` and save a copy.

        Runs block-level layout analysis (automatic segmentation with
        orientation/script detection) on the whole image, draws a red
        rectangle around every block on a copy of the image, and writes that
        copy to ``<img_ocr_marked_dir>/<filename_without_ext>.png``.
        """
        with tr.PyTessBaseAPI(
                path='C:\\Program Files\\Tesseract-OCR\\tessdata\\',
                lang='eng') as api:
            api.SetPageSegMode(tr.PSM.AUTO_OSD)

            source = self.img
            rgb_pixels = cv2.cvtColor(source, cv2.COLOR_BGR2RGB)
            api.SetImage(Image.fromarray(rgb_pixels))

            # Draw on a copy so the stored image stays untouched.
            annotated = np.copy(source)
            for _, bbox, _, _ in api.GetComponentImages(tr.RIL.BLOCK, True):
                top_left = (bbox['x'], bbox['y'])
                bottom_right = (bbox['x'] + bbox['w'], bbox['y'] + bbox['h'])
                cv2.rectangle(annotated, top_left, bottom_right,
                              color=(0, 0, 255))

            out_name = f"{self.filename_without_ext}.png"
            cv2.imwrite(os.path.join(self.img_ocr_marked_dir, out_name),
                        annotated)
            plt.show()
def image_accuracy_test(images, labels):
    """Run OCR over a set of form images and print accuracy/timing statistics.

    For every image the recognised text is lower-cased and each
    whitespace-separated token of the matching label is looked up in it
    (case-insensitive substring match). A form counts as perfectly recognised
    when all of its label tokens are found.

    Args:
        images: Sequence of image file paths readable by Tesseract.
        labels: Sequence of label strings; ``labels[i]`` belongs to
            ``images[i]``. An ``IndexError`` is raised if it is shorter than
            ``images``.

    Returns:
        None. The summary is printed to stdout.

    An empty ``images`` sequence prints an all-zero summary instead of
    raising ``ZeroDivisionError``, and does not start Tesseract at all. A
    label with no tokens counts as fully recognised (there is nothing to
    miss) instead of dividing by zero.
    """
    form_accuracies, elapsed_ms = [], []
    perfect_forms = 0

    if images:
        with tesserocr.PyTessBaseAPI() as api:
            for index, image_path in enumerate(images):
                started_ms = int(round(time.time() * 1000))
                api.SetImageFile(image_path)
                # Compare in lowercase so matching is case-insensitive.
                recognised = api.GetUTF8Text().lower()

                expected_cells = labels[index].split()
                hits = sum(1 for cell in expected_cells
                           if cell.lower() in recognised)
                if expected_cells:
                    accuracy = hits / len(expected_cells)
                else:
                    accuracy = 1.0

                if accuracy > .999999:
                    perfect_forms += 1
                form_accuracies.append(accuracy)
                elapsed_ms.append(int(round(time.time() * 1000)) - started_ms)

    processed = len(form_accuracies)
    mean_ms = sum(elapsed_ms) / processed if processed else 0
    mean_accuracy = 100 * sum(form_accuracies) / processed if processed else 0

    print('Forms Processed: ', processed)
    print('Forms Perfectly Recognized: ', perfect_forms)
    print('Average Processing Time: ', mean_ms, ' milliseconds')
    print('Average Cell Accuracy: ', mean_accuracy,
          '% of cells correctly recognized\n')
Example #7
0
def get_apis(model_names):
    """Create one raw-line Tesseract engine per model name.

    Args:
        model_names: Iterable of Tesseract language/model names.

    Returns:
        dict mapping each model name to a ``PyTessBaseAPI`` loaded from
        ``TESSDATA_PREFIX`` with that model and ``PSM.RAW_LINE``. The caller
        owns the engines.
    """
    return {
        name: tesserocr.PyTessBaseAPI(path=TESSDATA_PREFIX,
                                      lang=name,
                                      psm=tesserocr.PSM.RAW_LINE)
        for name in model_names
    }
Example #8
0
def image_to_string(img, lang):
    """Recognise the words in an image, re-joining words split by a hyphen.

    Args:
        img: Image accepted by ``PyTessBaseAPI.SetImage``.
        lang: Tesseract language to load.

    Returns:
        A ``filter`` iterator over the non-empty recognised words, in reading
        order. A word following one that ends in ``letters-`` is glued onto
        it with the trailing dash removed.
    """
    collected = []
    with tesserocr.PyTessBaseAPI(lang=lang, psm=3) as api:
        api.SetVariable("tessedit_char_whitelist", " \n" + CHARSET)
        api.SetImage(img)
        api.Recognize()

        word_level = tesserocr.RIL.WORD
        for item in tesserocr.iterate_level(api.GetIterator(), word_level):
            try:
                token = item.GetUTF8Text(word_level)
            except RuntimeError:
                # No text available for this element; move on.
                continue
            # Confidence is still queried, but no longer used to filter
            # words.
            item.Confidence(word_level)

            # A previous word ending in "<letters>-" is a line-break
            # hyphenation: merge this word into it.
            if collected and regex.match(r"[\p{Lu}\p{Ll}]+\-$",
                                         collected[-1]):
                collected[-1] = collected[-1][:-1] + token
            else:
                collected.append(token)

    return filter(bool, collected)
 def get_word_bounding_boxes(self, page):
     """Return the bounding box and text of every word recognised on a page.

     Args:
         page: Image accepted by both ``pytesseract.image_to_data`` and
             ``PyTessBaseAPI.SetImage``.

     Returns:
         list of ``[((left, top), (right, bottom)), word]`` entries, one per
         row of the pytesseract output that has exactly 12
         whitespace-separated fields.

     Also prints the number of word components tesserocr detects, as before.
     The tesserocr engine is now released through a context manager; it was
     previously never closed. The loop that only called ``SetRectangle``
     without reading anything back has been dropped.
     """
     tsv = pytesseract.image_to_data(page)

     with tr.PyTessBaseAPI() as api:
         api.SetImage(page)
         print(len(api.GetComponentImages(tr.RIL.WORD, True)))

     results = []
     for row_no, row in enumerate(tsv.splitlines()):
         if row_no == 0:
             # First row is the column header.
             continue
         fields = row.split()
         if len(fields) != 12:
             # Rows without a text field split into fewer columns.
             continue
         # Columns 6-9 are left, top, width, height; column 11 is the text.
         left, top, width, height = (int(value) for value in fields[6:10])
         word = fields[11]
         results.append([((left, top), (left + width, top + height)), word])
     return results
Example #10
0
    def ocr_regions(self):
        """OCR each image in ``self.img_region_list`` and save marked-up copies.

        Every region is converted to grayscale, recognised as a single column
        of text (the text is printed), and the text blocks Tesseract reports
        are outlined on a copy that is written to
        ``<img_regions_ocr_marked_dir>/<filename_without_ext>_<index>.png``.
        """
        def to_grayscale(region):
            # Drop the colour information; no thresholding is applied.
            return cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)

        with tr.PyTessBaseAPI(
                path='C:\\Program Files\\Tesseract-OCR\\tessdata\\',
                lang='eng',
                psm=tr.PSM.AUTO,
                oem=tr.OEM.LSTM_ONLY) as api:
            # Overrides the segmentation mode given to the constructor.
            api.SetPageSegMode(tr.PSM.SINGLE_COLUMN)

            for index, region in enumerate(self.img_region_list):
                gray_region = to_grayscale(region)
                api.SetImage(Image.fromarray(gray_region))
                annotated = np.copy(gray_region)

                block_components = api.GetComponentImages(tr.RIL.BLOCK, True)
                print(api.GetUTF8Text())

                for _, bbox, _, _ in block_components:
                    top_left = (bbox['x'], bbox['y'])
                    bottom_right = (bbox['x'] + bbox['w'],
                                    bbox['y'] + bbox['h'])
                    cv2.rectangle(annotated, top_left, bottom_right,
                                  color=(0, 0, 255))

                out_name = f"{self.filename_without_ext}_{index}.png"
                cv2.imwrite(
                    os.path.join(self.img_regions_ocr_marked_dir, out_name),
                    annotated)
Example #11
0
 def __init__(self):
     """Set up the work queues, the synchronisation primitives and the
     Tesseract engine, then call ``self.start()``.
     """
     # NOTE(review): if this class derives from threading.Thread, the base
     # initialiser must run before start() -- confirm against the class
     # header, which is not visible here.
     self._stopped = False
     self._queued, self._done = [], []

     # The condition variable shares the instance lock.
     self._lock = threading.Lock()
     self._condition = threading.Condition(self._lock)

     self._tesseract = tesserocr.PyTessBaseAPI()
     self.start()
Example #12
0
 def setUp(self):
     """Prepare the fixture: locate ``eurotext.tif`` next to this file,
     preload it as a PIL image when PIL is available, and create an
     initialised Tesseract API.
     """
     module_dir = os.path.dirname(__file__)
     self._test_dir = os.path.abspath(module_dir)
     self._image_file = os.path.join(self._test_dir, 'eurotext.tif')

     if pil_installed:
         with open(self._image_file, 'rb') as handle:
             self._image = Image.open(handle)
             # Read the pixel data now, while the file is still open.
             self._image.load()

     self._api = tesserocr.PyTessBaseAPI(init=True)
Example #13
0
    def process(self):
        """
        Performs the cropping.

        For every input file the page image is analysed by Tesseract at block
        level; the smallest axis-aligned rectangle enclosing all detected
        blocks is stored as the page ``Border`` and the updated PAGE XML is
        added to the output file group.
        """
        with tesserocr.PyTessBaseAPI(path=TESSDATA_PREFIX) as tessapi:
            for (n, input_file) in enumerate(self.input_files):
                pcgts = page_from_file(self.workspace.download_file(input_file))
                image = self.workspace.resolve_image_as_pil(pcgts.get_Page().imageFilename)
                log.debug("Cropping with tesseract")
                tessapi.SetImage(image)

                #
                # running extent of all detected blocks; starts "inverted"
                # so the first point always tightens it
                #
                min_x, min_y = image.width, image.height
                max_x, max_y = 0, 0

                # iterate over all boxes and compare their extent
                # to the min and max values
                for component in tessapi.GetComponentImages(tesserocr.RIL.BLOCK, True):
                    points, index = points_from_xywh(component[1]), component[2]

                    #
                    # the region reference in the reading order element
                    #
                    ID = "region%04d" % index
                    log.debug("Detected region '%s': %s", ID, points)

                    for pair in points.split(' '):
                        coords = pair.split(',')
                        x, y = int(coords[0]), int(coords[1])
                        # Each bound is updated independently. The previous
                        # if/elif chain skipped the max_x/max_y updates
                        # whenever an earlier branch had fired, which could
                        # leave the border too small.
                        min_x, max_x = min(min_x, x), max(max_x, x)
                        min_y, max_y = min(min_y, y), max(max_y, y)
                    log.debug("Updated page border: %i,%i %i,%i %i,%i %i,%i",
                              min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)

                #
                # set the identified page border
                #
                brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
                pcgts.get_Page().set_Border(brd)

                ID = concat_padded(self.output_file_grp, n)
                self.workspace.add_file(
                    ID=ID,
                    file_grp=self.output_file_grp,
                    mimetype=MIMETYPE_PAGE,
                    local_filename='%s/%s' % (self.output_file_grp, ID),
                    content=to_xml(pcgts).encode('utf-8'),
                )
Example #14
0
    def _line_height(self, polygon):
        key = tuple(polygon.centroid.coords[0])
        if key not in self._ocr:
            from .utils import polygons_to_mask
            mask = polygons_to_mask(self._unbinarized.shape, [polygon])

            minx, miny, maxx, maxy = polygon.bounds
            minx, miny = numpy.floor(numpy.array([minx,
                                                  miny])).astype(numpy.int32)
            maxx, maxy = numpy.ceil(numpy.array([maxx,
                                                 maxy])).astype(numpy.int32)

            pixels = self._unbinarized[miny:maxy, minx:maxx]
            mask = mask[miny:maxy, minx:maxx]
            pixels[numpy.logical_not(mask)] = 255

            with tesserocr.PyTessBaseAPI(
                    psm=tesserocr.PSM.SINGLE_BLOCK) as api:
                api.SetImage(PIL.Image.fromarray(pixels, "L"))

                heights = []
                for i, data in enumerate(api.GetTextlines()):
                    bbox = data[1]
                    heights.append(bbox["h"])

                if heights:
                    n_lines = len(heights)
                    lh = numpy.min(heights)
                else:
                    lh = maxy - miny
                    n_lines = 1

                if self._debug:
                    api.Recognize()

                    ri = api.GetIterator()
                    level = tesserocr.RIL.TEXTLINE

                    text = ""
                    #lines = []
                    for r in tesserocr.iterate_level(ri, level):
                        #baseline = r.Baseline(level)
                        #if baseline:
                        #	p1, p2 = baseline
                        #	lines.append(shapely.geometry.LineString([p1, p2]))

                        try:
                            text += r.GetUTF8Text(level) + " "
                        except RuntimeError:
                            pass

                    #print("txt", text.strip(), "lh", lh, "#", n_lines)
                else:
                    text = ""

            self._ocr[key] = (n_lines, lh, text)

        return self._ocr[key]
Example #15
0
def preprocess(observation):
    """Find the five word buttons in a frame and look up a vector for each.

    Args:
        observation: Sequence whose first item is either ``None`` or a
            mapping with a ``'vision'`` entry holding the frame pixels.

    Returns:
        A list of five ``Button`` objects (position plus word vector), or
        ``None`` when there is no frame, Tesseract does not find exactly
        five text lines, or any recognised word is missing from
        ``modelNLP.wv``.

    Side effects: writes ``SequenceBefore.png`` to the working directory and
    prints timing information relative to the global ``timeStart``.
    """
    if observation[0] is None:
        return None

    timeStop = datetime.datetime.now()
    print("voor frame aanpassing: ", timeStop - timeStart)
    buttonList = []
    frame = np.array(observation[0]['vision'])  # convert list to 3D-array
    # Crop to the area of interest (yellow is already filtered out as well;
    # otherwise x would start at 75).
    img = frame[125:387, 9:270]
    # The crop is written to disk and read back through PIL.
    cv2.imwrite('SequenceBefore.png', img)
    image = Image.open('SequenceBefore.png')
    # Grayscale, then binarise at 100 and upscale before OCR.
    img = image.convert('L')
    img_np = np.array(img)
    img_np = (img_np > 100) * 255
    img = PIL.Image.fromarray(img_np.astype(np.uint8))
    img = img.resize((int(img.size[0] * 3.5), int(img.size[1] * 3.5)),
                     PIL.Image.BILINEAR)
    timeStop = datetime.datetime.now()
    print("tijd tot aanpassen van frame: ", timeStop - timeStart)

    with tesserocr.PyTessBaseAPI() as api:
        api.SetImage(img)
        boxes = api.GetComponentImages(tesserocr.RIL.TEXTLINE, True)
        if len(boxes) == 5:
            for _, box, _, _ in boxes:
                # box is a dict with x, y, w and h keys
                api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
                ocrResult = api.GetUTF8Text().lower()
                ocrResult = re.sub('[^a-zA-Z]', '', ocrResult)
                print("the word is: ", ocrResult)
                # TODO get correct vector
                try:
                    vector = modelNLP.wv[ocrResult]
                except KeyError:
                    print("Word not found in google...")
                    continue
                # NOTE(review): coordinates are divided by 3.3 although the
                # image was scaled by 3.5 -- presumably a calibration;
                # confirm.
                x__ = box['x'] / 3.3 + 20
                y__ = box['y'] / 3.3 + 140
                buttonList.append(Button(x__, y__, vector))

    if len(buttonList) != 5:
        return None

    timeStop = datetime.datetime.now()
    print(" tijd om alle OCR uit te voeren en de Word2Vec te doen: ",
          timeStop - timeStart)
    return buttonList
Example #16
0
    def do_vanish(self, dark=False):
        """
        detect text in image and remove this applying a mask

        The image at ``self.impath`` is searched for text lines with
        Tesseract; the detected boxes are turned into a mask and painted
        over with OpenCV inpainting. On the first pass the image is
        thresholded before detection; if no text is recognised, detection is
        retried once on the unthresholded image.

        :param dark: when True, skip the thresholding step (used for the
            retry and for dark images)
        :return: the inpainted image

        Changes from the earlier version: the result of the retry is now
        returned instead of being discarded, and the ``return`` no longer
        sits in a ``finally`` block, where it hid any error behind an
        ``UnboundLocalError``.
        """
        org_img = cv2.imread(self.impath, cv2.IMREAD_UNCHANGED)
        cv_img = org_img

        # if parameter is not True, try text detection with additional thresholding
        if not dark:
            cv_img = cv2.threshold(org_img, 209, 255, cv2.THRESH_BINARY)[1]

        # convert image to single channel; reused for OCR and for the mask
        gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
        pil_img = Image.fromarray(gray)

        # The context manager releases the engine before any retry starts.
        with tr.PyTessBaseAPI() as api:
            api.SetImage(pil_img)
            # get bounding boxes of text
            boxes = api.GetComponentImages(tr.RIL.TEXTLINE, True)
            text = api.GetUTF8Text()

        # try again with other param if no text found
        if not text.strip() and not dark:
            return self.do_vanish(dark=True)

        # convert bounding boxes to coordinates
        coordinates = self.boxes_to_coordinates(boxes)
        # create mask of detected characters in image
        mask = self.create_mask(gray, coordinates)

        # convert to 8bit for inpainting function
        gray_8 = mask.astype('uint8')
        # paint over the masked area
        return cv2.inpaint(org_img, gray_8, 3, cv2.INPAINT_TELEA)
Example #17
0
def ocr_tesserocr(im, alphabet=None, single_line=False):
    """Return the text Tesseract recognises in ``im``.

    Args:
        im: Image accepted by ``PyTessBaseAPI.SetImage``.
        alphabet: Optional string of characters to whitelist; ignored when
            empty or ``None``.
        single_line: When true, treat the image as a single text line.
    """
    with tesserocr.PyTessBaseAPI() as engine:
        if single_line:
            engine.SetPageSegMode(tesserocr.PSM.SINGLE_LINE)
        if alphabet:
            engine.SetVariable("tessedit_char_whitelist", alphabet)

        engine.SetImage(im)
        return engine.GetUTF8Text()
Example #18
0
def font_details(req_image):
    """Report font attributes of the first recognised word in an image file.

    Args:
        req_image: Path of the image file to analyse.

    Returns:
        Tuple ``(bold, font_name, pointsize)`` taken from Tesseract's word
        font attributes. The full attribute dict is also printed.
    """
    with tesserocr.PyTessBaseAPI() as api:
        api.SetImageFile(req_image)
        # Recognition must run before the result iterator has data.
        api.Recognize()
        result_iter = api.GetIterator()
        attrs = result_iter.WordFontAttributes()
        print(attrs)
    return tuple(attrs[field] for field in ("bold", "font_name", "pointsize"))
Example #19
0
    def _run(self):
        """OCR frames from the frame queue until a falsy item is received.

        Each frame is preprocessed, recognised, and the stripped text is fed
        to the text filter together with a confidence value: Tesseract's
        mean confidence reduced by the shortfall of the white-region
        confidence from 100, floored at 0. A frame without text flushes the
        filter. Every ``fps * 2`` frames a debug image is rendered and fed
        to the filter as well.
        """
        self._check_tessdir()
        self._prime_frame_queue()

        with tesserocr.PyTessBaseAPI(lang=self._config.language) as api:
            api.SetVariable("tessedit_write_images", "T")

            for name, setting in self._config.tesseract_variables.items():
                _logger.info('Setting tesseract variable %s=%s', name, setting)
                api.SetVariable(name, setting)

            if self._config.clear_adaptive_classifier:
                _logger.info('Clearing adaptive classifier after each run')

            frame_index = 0
            while True:
                frame = self._frame_queue.get()
                if not frame:
                    # Falsy item is the stop signal.
                    break

                regions = self._compute_regions()
                image = self._preprocess_image(frame, regions)

                # Raw 8-bit grayscale: 1 byte per pixel, row stride = width.
                assert image.mode == 'L'
                api.SetImageBytes(bytes(image.getdata(0)), image.width,
                                  image.height, 1, image.width)

                text = api.GetUTF8Text().strip()
                ocr_image = api.GetThresholdedImage()

                if not text:
                    self._text_filter.flush_text()
                else:
                    mean_conf = api.MeanTextConf()
                    white_conf = self._compute_white_region_confidence(
                        ocr_image, regions)
                    adjusted = max(0, mean_conf - (100 - white_conf))
                    self._text_filter.feed_text(
                        text,
                        confidence=adjusted,
                        section=self._config.section_name)

                if frame_index % (self._config.fps * 2) == 0:
                    debug_image = self._render_debug_image(
                        image, ocr_image, api, regions)
                    self._text_filter.feed_image(
                        debug_image, section=self._config.section_name)

                if self._config.clear_adaptive_classifier:
                    api.ClearAdaptiveClassifier()

                frame_index += 1

        self._text_filter.flush_text(0)
        _logger.info('Tesseract quit')
Example #20
0
    def process(self):
        """
        Performs the region segmentation.

        For every input file the page image is analysed by Tesseract at
        block level. Each detected block is added to the page as a text
        region and referenced from the reading order (created on demand).
        The resulting PAGE XML is added to the output file group.
        """
        with tesserocr.PyTessBaseAPI(path=TESSDATA_PREFIX) as tessapi:
            print(self.input_file_grp)
            for file_no, input_file in enumerate(self.input_files):
                pcgts = from_file(self.workspace.download_file(input_file))
                page = pcgts.get_Page()
                page_image = self.workspace.resolve_image_as_pil(
                    page.imageFilename)
                log.debug("Detecting regions with tesseract")
                tessapi.SetImage(page_image)

                blocks = tessapi.GetComponentImages(tesserocr.RIL.BLOCK, True)
                for block in blocks:
                    # block[1] is the bounding box, block[2] the block index
                    points = points_from_xywh(block[1])
                    block_index = block[2]

                    # id used for the region and for its reading-order entry
                    region_id = "region%04d" % block_index
                    log.debug("Detected region '%s': %s", region_id, points)

                    # <pg:ReadingOrder>, created the first time it is needed
                    reading_order = page.get_ReadingOrder()
                    if reading_order is None:
                        reading_order = ReadingOrderType()
                        page.set_ReadingOrder(reading_order)

                    # <pg:OrderedGroup>, likewise created lazily
                    ordered_group = reading_order.get_OrderedGroup()
                    if ordered_group is None:
                        ordered_group = OrderedGroupType(id="reading-order")
                        reading_order.set_OrderedGroup(ordered_group)

                    # <pg:RegionRefIndexed>
                    region_ref = RegionRefIndexedType(regionRef=region_id,
                                                      index=block_index)
                    ordered_group.add_RegionRefIndexed(region_ref)

                    # the text region itself
                    text_region = TextRegionType(
                        id=region_id, Coords=CoordsType(points=points))
                    page.add_TextRegion(text_region)

                file_id = concat_padded(self.output_file_grp, file_no)
                self.workspace.add_file(
                    ID=file_id,
                    file_grp=self.output_file_grp,
                    basename=file_id + '.xml',
                    mimetype=MIMETYPE_PAGE,
                    content=to_xml(pcgts).encode('utf-8'),
                )
Example #21
0
def ocr(name):
	"""OCR every ``*.tiff`` region image of a document and store the results.

	Region images are read from ``<cwd>/tmp/<name>/regions``. For each one
	the recognised words and their confidences are saved through
	``update_field`` under the keys ``"text"`` and ``"word_conf"``. Finally
	the regions are removed via ``docManager.delete_regions``.
	"""
	region_dir = Path.cwd() / "tmp" / name / "regions"
	# The return value is not used below; the call itself is kept.
	get_data(name)

	for tiff_path in region_dir.glob("*.tiff"):
		region_image = Image.open(tiff_path)
		with tr.PyTessBaseAPI(psm=tr.PSM.SINGLE_LINE) as api:
			api.SetImage(region_image)
			raw_text = api.GetUTF8Text()
			words = raw_text.replace("\n", "").split()
			confidences = api.AllWordConfidences()
			update_field(name, str(tiff_path), "text", words)
			update_field(name, str(tiff_path), "word_conf", confidences)

	docManager.delete_regions(name)
Example #22
0
def preprocess(observation):
    """Find the five word buttons in a frame and attach a noisy word vector.

    Args:
        observation: Sequence whose first item is either ``None`` or a
            mapping with a ``'vision'`` entry holding the frame pixels.

    Returns:
        A list of five ``Button`` objects, or ``None`` when there is no
        frame or Tesseract does not find exactly five text lines. Words
        missing from ``modelNLP.wv`` fall back to the vector of ``"house"``;
        known words get Gaussian noise with scale ``vectorDeviation`` added.

    Side effect: writes ``SequenceBefore.png`` to the working directory.
    """
    if observation[0] is None:
        return None

    buttonList = []
    frame = np.array(observation[0]['vision'])  # convert list to 3D-array
    # Crop to the area of interest (yellow is already filtered out as well;
    # otherwise x would start at 75).
    img = frame[125:387, 9:270]
    # The crop is written to disk and read back through PIL.
    cv2.imwrite('SequenceBefore.png', img)
    image = Image.open('SequenceBefore.png')
    # Grayscale, then binarise at 100 and upscale before OCR.
    img = image.convert('L')
    img_np = np.array(img)
    img_np = (img_np > 100) * 255
    img = PIL.Image.fromarray(img_np.astype(np.uint8))
    img = img.resize((int(img.size[0] * 3.5), int(img.size[1] * 3.5)),
                     PIL.Image.BILINEAR)

    with tesserocr.PyTessBaseAPI() as api:
        api.SetImage(img)
        boxes = api.GetComponentImages(tesserocr.RIL.TEXTLINE, True)
        if len(boxes) == 5:
            for _, box, _, _ in boxes:
                # box is a dict with x, y, w and h keys
                api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
                ocrResult = api.GetUTF8Text().lower()

                # Raw string: '\s' in a plain literal is an invalid escape
                # sequence. The pattern itself is unchanged (it strips
                # whitespace and '+' characters).
                ocrResult = re.sub(r'[\s+]', '', ocrResult)
                # TODO make the vector depend on the OCR result
                try:
                    vector = modelNLP.wv[ocrResult]
                    vector = np.random.normal(vector, vectorDeviation)
                except KeyError:
                    vector = modelNLP.wv["house"]
                    print("word not found in google...")
                # NOTE(review): coordinates are divided by 3.3 although the
                # image was scaled by 3.5 -- presumably a calibration;
                # confirm.
                x__ = box['x'] / 3.3 + 15
                y__ = box['y'] / 3.3 + 140
                buttonList.append(Button(x__, y__, vector))

    return buttonList if len(buttonList) == 5 else None
Example #23
0
File: ocr.py Project: HqWei/mmocr
    def get_tesserocr_api():
        """Get tesserocr api depending on different platform.

        On Linux the default tessdata location is used. On Windows the
        tessdata directory is assumed to sit next to the ``tesseract``
        executable found on PATH.

        Returns:
            An initialised ``tesserocr.PyTessBaseAPI``.

        Raises:
            RuntimeError: on Windows, when the API cannot be initialised
                (message points to the installation guide).
            NotImplementedError: on any other platform.
        """
        import subprocess
        import sys

        # startswith() also covers interpreters that report 'linux2'.
        if sys.platform.startswith('linux'):
            api = tesserocr.PyTessBaseAPI()
        elif sys.platform == 'win32':
            try:
                p = subprocess.Popen(
                    'where tesseract', stdout=subprocess.PIPE, shell=True)
                output = p.communicate()[0].decode('utf-8')
                # `where` prints one line per match. Use only the first so
                # that several installs on PATH (or the trailing newline)
                # cannot leak into the directory name.
                hits = output.splitlines()
                first_hit = hits[0].strip() if hits else ''
                path = first_hit.split('\\')[:-1] + ['tessdata']
                tessdata_path = '/'.join(path)
                api = tesserocr.PyTessBaseAPI(path=tessdata_path)
            except RuntimeError:
                raise RuntimeError(
                    'Please install tesseract first.\n Check out the'
                    ' installation guide at'
                    ' https://github.com/UB-Mannheim/tesseract/wiki')
        else:
            raise NotImplementedError
        return api
Example #24
0
def time_reader():
    """Generator that OCRs digit-only images passed to it with ``.send()``.

    Prime the generator with ``next()`` (which yields ``None``), then send
    images one at a time; each ``send`` returns the text Tesseract read,
    treating the image as a single word made of the digits 0-9.
    """
    # The first image arrives before the engine is created.
    pending = yield None

    with tesserocr.PyTessBaseAPI(psm=tesserocr.PSM.SINGLE_WORD) as engine:
        # Times consist of digits only.
        engine.SetVariable("tessedit_char_whitelist", "0123456789")
        while True:
            engine.SetImage(pending)
            # Recognition is the slow step; hand the text back and wait for
            # the next image.
            pending = yield engine.GetUTF8Text()
            if ADAPTIVE_CLASSIFIER:
                engine.ClearAdaptiveClassifier()
Example #25
0
def get_textlines(pil_image):
    """Yield one dict per non-empty text line found in ``pil_image``.

    Each dict holds ``'box'`` (bounding-box dict), ``'confidence'``
    (Tesseract's mean confidence for the line), ``'text'`` (stripped) and
    ``'image'`` (the line's component image). Lines whose text is empty
    after stripping are skipped.
    """
    with tesserocr.PyTessBaseAPI() as api:
        api.SetImage(pil_image)
        line_components = api.GetComponentImages(tesserocr.RIL.TEXTLINE, True)
        for line_image, bbox, _, _ in line_components:
            # Restrict recognition to this line only.
            api.SetRectangle(bbox['x'], bbox['y'], bbox['w'], bbox['h'])
            line_text = api.GetUTF8Text().strip()
            if line_text:
                yield dict(
                    box=bbox,
                    confidence=api.MeanTextConf(),
                    text=line_text,
                    image=line_image,
                )
Example #26
0
    def frame(self):
        """Return the bounding rectangle of all connected components that
        Tesseract finds in ``self.foreground``.

        The result is whatever ``cv2.boundingRect`` returns for the set of
        component corner points.
        """
        import tesserocr

        corners = []
        with tesserocr.PyTessBaseAPI(psm=tesserocr.PSM.AUTO_ONLY) as api:
            api.SetImage(self.foreground)
            for _, bbox in api.GetConnectedComponents():
                left, top = bbox["x"], bbox["y"]
                # Top-left and bottom-right corner of every component.
                corners.extend([(left, top),
                                (left + bbox["w"], top + bbox["h"])])

        return cv2.boundingRect(numpy.array(corners, dtype=numpy.int32))
Example #27
0
def tessocr(impath, mnm):
    """Run OCR on one image under several page-segmentation modes.

    The image is read from ``'thresimages/' + impath`` and recognized once
    for each page-segmentation mode value in (7, 8, 9, 10, 13).  Every run
    of non-word characters is removed from the recognized text.

    The results are written as JSON (indent 4) to the image path with its
    last four characters replaced by ``.txt``.  The mapping is keyed by the
    mode value; each value is a list holding the cleaned text, and the
    first mode's list additionally starts with ``mnm``.

    :param impath: image file name relative to the ``thresimages/`` folder.
    :param mnm: value placed in front of the text in the first mode's list.
    :return: None.
    """
    impath = 'thresimages/' + impath
    # Raw string: a backslash-W inside a normal literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    non_word = re.compile(r'\W+')
    outdict = {}
    # Only the first mode's list starts with ``mnm``; the list is reset after
    # every mode, so later entries hold the cleaned text alone.
    namelist = [mnm]
    for i in (7, 8, 9, 10, 13):
        with tesserocr.PyTessBaseAPI(path='tessdata', lang='eng',
                                     psm=i) as api:
            api.SetImageFile(impath)
            a = api.GetUTF8Text()
            namelist.append(non_word.sub('', a))
            print(namelist)
            outdict[i] = namelist
            namelist = []
    # NOTE: the slice assumes a four-character suffix such as ".png".
    with open(impath[:-4] + '.txt', 'w') as json_file:
        json.dump(outdict, json_file, indent=4)
Example #28
0
def block_detail(path, left, top, right, bottom):
    """Describe the font detected in a region of an image as CSS text.

    The image at ``path`` is recognized with the ``chi_sim+eng`` models
    inside a rectangle anchored at (``left``, ``top``) whose width and
    height are 90% of ``right - left`` and ``bottom - top``.  The word font
    attributes reported by the result iterator (without advancing it) are
    turned into CSS declarations.

    :return: the CSS declarations as one string, or the comment string
        ``'/* 检测出错 */'`` when no font attributes are available.
    """
    with tesserocr.PyTessBaseAPI(lang='chi_sim+eng') as api:
        api.SetImageFile(path)
        api.SetVariable("save_blob_choices", "T")
        # NOTE(review): width and height are floats here; confirm that
        # SetRectangle accepts non-integer values.
        api.SetRectangle(left, top, (right-left)*0.9, (bottom-top)*0.9)
        api.Recognize()
        word_iter = api.GetIterator()
        # Per the original author's notes, GetUTF8Text() is not accurate for
        # this purpose and RowAttributes() adds nothing to the result, so
        # only the word font attributes are used.
        dic = word_iter.WordFontAttributes()
        print(dic)
        if dic is None:
            return '/* 检测出错 */'

        # font-family: the detected name plus optional generic families.
        has_serif = dic['serif']
        css = "font-family: " + dic["font_name"]
        if has_serif:
            css += ',' + 'serif'
        css += ",monospace;\n" if dic['monospace'] else ";\n"

        # (attribute key, CSS property prefix, value if set, value if unset)
        toggles = (
            ('bold', "font-weight: ", "bold", "normal"),
            ('italic', "font-style: ", "italic", "normal"),
            ('smallcaps', "font-variant: ", "small-caps", "normal"),
            ('underlined', "text-decoration: ", "underline", "none"),
        )
        for key, prefix, when_set, when_unset in toggles:
            css += prefix + (when_set if dic[key] else when_unset) + ";\n"

        # The reported point size is scaled down by a factor of five.
        css += "font-size: " + str(dic['pointsize']/5) + ';'
        return css
Example #29
0
def ocr(file, lang='eng'):
    """Extract the text in an image and display it.

    :param file: path to the image file (a QR-code picture, according to
        the original documentation)
    :param lang: recognition language, English by default
    """
    grayscale = Image.open(file).convert('L')
    with tesserocr.PyTessBaseAPI(lang=lang) as api:
        api.SetImage(denoise(grayscale))
        recognized = api.GetUTF8Text().strip()

        if not recognized:
            # Nothing recognized: retry on the inverted image, denoised
            # with an explicit second argument of 100.
            api.SetImage(ImageOps.invert(denoise(grayscale, 100)))
            recognized = api.GetUTF8Text().strip()

    # Show a placeholder message when both attempts came back empty.
    showtext(recognized or 'No Text Found!')
Example #30
0
    def ocr(self):
        """Mark detected paragraph regions on every item image and save them.

        For each image in ``self.img_item_list`` the paragraph-level
        components reported by Tesseract (sparse-text page segmentation) are
        outlined on a copy of the image, which is written to
        ``self.img_item_sub_marked_dir`` as
        ``<filename_without_ext>_<index>.png``.

        As written, no text is recognized: the text-extraction calls are
        commented out, so ``s`` stays empty and only the banner lines are
        printed at the end.
        """
        # Accumulated text output; nothing in the visible code appends to it.
        s = ""
        # Earlier pytesseract-based implementation, kept for reference:
        # item_text_list = []
        # for i in range(len(self.img_item_list)):
        #     item = self.img_item_list[i]
        #     out = pytesseract.image_to_string(item)
        #     out_t = out.strip()
        #     if len(out_t) != 0:
        #         item_text_list.append(out_t)
        #         # s = s + f"\nitem {i}:\n" + out_t
        #         s = s + f"\n" + out_t
        # Neither list is filled anywhere in the visible code.
        item_text_list = []
        sub_list = []
        # NOTE(review): the tessdata path is hard-coded to a Windows install
        # location.
        with tr.PyTessBaseAPI(
                path='C:\\Program Files\\Tesseract-OCR\\tessdata\\',
                lang='eng') as api:
            api.SetPageSegMode(tr.PSM.SPARSE_TEXT)
            for i in range(len(self.img_item_list)):
                item = self.img_item_list[i]
                # Convert the array from BGR to RGB channel order before
                # handing it to PIL / Tesseract.
                pil_img = Image.fromarray(cv2.cvtColor(item,
                                                       cv2.COLOR_BGR2RGB))
                api.SetImage(pil_img)
                # Draw on a copy so the original item image stays untouched.
                item_sub_marked = np.copy(item)

                # Paragraph-level components of the current image.
                boxes = api.GetComponentImages(tr.RIL.PARA, True)
                # Text extraction is currently disabled:
                # text = api.GetUTF8Text()
                # print(text)
                # Iterate over the returned list and draw one rectangle per
                # component box.
                for (im, box, _, _) in boxes:
                    x, y, w, h = box['x'], box['y'], box['w'], box['h']
                    cv2.rectangle(item_sub_marked, (x, y), (x + w, y + h),
                                  color=(0, 0, 255))

                # Save the annotated copy, named after the item's index.
                path = os.path.join(self.img_item_sub_marked_dir,
                                    f"{self.filename_without_ext}_{i}.png")
                cv2.imwrite(path, item_sub_marked)
        # Writes every image collected in sub_list; the list is empty in the
        # visible code, so this loop currently does nothing.
        for i in range(len(sub_list)):
            path = os.path.join(self.img_item_sub_dir,
                                f"{self.filename_without_ext}_{i}.png")
            cv2.imwrite(path, sub_list[i])
        # Print the collected text between two banner lines.
        print(f"{self.filename_without_ext}============")
        print(s)
        print("============")