Example #1
def build_model(config):
    vocab = Vocab(config['vocab'])
    device = config['device']

    model = OCR(len(vocab), config)
    model = model.to(device)

    return model, vocab
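A minimal usage sketch for this factory, assuming the surrounding project provides the Vocab and OCR classes it references; the config keys and values below are placeholders:

config = {
    'vocab': 'abcdefghijklmnopqrstuvwxyz0123456789',  # character set handed to Vocab
    'device': 'cuda:0',                               # or 'cpu'
    # ...plus whatever hyperparameters OCR(len(vocab), config) expects
}
model, vocab = build_model(config)
model.eval()  # assuming a torch.nn.Module, ready for inference on config['device']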
Example #2
def __init__(self, initialize_whatsapp=True, session=None):
    chrome_options = Options()
    if session:
        chrome_options.add_argument("--user-data-dir={}".format(session))
        try:
            self.browser = webdriver.Chrome(options=chrome_options)
        except Exception:
            # if a previous session is still open, close it and retry
            if getWindowsWithTitle('WhatsApp - Google Chrome'):
                getWindowsWithTitle('WhatsApp - Google Chrome')[0].close()
                info("Session is already open. \"WhatsApp - Google Chrome\" is closing...")
            if getWindowsWithTitle('New Tab - Google Chrome'):
                getWindowsWithTitle('New Tab - Google Chrome')[0].close()
                info("Session is already open. \"New Tab - Google Chrome\" is closing...")
            self.browser = webdriver.Chrome(options=chrome_options)
    else:
        self.browser = webdriver.Chrome()
        info("Chrome Driver is initialized successfully.")
    if initialize_whatsapp:
        self.browser.get("https://web.whatsapp.com/")
        info("WhatsApp Web Client is opening...")
        self.find_wait("copyable-text.selectable-text", By.CLASS_NAME, timeout=30)
        self.browser.maximize_window()
        self.OCR = OCR()
        from splunk import Splunk
        self.splunk = Splunk()
        from bot import Bot
        self.bot = Bot(self.splunk)
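The listing shows only the __init__ body, so the class name is not visible; a hypothetical instantiation (class name and session directory are placeholders) would look like:

client = WhatsAppClient(initialize_whatsapp=True,
                        session="/home/me/.whatsapp-session")
# reusing the same --user-data-dir lets Chrome skip the QR-code login next time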
Example #3
def onLeftButtonUp(event):
    self.sel = False
    try:
        self.canvas.delete(lastDraw)
    except Exception:
        pass
    sleep(0.1)
    # Handle the case where the left button was pressed at the bottom-right
    # and released at the top-left of the selection
    left, right = sorted([self.X.get(), event.x])
    top, bottom = sorted([self.Y.get(), event.y])
    pic = ImageGrab.grab((left + 1, top + 1, right, bottom))
    # Save the screenshot (originally: pop up a save dialog)
    fileName = "/home/robotsl/workspace/ocr_enhancement/OCR/images/for_ocr.png"
    # Close the current selection window
    self.top.destroy()
    pic.save(fileName)
    tk_obj.init_window.state('normal')
    result, log = OCR.OCR_OR_LOGMAX("log")
    # sorted, indes = torch.sort(log, -1)
    print(log[:, :10].shape)
    # tk_obj.set_result(result, "before_enhance")
    tk_obj.init_data_Text.delete('1.0', 'end')
    # tk_obj.init_data_Text.insert("end", "test")
    tk_obj.init_data_Text.insert("end", result)
    tk_obj.result_data_Text.delete('1.0', 'end')
Example #4
def main(args):
    pdf_file = args['pdf_file']
    pdf_images = pdf2img(pdf_file)
    images = pdf_images
    # images = select_imgs_with_form(pdf_images)
    ocr = OCR()
    for index, image in enumerate(images):
        image = correct_img_bias(image)
        fr = formRecognition(image)
        tables = fr.run()
        for table in tables:
            for idx, cell in enumerate(table):
                cell_img = ocr.img_enhancement(cell)
                res = ocr.text_recognition(cell_img)
                print(res)
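A hypothetical entry point for the pipeline above; main() only needs a dict-like args object with a 'pdf_file' key, so the filename below is a placeholder:

if __name__ == '__main__':
    main({'pdf_file': 'scanned_tables.pdf'})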
Example #5
def parse_request():

    if request.method == 'POST':
        print("receiving")
        url = request.json.get('url')
        raw_string = OCR(url)
        print(raw_string)
        translated_words = translate_text(raw_string)
        return jsonify(translated_words)
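The snippet omits the Flask route registration. Assuming the module creates a Flask app object named app (the URL rule here is an assumption), the handler could be wired up like this:

from flask import Flask

app = Flask(__name__)
app.add_url_rule('/ocr-translate', view_func=parse_request, methods=['POST'])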
Example #6
    def __init__(self, map_image, map_path):
        super(Map, self).__init__()

        self._map_image = map_image
        self._map_path = map_path
        self._ocr = OCR()

        self._margin_left = self.MARGIN_LEFT
        self._margin_top = self.MARGIN_TOP
        self._margin_right = self.MARGIN_RIGHT
        self._margin_bottom = self.MARGIN_BOTTOM

        self._x = OCR.INVALID_COORDINATE
        self._y = OCR.INVALID_COORDINATE
        self._width = OCR.INVALID_COORDINATE
        self._height = OCR.INVALID_COORDINATE

        self._RefreshCoordinates()
Example #7
def upload():
    target_path = os.path.join(APP_ROOT, 'static/images/')

    if not os.path.isdir(target_path):
        os.mkdir(target_path)

    file = request.files.getlist("file")[0]
    filename = file.filename
    final_path = "".join([target_path, filename])
    file.save(final_path)

    imagepath = "./static/images/" + filename
    save_to_file = filename.split(".")
    save_to_file = "./static/images/" + save_to_file[0] + "_processed." + save_to_file[1]
    OCR(imagepath, save_to_file)
    #converting the image to text
    text = pytesseract.image_to_string(Image.open(save_to_file), lang='pol')
    #loading the second page
    return render_template("processed.html",
                           image_unprocessed=imagepath,
                           display=text)
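As in the previous Flask example, the route decorator is not shown; a hypothetical registration, assuming the same kind of app object and an HTML form that posts a multipart field named "file":

app.add_url_rule('/upload', view_func=upload, methods=['POST'])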
Example #8
def __init__(self):
    OCR.__init__(self)
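This __init__ only makes sense inside a subclass of OCR; a context sketch with a made-up subclass name:

class CustomOCR(OCR):
    def __init__(self):
        super().__init__()  # equivalent to the explicit OCR.__init__(self) call above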
Example #9
    inputmanager = InputManager(
        outDim=(2000, 1000),  # wxh
        inputtype='test',
        winKeyword='Zoom Meeting',
        crop=(325, 62, 325 + 1280, 62 + 960),  #x1,y1,x2,y2  (TL,BR)
    )

    # initialize pre process manager thread
    analyzer = AnalyzerThread(
        inputmanager,
        transIngressQueue,
        frameAverage=20,
    )

    # initialize post process modules
    ocr = OCR()
    parser = Parser()

    # initialize post process manager thread
    translator = TranslatorThread(ocr, parser, transIngressQueue)

    # start threads
    analyzer.start()
    translator.start()

    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        analyzer.kill()
        translator.kill()
Example #10
def get_result(self):
    return "".join([
        OCR(self.to_numberic_grid(i)).match_char()
        for i in range(self.chars_len)
    ])
Example #11
def train(self, chars):
    for i, char in enumerate(chars):
        OCR(self.to_numberic_grid(i)).train_char(char)
Example #12
def main():
    print('{}{:=<50}{}'.format(CP_Y, '', CP_C))
    print('{}**{}{:^46}{}**{}'.
            format(CP_Y, CP_R, 'Game Information Collector', CP_Y, CP_C))
    print('{}**{}{:^46}{}**{}'.
            format(CP_Y, CP_R, 'By: Abhishek Chaurasia', CP_Y, CP_C))
    print('{}{:=<50}{}'.format(CP_Y, '', CP_C))
    # Grab frames from screen or video
    # Replace it with any other frame grabber
    frame_grabber = VideoReader(args.video_path)

    # Initialization
    ocr = OCR(args.model, args.debug)
    items = {}
    n_items = 0

    keyvalues = open(args.key_info, 'r')
    # Ignore first two lines
    keyvalues.readline()
    keyvalues.readline()

    for line in keyvalues:
        item = line.split()
        # parsed info:    keyword | tx     | ty     | bx     | by
        items[n_items] = (item[0], item[2], item[4], item[6], item[8])
        n_items += 1

    ########################################
    # Ignore this section:
    # Important only when you care about printed values
    print('{:=<50}'.format(''))
    pad = (50//n_items) - 2
    for n_item in items:
        print_val(items[n_item][0], pad, n_item, len(items))
    print('\n{:-<50}'.format(''))
    ########################################

    # Get next frame
    while frame_grabber.next_frame():
        current_frame = frame_grabber.frame
        # Crop section of the frame containing value you are interested in
        for n_item in items:
            tx = int(items[n_item][1])
            ty = int(items[n_item][2])
            bx = int(items[n_item][3])
            by = int(items[n_item][4])
            key_part = current_frame[ty:by, tx:bx, :]

            # send the cropped area and get its value
            value = ocr.forward(key_part)

            # Create box around individual ROIs
            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(current_frame, str(value),
                    (tx-10,ty-10), font, 1, (255,255,255), 1)
            # cv2.rectangle(current_frame, (tx, ty), (bx, by), (0, 255, 0), 1)
            print_val(value, pad, n_item, len(items))
        print("")

        if not args.debug:
            cv2.startWindowThread()
            cv2.namedWindow("Video")
        cv2.imshow('Video', current_frame)
        # waitKey pumps the GUI event loop so the frame actually renders
        cv2.waitKey(1)
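The key_info file format is not shown, but the loop above skips two header lines and then, after a whitespace split, reads fields 0, 2, 4, 6 and 8, which implies literal separator tokens in between. A hypothetical file consistent with that parsing (keywords and coordinates are made up):

key_info.txt
------------
column headers (ignored)
units / notes (ignored)
score | 120 | 40 | 260 | 90
kills | 300 | 40 | 420 | 90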
Example #13
def newOcr(filepath, model):
    # return the recognition result so callers (e.g. cropToOcr) can use it
    return ocr.OCR(filepath, base_model=model)
Example #14
def cropToOcr(filePath, recT, typeT, debug=False, isusebaidu=False):
    ocrResult = {}
    img = Image.open(filePath)

    if not os.path.exists(
            jwkj_get_filePath_fileName_fileExt(filePath)[0] + "tmp/" +
            jwkj_get_filePath_fileName_fileExt(filePath)[1]):
        os.mkdir(
            jwkj_get_filePath_fileName_fileExt(filePath)[0] + "tmp/" +
            jwkj_get_filePath_fileName_fileExt(filePath)[1])

    # Load the self-trained OCR model (VAT special invoice model); set typeT to 11 to use it
    model = ocr.load_model()

    for x in recT:
        sp = img.crop((recT[x][0], recT[x][1], recT[x][0] + recT[x][2], recT[x][1] + recT[x][3]))
        if recT[x][0] == 0 and recT[x][1] == 0 and recT[x][2] == 0 and recT[x][3] == 0:
            print("↑--------↑--------↑--------↑ recT : " + x + " is error↑--------↑--------↑")

            continue
        sFPN = jwkj_get_filePath_fileName_fileExt(filePath)[0] + "tmp/" + jwkj_get_filePath_fileName_fileExt(filePath)[
            1] + "/" + jwkj_get_filePath_fileName_fileExt(filePath)[
                   1] + "_" + x + ".jpg"
        sp.save(sFPN)

        if not debug:
            # if (x != 'invoiceNo'):
            # # Tested: recognizing this way still does not fix characters the font cannot recognize
            if isusebaidu:
                midResult = OcrPic(sFPN)
            else:
                midResult = newOcr(sFPN, model)
            # else:
            #     midResult = OcrNoPic(sFPN)

            print('{}   isUseBaidu: {}'.format(midResult, isusebaidu))
            ocrResult[x] = midResult

    print(ocrResult)
    pC = SemanticCorrect.posteriorCrt.posteriorCrt()

    if typeT == 11 and not debug:
        import OcrForVat
        if ocrResult['invoiceDate'][:4] == '开票日期' or len(ocrResult['invoiceDate']) < 4:
            recT['invoiceDate'] = OcrForVat.mubanDetectInvoiceDate(filePath)['invoiceDate']
            sp = img.crop((recT['invoiceDate'][0], recT['invoiceDate'][1],
                           recT['invoiceDate'][0] + recT['invoiceDate'][2],
                           recT['invoiceDate'][1] + recT['invoiceDate'][3]))

            sFPN = jwkj_get_filePath_fileName_fileExt(filePath)[0] + "tmp/" + \
                   jwkj_get_filePath_fileName_fileExt(filePath)[
                       1] + "/" + jwkj_get_filePath_fileName_fileExt(filePath)[
                       1] + "_" + 'invoiceDateFix' + ".jpg"
            sp.save(sFPN)

            midResult = OcrPic(sFPN)

            print('invoiceDateFix: ' + midResult)
            ocrResult['invoiceDate'] = midResult

    js = InterfaceType.JsonInterface.invoice()
    if typeT == 11:
        pC.setVATParaFromVATDict(ocrResult)
        pC.startVATCrt()
        js.setValueWithDict(pC.VATdic)
        jsoni = js.dic

    else:
        pC.setTrainTicketParaFromDict(ocrResult)
        pC.startTrainTicketCrt()
        js.setValueWithDict(pC.dic)
        jsoni = js.dic

    return json.dumps(jsoni).encode().decode("unicode-escape")
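The recT argument's shape is implied by the crops above: a dict mapping field names to (left, top, width, height) boxes in pixels. A hypothetical call (field names, coordinates and path are placeholders):

recT = {
    'invoiceNo':   (1450, 120, 420, 60),
    'invoiceDate': (1450, 200, 420, 60),
}
print(cropToOcr('/data/invoices/sample.jpg', recT, typeT=11))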
Example #15
class Map(Rectangle):
    def __init__(self, map_image, map_path):
        super(Map, self).__init__()

        self._map_image = map_image
        self._map_path = map_path
        self._ocr = OCR()

        self._margin_left = self.MARGIN_LEFT
        self._margin_top = self.MARGIN_TOP
        self._margin_right = self.MARGIN_RIGHT
        self._margin_bottom = self.MARGIN_BOTTOM

        self._x = OCR.INVALID_COORDINATE
        self._y = OCR.INVALID_COORDINATE
        self._width = OCR.INVALID_COORDINATE
        self._height = OCR.INVALID_COORDINATE

        self._RefreshCoordinates()

    def IsValid(self):
        if not self._map_image:
            return False

        if (self._x == OCR.INVALID_COORDINATE
                or self._y == OCR.INVALID_COORDINATE
                or self._width == OCR.INVALID_COORDINATE
                or self._height == OCR.INVALID_COORDINATE):
            return False

        if self._width < 0.001 or self._width > 0.05:
            return False

        if self._height < 0.001 or self._height > 0.05:
            return False

        _, _, width, height = self._GetMapGeometry()

        height_pixel_ratio = self.GetHeight() / height
        width_pixel_ratio = self.GetWidth() / width

        # The ratio should not be very different from each other, otherwise
        # we OCR'ed one of the coordinates wrong.
        if abs(height_pixel_ratio - width_pixel_ratio) > 0.0001:
            return False

        return True

    def Dispose(self):
        self._map_image.close()
        self._map_image = None
        self._ocr = None

    def GetPath(self):
        return self._map_path

    def GetMapImage(self):
        if not self.IsValid():
            return None

        # The reality here is we store a bit more than just the
        # map image, but the whole contents of the PDF including
        # the coordinates, logos, etc. So when the map is requested,
        # we need to slice it from this image.
        return self._CropGeometry(*self._GetMapGeometry())

    def GetX(self):
        if self._x != OCR.INVALID_COORDINATE:
            return self._x

        x, y, map_width, map_height = self._GetMapGeometry()

        offset = _BBOX_OFFSET_
        width = _BBOX_WIDTH_
        height = _BBOX_HEIGHT_

        y_offset = y + map_height

        # Getting the X coordinate from the bottom left corner because
        # it turns out that the upper left is often corrupted.
        image = self._CropGeometry(x, y_offset + offset, width, height)
        self._x = self._ocr.GetDecimalDegrees(image)

        return self._x

    def GetY(self):
        if self._y != OCR.INVALID_COORDINATE:
            return self._y

        x, y, _, _ = self._GetMapGeometry()

        offset = _BBOX_OFFSET_
        width = _BBOX_HEIGHT_
        height = _BBOX_WIDTH_

        image = self._CropGeometry(x - offset - width, y, width, height)
        image.rotate(90)
        self._y = self._ocr.GetDecimalDegrees(image)

        return self._y

    def GetWidth(self):
        if self._width != OCR.INVALID_COORDINATE:
            return self._width

        self._width = self._GetX2()
        if self._width != OCR.INVALID_COORDINATE:
            self._width -= self.GetX()

        return self._width

    def GetHeight(self):
        if self._height != OCR.INVALID_COORDINATE:
            return self._height

        self._height = self._GetY2()
        if self._height != OCR.INVALID_COORDINATE:
            self._height = abs(self._height - self.GetY())

        return self._height

    def SaveMapImageAsPNG(self, basename):
        image = self.GetMapImage()

        image.resize(image.width // 2, image.height // 2)
        image.save(filename="%s.png" % basename)

    def SaveMapImageAsTIFF(self, basename):
        image = self.GetMapImage()

        # If we don't add the alpha channel, the tiles will get an empty
        # black background.
        image.alpha_channel = True

        # Set compression to LZW, using the low level API here because
        # this function is not supported by the high level bindings yet.
        library.MagickSetCompression(image.wand, 11)

        # TODO: Ideally we should save as a GeoTIFF with the coordinates
        # tags. That would spare us one unnecessary step on the pipeline for
        # generating tiles.
        image.save(filename="%s.tif" % basename)

    def _GetX2(self):
        x, y, map_width, map_height = self._GetMapGeometry()

        offset = _BBOX_OFFSET_
        width = _BBOX_WIDTH_
        height = _BBOX_HEIGHT_

        x_offset = x + map_width
        y_offset = y + map_height

        image = self._CropGeometry(x_offset - width, y_offset + offset, width,
                                   height)

        return self._ocr.GetDecimalDegrees(image)

    def _GetY2(self):
        x, y, map_width, map_height = self._GetMapGeometry()

        offset = _BBOX_OFFSET_
        width = _BBOX_HEIGHT_
        height = _BBOX_WIDTH_

        x_offset = x + map_width
        y_offset = y + map_height

        image = self._CropGeometry(x_offset + offset, y_offset - height, width,
                                   height)
        image.rotate(90)

        return self._ocr.GetDecimalDegrees(image)

    def _RefreshCoordinates(self):
        self.GetWidth()
        self.GetHeight()

        if self.IsValid():
            return

        # We have some corrupted PDFs with the top margin
        # misplaced compared to the "sane" PDFs. We try
        # to detect the margin and ultimately, interpolate
        # the upper coordinate using degrees per pixel.
        left, top, right, bottom = _GetMargins(self._map_image)

        self._margin_left = left
        self._margin_top = top
        self._margin_right = right
        self._margin_bottom = bottom

        # Margin detection can really screw up things. I'm being
        # a bit lazy here and not checking for the scenarios that
        # can go wrong. Pokemon exception handling FTW.
        try:
            self.GetWidth()
            self.GetHeight()
        except Exception:
            return

        if self.IsValid():
            return

        y2 = self._GetY2()
        if y2 == OCR.INVALID_COORDINATE:
            return

        pixel_width = self.WIDTH - self._margin_left - self._margin_right
        pixel_height = self.HEIGHT - self._margin_top - self._margin_bottom

        self._height = self._width / float(pixel_width) * pixel_height
        self._y = y2 + self._height

    def _CropGeometry(self, x1, y1, width, height):
        x2 = x1 + width
        y2 = y1 + height

        return self._map_image[x1:x2, y1:y2]

    def _GetMapGeometry(self):
        width = self.WIDTH - self._margin_left - self._margin_right
        height = self.HEIGHT - self._margin_top - self._margin_bottom

        return self._margin_left, self._margin_top, width, height
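A usage sketch for the class; it assumes the wand imaging library (implied by the low-level MagickSetCompression call), that Rectangle supplies the WIDTH/HEIGHT/MARGIN_* constants, and a placeholder scan file:

from wand.image import Image as WandImage

page = WandImage(filename='quad_scan.pdf', resolution=300)  # placeholder path
quad_map = Map(page, 'quad_scan.pdf')
if quad_map.IsValid():
    quad_map.SaveMapImageAsTIFF('quad_scan')  # LZW-compressed TIFF with alpha
quad_map.Dispose()  # closes the underlying image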