コード例 #1
0
ファイル: main.py プロジェクト: jayesh365/pdf-image-ocr
def main():
    """Convert the form PDF to an image, crop two fields and print their OCR.

    The date and comments regions are cut out of ``assets/images/page.jpg``
    (presumably written by ``pdfToImage`` — TODO confirm) into their own
    files, and the OCR result of each file is then printed.
    """
    pdfToImage('assets/pdfs/form.pdf')
    image = "assets/images/page.jpg"
    # (crop box, destination file) for every field of interest.
    fields = (
        ((95, 634, 1557, 708), 'assets/images/date.jpg'),
        ((99, 776, 1557, 1008), 'assets/images/comments.jpg'),
    )
    # Crop everything first, then run OCR, in the same order as listed.
    for box, destination in fields:
        crop(image, box, destination)
    for _, destination in fields:
        print(ocr(destination))
コード例 #2
0
def obtener_traducciones(mapa):
    """Return the translations read from the 'tra' area of ``mapa``.

    The area image comes from ``detectar_area_variantes``; it is
    pre-processed with ``cvr.preprocesar_texto_otsu`` and passed to ``ocr``
    twice, once with ``'eng'`` and once with ``'fra'``.

    Returns:
        dict with ``'frances'`` (item 0 of the French reading) and
        ``'ingles'`` (item 1 of the English reading).
    """
    region = detectar_area_variantes(mapa)['tra']['im']
    region_limpia = cvr.preprocesar_texto_otsu(region, 127, 255, (3, 7), 3)
    lectura_eng = ocr(region_limpia, 'eng')
    lectura_fra = ocr(region_limpia, 'fra')
    # presumably each reading is a sequence of text lines — TODO confirm
    return {
        'frances': lectura_fra[0],
        'ingles': lectura_eng[1],
    }
コード例 #3
0
def main():
    """Scan ``pic2.jpg``, run OCR on the scan and show it in a window.

    The image goes through ``edgeDetection`` -> ``findContour`` -> ``scan``;
    the scanned result is pre-processed and handed to ``ocr`` (whose return
    value is not used here), then displayed until a key is pressed.
    """
    source = cv2.imread("pic2.jpg")
    outline = findContour(edgeDetection(source))
    scanned = scan(outline, source)

    # OCR works on a pre-processed copy; the preview shows the plain scan.
    ocr(preprocess(scanned))

    preview = imutils.resize(scanned, height=650)
    cv2.imshow("Scanned", preview)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
コード例 #4
0
def menu():
    """Show the stored receipts and, on POST, ingest an uploaded image.

    GET renders ``menu.html`` with every ``FileContents`` row ordered by
    ``date_created``.

    POST saves the uploaded ``img`` file (aborting with 400 when its
    extension is not in ``UPLOAD_EXTENSIONS``), runs ``ocr`` on it, stores
    the JSON-encoded result as a new ``FileContents`` row and renders the
    refreshed list. Any failure during OCR/storage yields an HTML error
    message instead of a traceback.
    """
    if request.method != 'POST':
        data = FileContents.query.order_by(FileContents.date_created).all()
        return render_template('menu.html', data=data)

    uploaded_file = request.files['img']
    filename = uploaded_file.filename
    if filename != '':
        file_ext = os.path.splitext(filename)[1]
        if file_ext not in app.config['UPLOAD_EXTENSIONS']:
            abort(400)
        uploaded_file.save(
            os.path.join(app.config['UPLOAD_PATH'], filename))

    # NOTE(review): the file is saved under UPLOAD_PATH but read back from
    # a hard-coded "static/uploads/" — confirm both point to the same place.
    # NOTE(review): `filename` is client-controlled and used unsanitised.
    path = "static/uploads/" + str(filename)
    try:
        content = json.dumps(ocr(path))

        newFile = FileContents(name=filename, path=path, content=content)
        db.session.add(newFile)
        db.session.commit()

        data = FileContents.query.order_by(FileContents.date_created).all()
        return render_template('menu.html', data=data)
    except Exception:
        # Exception (not a bare except) so SystemExit/KeyboardInterrupt
        # still propagate instead of being reported as a bad upload.
        return '<h1>There was a problem processing the receipt, make sure you uploaded the correct file</h1>'
コード例 #5
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_cinco(self):
        numero_cinco = """
 __
|__
 __|
"""
        self.assertEquals(5, ocr(numero_cinco))
コード例 #6
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_nove(self):
        numero_nove = """
 __
|__|
 __|
"""
        self.assertEquals(9, ocr(numero_nove))
コード例 #7
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_zero(self):
        numero_zero = """
 __
|  |
|__|
"""        
        self.assertEquals(0, ocr(numero_zero))
コード例 #8
0
    def gameResult(self):
        """Print an end-of-game summary read from the current screenshot.

        Template images listed in ``self.game_params.img`` are matched
        against ``self.imgbg`` (refreshed first when it is ``None``) to
        report crown matches and the winning side, then a small screen
        region is grabbed and its digits are OCR'd and printed.
        """
        print('=' * 20 + 'game result:')
        if self.imgbg is None:
            self.updateImgbg()

        # Crown templates: (key in game_params.img, label to print).
        for key, label in (('royale_red', 'red crown:'),
                           ('royale_blue', 'blue crown:')):
            template = cv2.imread(self.game_params.img[key])
            found, hits = self.inHere(template, self.imgbg, 0.97)
            if found:
                print(label, len(hits), hits)

        # Winner banners, checked in the same order as before (blue, red).
        for key, message in (('winner_blue', 'blue win!'),
                             ('winner_red', 'red win!')):
            template = cv2.imread(self.game_params.img[key])
            found, _ = self.inHere(template, self.imgbg, 0.95)
            if found:
                print(message, end=' ')

        # presumably the trophy-count area of the result screen — TODO confirm
        left = self.game_params.game_area_left + 310
        top = self.game_params.game_area_top + 410
        cups_img = self.grabScreen(left, top, left + 24, top + 15)
        # cups_img.show()
        cups_str = ocr(cups_img, type='digits')
        print(cups_str)
        print('=' * 20 + 'game over')
コード例 #9
0
def ocr_image(cache, url, codelang):
    """Return the OCR text for the image at ``url``, using ``cache``.

    Args:
        cache: object providing ``set(key, text)``; also passed to
            ``get_from_cache``.
        url: address of the image to download.
        codelang: language code looked up in ``ocr.tesseract_languages``
            (``'eng'`` is used when it is unknown).

    Returns:
        ``ret_val(0, text)`` on success (cached or freshly computed),
        ``ret_val(1, ...)`` when the download produced no file and
        ``ret_val(2, "ocr failed")`` when OCR returned ``None``.
    """
    # This is checked in bot_listening but must be redone here, so that if
    # the OCR for the same page is requested several times it runs only once.
    text = get_from_cache(cache, url, codelang)
    if text:
        return ret_val(0, text)

    url = url.encode('utf-8')
    cache_key = image_key(url)
    lang = ocr.tesseract_languages.get(codelang, 'eng')

    basename = os.path.expanduser('~/tmp') + '/tesseract/image_%s' % cache_key
    image_filename = basename + ".jpg"

    utils.copy_file_from_url(url, image_filename)
    if not os.path.exists(image_filename):
        return ret_val(1, "could not download url: %s" % url)

    try:
        text = ocr.ocr(image_filename, basename, lang)
    finally:
        # Remove the scratch files on every path; previously a failed OCR
        # left the downloaded image behind.
        os.remove(image_filename)
        if os.path.exists(basename + ".txt"):
            os.remove(basename + ".txt")

    if text is None:
        return ret_val(2, "ocr failed")

    cache.set(cache_key, text)

    return ret_val(0, text)
コード例 #10
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_seis(self):
        numero_seis = """
 __
|__  
|__|
"""
        self.assertEquals(6, ocr(numero_seis))
コード例 #11
0
ファイル: main.py プロジェクト: hj1115hj/namecard-project
def test2():
    """Download the name-card image named in the request and reply with its text.

    The image URL is read from ``action.detailParams.namecard.origin`` of
    the JSON body, saved to ``./image.jpeg``, passed through ``ocr`` and
    then through Tesseract (Korean + English). The recognised text, with
    all spaces removed, is printed and returned in a ``simpleText`` reply.
    """
    payload = request.json
    image_url = payload['action']['detailParams']['namecard']['origin']

    # NOTE(review): the URL comes from the request body and is fetched
    # as-is — confirm that the caller is trusted.
    with urllib.request.urlopen(image_url) as remote, \
            open('./image.jpeg', 'wb') as local:
        local.write(remote.read())

    result_file = ocr('./image.jpeg')
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
    recognised = pytesseract.image_to_string(result_file, lang='kor+eng')
    recognised = recognised.strip().replace(' ', '')
    print(recognised)

    simple_text = {"text": recognised}
    return {
        "version": "2.0",
        "template": {"outputs": [{"simpleText": simple_text}]},
    }
コード例 #12
0
ファイル: table.py プロジェクト: nandandutta/bas_mat_dekho
    def build(self):
        """Fill ``self.data`` with the OCR text found in each table cell.

        Cells are detected first when ``self.cells`` is ``None``. Every row
        of the ``ocr(self.image)`` result is assigned to the cell that
        ``find_cell_for_point`` returns for its bounding-box centroid; rows
        matching no cell are dropped. ``self.data`` ends up with the same
        row/column layout as ``self.cells``, each entry being the
        space-joined texts of the cell or ``None`` when it received none.
        """
        if self.cells is None:
            self.find_cells()

        ocr_data = ocr(self.image)

        # One empty bucket of words per cell, mirroring self.cells.
        buckets = [[[] for _ in row] for row in self.cells]

        for _, word in ocr_data.iterrows():
            right = word['left'] + word['width']
            bottom = word['top'] + word['height']
            centroid = get_centroid(word['left'], right, word['top'], bottom)

            cell = self.find_cell_for_point(centroid)
            if cell is not None:
                buckets[cell[0]][cell[1]].append(word['text'])

        self.data = [
            [' '.join(words) if words else None for words in row]
            for row in buckets
        ]
コード例 #13
0
    def do_POST(self):
        """Run OCR on the posted image and answer with a signed result URL.

        The ``content`` form field is decoded as an image and saved as
        ``<timestamp>.bmp``; ``ocr.ocr`` processes it and its array result
        is saved as ``<timestamp>.png``, uploaded through ``storage`` and
        the signed URL is returned as JSON (``{"url": ...}``).

        The scratch files (``.bmp``, ``.tsv``, ``.png``) are removed even
        when OCR or the upload raises.
        """
        formData = cgi.FieldStorage(self.rfile,
                                    headers=self.headers,
                                    environ={'REQUEST_METHOD': 'POST'})
        content = formData.getvalue('content')
        img = Image.open(BytesIO(content))
        # NOTE(review): second-resolution timestamps collide when two
        # requests arrive within the same second.
        base_name = str(int(time.time()))
        image_name = base_name + '.bmp'
        ocr_name = base_name + '.png'
        try:
            img.save(image_name)
            ocr_image = ocr.ocr(image_name, base_name)
            Image.fromarray(ocr_image).save(ocr_name)
            blob = storage.upload(CONFIG.project_id,
                                  CONFIG.buket,
                                  ocr_name,
                                  service_account_file='service_account.json')
            signed_url = storage.signed_url_for_png(blob)
        finally:
            # Only delete what actually exists: on a failure some of these
            # files may never have been written.
            for leftover in (image_name, base_name + '.tsv', ocr_name):
                if shutil.os.path.exists(leftover):
                    shutil.os.remove(leftover)

        self.send_response(HTTPStatus.OK)
        self.send_header('Content-type', 'application/json')
        self.end_headers()
        responseBody = json.dumps({'url': signed_url})

        self.wfile.write(responseBody.encode('utf-8'))
コード例 #14
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_tres(self):
        numero_tres="""
 __
 __|
 __|
"""
        self.assertEquals(3, ocr(numero_tres))
コード例 #15
0
ファイル: ocrdaemon.py プロジェクト: Aubreymcfato/phetools
def ocr_image(cache, url, codelang):
    """Return the OCR text for the image at ``url``, using ``cache``.

    Args:
        cache: object providing ``set(key, text)``; also passed to
            ``get_from_cache``.
        url: address of the image to download.
        codelang: language code looked up in ``ocr.tesseract_languages``
            (``'eng'`` is used when it is unknown).

    Returns:
        ``ret_val(0, text)`` on success (cached or freshly computed),
        ``ret_val(1, ...)`` when the download produced no file and
        ``ret_val(2, "ocr failed")`` when OCR returned ``None``.
    """
    # This is checked in bot_listening but must be redone here, so that if
    # the OCR for the same page is requested several times it runs only once.
    text = get_from_cache(cache, url, codelang)
    if text:
        return ret_val(0, text)

    url = url.encode('utf-8')
    cache_key = image_key(url)
    lang = ocr.tesseract_languages.get(codelang, 'eng')

    basename = os.path.expanduser('~/tmp') + '/tesseract/image_%s' % cache_key
    image_filename = basename + ".jpg"

    utils.copy_file_from_url(url, image_filename)
    if not os.path.exists(image_filename):
        return ret_val(1, "could not download url: %s" % url)

    try:
        text = ocr.ocr(image_filename, basename, lang)
    finally:
        # Remove the scratch files on every path; previously a failed OCR
        # left the downloaded image behind.
        os.remove(image_filename)
        if os.path.exists(basename + ".txt"):
            os.remove(basename + ".txt")

    if text is None:
        return ret_val(2, "ocr failed")

    cache.set(cache_key, text)

    return ret_val(0, text)
コード例 #16
0
ファイル: solve.py プロジェクト: AjayUmakanth/ScanNSolve
def solve(path):
    """OCR the equation image at ``path`` and return the solution text.

    Arithmetic input is evaluated (equations) or checked (inequalities);
    otherwise the equation is dispatched on the variable count reported by
    ``alg.checknovar`` and, for one variable, on its degree.

    Returns:
        str: the formatted answer or an explanatory message. The
        three-variable and too-many-variables cases used to print their
        message and return ``None``; they now print *and* return it, so
        callers always receive a string.
    """
    eq = ocr.ocr(path)
    if detect.isArith(eq):
        if detect.isArithEq(eq):
            return f"\t\t{arith.arithEq(eq)}\n"
        return "\t\tTrue\n" if arith.arithIneq(eq) else "\t\tFalse\n"

    nterm, var = alg.checknovar(eq)
    if nterm == 1:
        # `degree` rather than `pow`, which shadowed the builtin.
        degree = alg.checkdegree(eq, var[0])
        if degree == 1:
            return linear(eq, var[0])
        if degree == 2:
            return quadratic(eq, var[0])
        if degree == 3:
            return "Cubic equation\n"
        return "Can solve up to 3rd degree polynomial only!!\n"
    if nterm == 2:
        return simult(eq, var, nterm)

    if nterm == 3:
        message = "Simultaneous equation three variables\n"
    else:
        message = "Can solve equations upto 3 vaiables only!!!\n"
    # Still printed for backward compatibility with console users.
    print(message)
    return message
コード例 #17
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_sete(self):
        numero_sete = """
 __
   |
   |
"""
        self.assertEquals(7, ocr(numero_sete))
コード例 #18
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_oito(self):
        numero_oito = """
 __
|__|
|__|
"""
        self.assertEquals(8, ocr(numero_oito))
コード例 #19
0
def do_one_page(opt, page_nr, filename):
    """Extract page ``page_nr`` of ``filename`` as an image and OCR it.

    Nothing happens when ``extract_image`` yields no image. Otherwise
    ``ocr.ocr`` is run with ``opt.out_dir + 'page_NNNN'`` as output base;
    when ``opt.compress`` is set the expected output file (``.html`` for
    the ``'hocr'`` config, ``.txt`` otherwise) is compressed and the
    uncompressed file removed. The extracted image is always deleted.
    """
    tiff_name = extract_image(opt, page_nr, filename)
    if not tiff_name:
        return

    page_base = opt.out_dir + 'page_%04d' % page_nr
    extension = '.html' if opt.config == 'hocr' else '.txt'
    out_filename = page_base + extension

    ocr.ocr(tiff_name, page_base, opt.lang, opt.config)
    if opt.compress:
        # presumably compress_file appends its own suffix to the target
        # name, since the source is removed right after — TODO confirm
        utils.compress_file(out_filename, out_filename, opt.compress)
        os.remove(out_filename)

    os.remove(tiff_name)
コード例 #20
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_dez(self):
        numero_dez = """
      __
   | |  |
   | |__|
"""
        print repr(numero_dez)
        self.assertEquals(10, ocr(numero_dez))
コード例 #21
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_um(self):
        numero_um = """
   
   |
   |
"""
        
        self.assertEquals(1, ocr(numero_um))
コード例 #22
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_quatro(self):
        numero_quatro="""

|__|
   |
"""
        
        self.assertEquals(4, ocr(numero_quatro))
コード例 #23
0
def obtener_titulo(mapa):
    """Return the title read from the 'tit' area of ``mapa``.

    The area image comes from ``detectar_area_variantes``; it is
    pre-processed with ``cvr.preprocesar_texto_adapta`` and read with
    ``ocr`` using the ``'spa'`` language.

    Returns:
        dict with ``'numero'`` (first item of the reading, spaces removed)
        and ``'ententrada'`` (remaining items joined with single spaces).
    """
    region = detectar_area_variantes(mapa)['tit']['im']
    region_limpia = cvr.preprocesar_texto_adapta(region, 255, 71, 30)
    lectura = ocr(region_limpia, 'spa')
    # presumably `lectura` is a list of text lines — TODO confirm
    numero = lectura[0].replace(' ', '')
    resto = ' '.join(lectura[1:])
    return {'numero': numero, 'ententrada': resto}
コード例 #24
0
ファイル: test_ocr.py プロジェクト: abeltje1/dojo
    def test_numero_dois(self):
        numero_dois = """
 __
 __|
|__
"""
        
        self.assertEquals(2, ocr(numero_dois))
コード例 #25
0
ファイル: ocr_djvu.py プロジェクト: WeftWiki/phetools
def do_one_page(opt, page_nr, filename):
    """Extract page ``page_nr`` of ``filename`` as an image and OCR it.

    Nothing happens when ``extract_image`` yields no image. Otherwise
    ``ocr.ocr`` is run with ``opt.out_dir + 'page_NNNN'`` as output base;
    when ``opt.compress`` is set the expected output file (``.hocr`` for
    the ``'hocr'`` config, ``.txt`` otherwise) is compressed and the
    uncompressed file removed. The extracted image is always deleted.
    """
    tiff_name = extract_image(opt, page_nr, filename)
    if not tiff_name:
        return

    page_base = opt.out_dir + 'page_%04d' % page_nr
    extension = '.hocr' if opt.config == 'hocr' else '.txt'
    out_filename = page_base + extension

    ocr.ocr(tiff_name, page_base, opt.lang, opt.config)
    if opt.compress:
        # presumably compress_file appends its own suffix to the target
        # name, since the source is removed right after — TODO confirm
        utils.compress_file(out_filename, out_filename, opt.compress)
        os.remove(out_filename)

    os.remove(tiff_name)
コード例 #26
0
def getyzm():
    """Fetch the site's captcha image, OCR it and submit it for validation.

    Returns:
        tuple ``(accepted, code)``: ``accepted`` is True when the validation
        response text contains ``'success'`` and ``code`` is the OCR'd
        captcha; ``(False, '')`` when the validation request fails.
    """
    print("getyzm")
    reply = a.get("https://isisn.nsfc.gov.cn/egrantindex/validatecode.jpg",
                  result=False, o=True)
    image = Image.open(io.BytesIO(reply.content))
    yzm = ocr(image)
    try:
        check = a.post("https://isisn.nsfc.gov.cn/egrantindex/funcindex/validate-checkcode",
                       "checkCode=" + yzm)
        return 'success' in check.text, yzm
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still
        # interrupt a retry loop built around this function.
        return False, ''
コード例 #27
0
ファイル: shinyhunter.py プロジェクト: DBJoran/Shinyhunter
def getPokemonInfo():
    """Read the Pokémon name from a fresh screenshot and look up its shiny data.

    Returns:
        tuple ``(x, y, shinyColor)`` obtained from ``ds.getShinyListX``,
        ``ds.getShinyListY`` and ``ds.getShinyListColor`` for the lower-cased
        name returned by ``ocr.ocr('pokemon', screenshot)``.
    """
    toolkit.takeScreenshot()
    screenshot = ds.getScreenshot()
    name = ocr.ocr('pokemon', screenshot).lower()
    return (
        ds.getShinyListX(name),
        ds.getShinyListY(name),
        ds.getShinyListColor(name),
    )
コード例 #28
0
    def test_image(self):
        # Every entry of `cases` maps a screenshot name to the expected OCR
        # output fragment and, optionally, the expected stderr content.
        for name, expected in cases.items():
            out, err = ocr(os.path.join("screenshots", name))
            wanted_err = expected["err"] if "err" in expected else b""
            assert err == wanted_err
            assert expected["ocr_out"] in out

        # view.jpg yields a resolution warning on stderr and only
        # whitespace on stdout.
        out, err = ocr(os.path.join("screenshots", "view.jpg"))
        assert err == b"Warning. Invalid resolution 0 dpi. Using 70 instead.\n"
        assert b' \n\n' == out
コード例 #29
0
        def workerThreadFunc():
            """Run OCR over every path in ``ocr.file_list`` and write the log.

            Prints a header and appends the same header to ``ocr.textlist``,
            runs ``ocr.ocr`` on each file while advancing the progress bar,
            then writes ``ocr.textlist`` plus an end timestamp to ``fl`` and
            closes it. ``self.ongoing`` is True for the duration of the run.
            """
            self.ongoing = True

            print('==========')
            print('光学字符识别')
            print('启动时间：' + time.asctime(time.localtime(time.time())))
            print('共找到文件数：' + str(int(len(ocr.file_list)) - 1))
            # One entry of file_list is the list file itself (skipped below),
            # hence the "- 1".
            Counts = (int(len(ocr.file_list)) - 1)
            time.sleep(0.5)
            self.ui.progressBar.setRange(0, Counts)
            self.ui.progressBar.setValue(0)
            print('==========')
            ocr.textlist.append('==========')
            ocr.textlist.append('光学字符识别')
            ocr.textlist.append('启动时间：' +
                                time.asctime(time.localtime(time.time())))
            ocr.textlist.append('共找到文件数：' + str(int(len(ocr.file_list) - 1)))
            ocr.textlist.append('==========')
            count = 1
            # NOTE: the loop variable is the attribute ocr.path itself, so
            # the current path stays visible on the `ocr` object.
            for ocr.path in ocr.file_list:
                print(ocr.file_list)

                if ocr.path == "C:\\MonianHello\\list.txt":
                    continue
                try:
                    # ocr.transimg(ocr.path)
                    time.sleep(0.5)
                    self.ui.progressBar.setValue(count)
                    print('{0} / {1}'.format(str(count), str(Counts)))
                    ocr.ocr(ocr.path)
                    count += 1
                except Exception:
                    # Exception (not a bare except): a failing file is
                    # reported and skipped, but SystemExit and
                    # KeyboardInterrupt are no longer swallowed.
                    print('出现内部错误')
            for i in ocr.textlist:
                fl.write(i)
                fl.write('\n')
            fl.write('结束时间：' + time.asctime(time.localtime(time.time())))
            fl.close()
            print('写入成功，已将文件写入' + str(ocr.outputtext))
            print('结束时间：' + time.asctime(time.localtime(time.time())))
            print('识别完成，现在将结果写入文件...')
            time.sleep(5)
            self.ongoing = False
コード例 #30
0
def upload_file():
    """On POST, OCR the uploaded file and render its closest matches.

    The upload is saved under its own filename in the working directory,
    the OCR result is reduced to its ``\\w+`` tokens joined by spaces, and
    the first six rows of ``cosine_similarity_T(10, text)`` are rendered
    with ``index.html``. Any other request method returns ``None``.
    """
    if request.method != 'POST':
        return None

    upload = request.files['file']
    file_name = upload.filename
    # NOTE(review): the client-supplied filename is used as the save path
    # without sanitising — confirm this endpoint is not publicly reachable.
    upload.save(file_name)

    raw_text = str(ocr(str(file_name)))
    words = ' '.join(re.findall(r'\w+', raw_text))
    top_matches = cosine_similarity_T(10, words).head(6)
    return render_template('index.html', frame=top_matches)
コード例 #31
0
 def __init__(self, image, caption=None):
     """Store ``image`` and its caption, normalising the image dimensions.

     When ``caption`` is missing or empty it is obtained with
     ``ocr.ocr(image)``. An image whose outer length differs from ``dimX``
     or whose first row length differs from ``dimY`` is replaced by the
     result of ``alignDimensions.alignDimensions``.
     """
     self.image = image
     # Fall back to photo OCR when no usable caption was supplied.
     self.caption = caption if caption else ocr.ocr(self.image)

     # Make image dimensions uniform.
     already_uniform = (len(self.image) == dimX
                        and len(self.image[0]) == dimY)
     if not already_uniform:
         self.image = alignDimensions.alignDimensions(
             self.image, dimX, dimY)
コード例 #32
0
ファイル: mainInterface.py プロジェクト: aCrazyCoder/pyOCR
    def __init__(self):
        """
        Initialise the main widget: UI, camera, title bar, system tray,
        OCR client, login dialog, state variables and signal connections.
        :return: null
        """
        # Superclass initialisation
        super().__init__()

        # UI initialisation
        self.ui = Ui_mainWidget()
        self.ui.setupUi(self)
        self.grabKeyboard()
        self.setMouseTracking(True)
        self.setWindowFlags(Qt.FramelessWindowHint)
        self.setWindowIcon(QIcon('OCR.ico'))

        # Camera initialisation
        self.camera = QCamera()
        self.imageCapture = QCameraImageCapture(self.camera)
        self.viewsetting = QCameraViewfinderSettings()
        self.initimplement()

        # Title bar initialisation
        self.initTitleBar()

        # System tray initialisation
        self.tray = QSystemTrayIcon()
        self.tray.setIcon(QIcon('OCR.ico'))
        self.initTray()

        # OCR recognition client
        # NOTE(review): the app id / secret id / secret key below are
        # hard-coded credentials committed to source — they should be
        # rotated and loaded from configuration or the environment.
        self.OCR = ocr()
        self.OCR.setappid('1257206643')
        self.OCR.setsecretid('AKIDFTddWEg9Ncsz0sE7oOpBNOExdDdeCUJ3')
        self.OCR.setsecretkey('FQitsgUND8yfrZK0RrBMOJB5tWhCm5Ol')

        # Login dialog initialisation (frameless, application-modal)
        self.logWidget = QWidget()
        self.logui = Ui_Form()
        self.logui.setupUi(self.logWidget)
        self.logWidget.setWindowFlags(Qt.FramelessWindowHint)
        self.logWidget.setWindowModality(Qt.ApplicationModal)
        self.logui.close_btn.clicked.connect(self.logWidget.close)

        # State variables
        self.mousePressd = False
        self.mousePoint = None
        self.result = {}
        self.isFirst = False
        self.ocrType = ocrType.ocr_general  # defaults to printed-text recognition

        # Custom signal connections
        self.processFinished.connect(self.updateOCRInfo)
        self.ui.btn_login.clicked.connect(self.logWidget.show)
        self.ui.comboBox_choose.currentIndexChanged.connect(self.changeocrType)
コード例 #33
0
 def getCups(self):
     """Grab the cups region of the game area and return its OCR'd digits.

     Returns:
         The string produced by ``ocr(..., type='digits')``, or the integer
         ``0`` when that string is empty.
     """
     left = self.game_params.game_area_left + 230
     top = self.game_params.game_area_top
     # NOTE(review): the top edge uses +55 while the bottom edge is based
     # on +57 — kept as in the original, confirm it is intentional.
     cups_img = self.grabScreen(left, top + 55, left + 50, top + 57 + 16)
     # cups_img.show()
     cups_str = ocr(cups_img, type='digits')
     return 0 if cups_str == '' else cups_str
コード例 #34
0
ファイル: snip.py プロジェクト: knkm3001/OCR-Snipper
    def capture(self):
        """Copy the selected screen region, OCR it as PNG data and close.

        The selection corners are swapped when the start point lies to the
        right of the end point; the region is then copied from
        ``self.snipScreen``, encoded as PNG into an in-memory buffer and
        handed to ``ocr`` together with the debug/GCP/newline options.
        """
        # Handle a selection dragged from bottom-right towards top-left.
        dragged_backwards = self.startPos.x() > self.endPos.x()
        if dragged_backwards:
            self.startPos, self.endPos = self.endPos, self.startPos

        # Copy the selected rectangle out of the screen pixmap.
        selection = QRect(self.startPos, self.endPos)
        snippet = self.snipScreen.copy(selection)

        # QPixmap -> QImage -> binary
        png_bytes = QByteArray()
        buffer = QBuffer(png_bytes)
        snippet.toImage().save(buffer, 'PNG')

        # OCR
        ocr(buffer.data(), self.args.debugmode, self.gcp_eng,
            self.is_cut_newline)

        self.close()
コード例 #35
0
ファイル: server.py プロジェクト: mariusfa/python-ocr
def ocr_endpoint():
    """Return the OCR text of the uploaded ``file`` (POST) or a greeting.

    The upload is stored under ``UPLOAD_FOLDER`` with a sanitised name for
    the duration of the OCR call and is removed afterwards, including when
    ``ocr`` raises.
    """
    if request.method != 'POST':
        return "Welcome to ocr"

    file = request.files['file']
    if file.filename == '':
        return 'no file selected'

    filename = secure_filename(file.filename)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)
    try:
        return ocr(file_path)
    finally:
        # Previously the upload stayed on disk whenever ocr() failed.
        os.remove(file_path)
コード例 #36
0
def make_predict():
    """Predict the stock movement from a headline, an article URL or a file.

    The form's ``headline`` is always translated to English and passed to
    ``predict`` first. That prediction is used when the headline is not the
    empty string; otherwise the title of the article at ``url`` is used
    when ``url`` is not empty, and as a last resort the OCR text of the
    uploaded ``file``. A truthy prediction is reported as "decrease", a
    falsy one as "increase", and ``index.html`` is rendered with all
    labels translated to ``lang``.
    """
    headline = request.form.get('headline')
    output = predict(translate(str(headline), 'en'))
    url = str(request.form.get('url'))

    if headline != '':
        verdict = output
    elif request.form.get("url") != '':
        toi_article = Article(url, language="en")  # en for English
        toi_article.download()
        # NOTE(review): parse() is never called before reading .title —
        # confirm the title is populated at this point.
        verdict = predict(toi_article.title)
    else:
        upload = request.files['file']
        # NOTE(review): only the upload's filename is given to ocr(); the
        # file itself is not saved here — confirm ocr() can locate it.
        verdict = predict(ocr(upload.filename))

    result = "decrease" if verdict else "increase"
    return render_template('index.html',
                           prediction_text=translate(
                               'Stock Price will {}'.format(result), lang),
                           Prediction=translate('Stock Market Prediction',
                                                lang),
                           Predict=translate('Predict', lang),
                           Plot=translate('plot', lang))
コード例 #37
0
ファイル: app.py プロジェクト: jesskeepswimming/Scribbit
def display():
    """OCR ``FILE_NAME``, label the text and render ``display.html``.

    The raw OCR text is stored in the global ``ocrtext`` and written to
    ``test.txt``; the result of ``label(ocrtext)`` is dumped to
    ``test.json`` and passed to the template as ``ocrtext``.
    """
    global UPLOAD_FOLDER, FILE_NAME, ocrtext
    ocrtext = ocr(FILE_NAME)

    # `with` closes the file even when write() raises (it used to leak).
    with open('test.txt', 'w+') as f:
        f.write(ocrtext)

    info = label(ocrtext)

    with open('test.json', 'w+') as g:
        json.dump(info, g)

    return render_template('display.html', ocrtext=info)
コード例 #38
0
ファイル: audioui 拷貝.py プロジェクト: Rtshaw/E-See
 def run(self):
     """Read frames from ``self.device`` until a read fails.

     Each frame is converted to RGB and wrapped in a ``QImage``. When
     ``self.paizhao`` equals 1 the frame is saved to
     ``./origin/takephoto.png``, shown (scaled to fit 400x300) in
     ``self.imgLab``, and ``ocr()`` followed by ``outcome(...)`` is run.
     The device is released when the loop ends, however it ends.
     """
     if self.device.isOpened():
         try:
             while True:
                 ret, frame = self.device.read()
                 if not ret:
                     # A failed read returns no frame; stop instead of
                     # crashing on `frame.shape` below.
                     break
                 height, width, bytesPerComponent = frame.shape
                 bytesPerLine = bytesPerComponent * width
                 # Swap the colour channel order in place (BGR -> RGB).
                 cv2.cvtColor(frame, cv2.COLOR_BGR2RGB, frame)
                 # Wrap the frame in a QImage object.
                 image = QImage(frame.data, width, height, bytesPerLine,
                                QImage.Format_RGB888)
                 if self.paizhao == 1:
                     image.save('./origin/takephoto.png')
                     # Reset the flag so only one photo is taken per request.
                     self.paizhao = 0
                     pixmap = QPixmap.fromImage(image)
                     pixmap = pixmap.scaled(400, 300,
                                            QtCore.Qt.KeepAspectRatio)
                     self.imgLab.setPixmap(pixmap)
                     ocr()
                     outcome('./result/audio/outcome.txt')
         finally:
             self.device.release()
コード例 #39
0
def test():
    """Save the uploaded ``image`` as ``last.jpg`` and return its OCR as JSON.

    The response body is ``{"ocr_result": <ocr.ocr output>}`` encoded with
    jsonpickle, sent with status 200.
    """
    print(request.files, flush=True)
    upload = request.files['image']
    path = os.getcwd() + '/last.jpg'
    upload.save(path)

    # Encode the response using jsonpickle.
    payload = jsonpickle.encode({'ocr_result': ocr.ocr(path)})
    return Response(response=payload,
                    status=200,
                    mimetype="application/json")
def do_ocr():
    """Decode the request body as an image, OCR it and return JSON.

    The raw body is decoded with OpenCV, written to ``/tmp/<uuid>.png`` and
    passed to ``ocr.ocr``; the response is
    ``{"text": ..., "coordinates": ...}`` with status 200.
    """
    # `request_id` rather than `id`, which shadowed the builtin.
    request_id = uuid.uuid4()
    path = "/tmp/" + str(request_id) + ".png"

    # frombuffer is the supported way to view raw bytes as an array;
    # np.fromstring is deprecated for binary input.
    nparr = np.frombuffer(request.data, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    cv2.imwrite(path, img)
    # NOTE(review): the file written under /tmp is never removed.
    text, coordinates = ocr.ocr(path)
    data = {"text": text, "coordinates": coordinates}
    response = json.dumps(data)

    return Response(response=response, status=200, mimetype="application/json")
コード例 #41
0
ファイル: login.py プロジェクト: situ2001/involution-tools
def login(username: str, password: str):
    """Log in through CAS with an OCR-solved captcha and save the cookies.

    The ``lt`` token is scraped from the CAS page, the captcha image is
    downloaded to ``captcha.jpg`` and read with ``ocr``, and the login form
    is posted. A wrong-credentials or wrong-captcha reply is printed and
    ends the function. Otherwise the captcha file is removed, the SSO URL
    is visited and the session cookies are pickled to
    ``cookies/<username>``.
    """
    session = msession.session
    cas_page = session.get(msession.urls.cas, verify=False)
    lt = re.findall(r'name="lt" value="(.*)"', cas_page.text)

    captcha_path = 'captcha.jpg'
    with session.get(msession.urls.captcha) as captcha_res, \
            open(captcha_path, mode='wb') as captcha_jpg:
        captcha_jpg.write(captcha_res.content)
    captcha = ocr(captcha_path)

    login_form = {
        'username': username,
        'password': password,
        'captcha': captcha,
        'warn': 'true',
        'lt': lt[0],
        'execution': 'e1s1',
        '_eventId': 'submit',
        'submit': '登录'
    }
    post_res = session.post(msession.urls.cas, data=login_form)

    # Known failure markers in the reply; each is echoed as the message.
    for failure in ('账号或密码错误', '验证码不正确'):
        if failure in post_res.text:
            print(failure)
            return

    os.remove(captcha_path)

    session.get(msession.urls.sso, verify=False)
    cookies = session.cookies

    if not os.path.exists('cookies'):
        os.mkdir('cookies')

    if cookies:
        file_name = 'cookies' + os.sep + username
        with open(file_name, mode='wb') as cookies_file:
            pickle.dump(session.cookies, cookies_file)
    else:
        print('No cookies!')
コード例 #42
0
def handler(event, context):
    """
    download tar.gz, do ocr and upload it to configured cloud service
    (currently AWS S3 or Google Drive)

    Configuration is read from the environment: ``UPLOAD_TYPE`` (``s3``,
    ``gdrive`` or ``discard``), the matching ``S3_BUCKET`` / ``GDRIVE_*``
    settings, ``EMPTY_PAGE_THRESHOLD`` (default 200) and
    ``TESSERACT_LANG`` (default ``eng``).

    For each S3 record in ``event['Records']`` the object is downloaded to
    ``/tmp/scan.tar.gz``, converted by ``ocr.ocr`` and uploaded under the
    source key's name up to its first dot plus ``.pdf``; the source object
    and the local PDF are then deleted. With ``discard`` the function
    returns after the first record without uploading or deleting.

    Raises:
        Exception: when ``UPLOAD_TYPE`` is unset or not a known value.
        AssertionError: when a setting required by the upload type is
            missing.
    """

    upload_type = os.environ.get('UPLOAD_TYPE', None)
    if upload_type == 'gdrive':
        for k in [
                'GDRIVE_CLIENT_ID', 'GDRIVE_CLIENT_SECRET',
                'GDRIVE_REFRESH_TOKEN'
        ]:
            assert k in os.environ, "missing {} in environment vars".format(k)
    elif upload_type == 's3':
        assert 'S3_BUCKET' in os.environ
    elif upload_type == 'discard':
        pass
    else:
        # Format the value already read: indexing os.environ here raised a
        # KeyError (hiding this message) whenever UPLOAD_TYPE was unset.
        raise Exception('unknown upload type {}'.format(upload_type))

    empty_page_threshold = int(os.environ.get('EMPTY_PAGE_THRESHOLD', 200))
    language = os.environ.get('TESSERACT_LANG', 'eng')

    # Imported lazily, after the configuration has been validated.
    import boto3
    import ocr
    s3 = boto3.client('s3')
    for record in event['Records']:
        src_bucket = record['s3']['bucket']['name']
        src_file = record['s3']['object']['key']

        s3.download_file(src_bucket, src_file, "/tmp/scan.tar.gz")
        pdf_file = ocr.ocr("/tmp/scan.tar.gz", empty_page_threshold, language)

        dest_filename = src_file.split('.')[0] + '.pdf'
        if upload_type == 's3':
            bucket = os.environ['S3_BUCKET']
            s3.upload_file(pdf_file, bucket, dest_filename)
        elif upload_type == 'gdrive':
            folder = os.environ.get('GDRIVE_FOLDER', None)
            client_id = os.environ['GDRIVE_CLIENT_ID']
            client_secret = os.environ['GDRIVE_CLIENT_SECRET']
            refresh_token = os.environ['GDRIVE_REFRESH_TOKEN']
            upload_gdrive(pdf_file, dest_filename, client_id, client_secret,
                          refresh_token, folder)
        elif upload_type == 'discard':
            print('all fine, discarding file, but not deleting source file')
            return
        s3.delete_object(Bucket=src_bucket, Key=src_file)
        os.remove(pdf_file)
コード例 #43
0
ファイル: main.py プロジェクト: bishogasaad/IP-project
def main():
    """Detect and read the licence plate in every requested image.

    Options come from ``processArguments``: ``help`` prints the usage and
    exits, an empty ``image_paths`` falls back to ``car.JPG`` with a
    warning, ``output == 'json'`` collects results and prints them as JSON
    at the end (otherwise each plate is printed as space-separated
    characters), and unless ``silent`` is set the plate and its characters
    are shown in OpenCV windows until a key is pressed.
    """
    program_options = processArguments()
    if program_options['help']:
        printUsage()
        exit()

    image_paths = program_options['image_paths']
    if len(image_paths) == 0:
        image_paths.append('car.JPG')
        print('Warning: Missing image path. Will use default image (' +
              image_paths[0] + ')')
        printUsage()

    want_json = program_options['output'] == 'json'
    show_windows = not program_options['silent']
    json_output = []

    for i, image_path in enumerate(image_paths):
        plate = detectplate(cv2.imread(image_path))
        if plate.size == 0:
            problem = 'Error: cannot detect plate in ' + image_path
            if want_json:
                json_output.append({
                    'image_path': image_path,
                    'plates': [],
                    'error': problem
                })
            else:
                print(problem)
            continue

        chars = cropChars(plate)
        if show_windows:
            cv2.imshow('plate' + str(i), plate)
            cv2.imshow('chars' + str(i), chars)

        chars, x_cords = segmentCharacters(chars)
        # Each character image is inverted before being recognised, in the
        # order given by sortingCharacters.
        chars_text = [
            ocr(cv2.bitwise_not(chars[index]))
            for index, _ in sortingCharacters(chars, x_cords)
        ]

        if want_json:
            json_output.append({'image_path': image_path,
                                'plates': [chars_text]})
        else:
            print(' '.join(chars_text))

    if want_json:
        print(json.dumps(json_output, indent=4))
    if show_windows:
        cv2.waitKey(0)
        cv2.destroyAllWindows()
コード例 #44
0
def main():
    """Read item names from the latest screenshot and show their prices.

    Pipeline: locate the raw screenshot, let ``region`` process it, OCR the
    resulting image into a list of item names, look up their info with
    ``output.get_info`` and display it in a pop-up window.
    """
    # Location of the original screenshot.
    raw_location = screenshot.get_raw_location()
    # Process the screenshot; the result is saved to the cache folder.
    region.region(raw_location)
    # Location of the processed ("bar") image.
    img_location = region.get_img_location()
    # List of item names.
    items = ocr(img_location)
    # Price list for those items.
    info = output.get_info(items)

    # Show the result in a window.
    out_GUI.popwindow(info)
コード例 #45
0
ファイル: main.py プロジェクト: mini-hiori/iidxOcrAnalyze
def main():
    """OCR frames 150-199 of ``movie_frames`` and print the parsed fields.

    For each frame the image is trimmed, OCR'd into a list of strings and
    searched for the music name, clear rate and full-combo rate, which are
    printed followed by a blank line.
    """
    for frame_no in range(150, 200):
        image_number: str = str(frame_no).zfill(2)
        trimed_image_path = trim_image(f"movie_frames/0{image_number}.jpg")
        image_texts: List[str] = ocr(trimed_image_path)

        music_name = search_music_name(image_texts)
        clear_rate = search_clear_rate(image_texts)
        fullcombo_rate = search_fullcombo_rate(image_texts)

        print(image_number)
        print(f"music_name:{music_name}")
        print(f"clear_rate:{clear_rate}")
        print(f"full_combo_rate:{fullcombo_rate}")
        print()
コード例 #46
0
ファイル: nn.py プロジェクト: kushaltirumala/ocr
def test(dm, dl, indices, nn):
    """Return the accuracy of ``nn`` on the samples selected by ``indices``.

    Small test function: each of 100 rounds predicts every ``dm[j]`` for
    ``j`` in ``indices`` and counts how many predictions equal ``dl[j]``;
    the result is the average of the per-round accuracies, used as the
    general accuracy for that network configuration.

    Args:
        dm: indexable collection of samples.
        dl: indexable collection of expected labels, aligned with ``dm``.
        indices: indices of the samples to evaluate (must not be empty).
        nn: object exposing ``predict(sample)``.
    """
    avg = 0
    # range() works on Python 2 and 3 alike; xrange() was Python 2-only.
    for _ in range(100):
        rightguesses = 0
        for j in indices:
            # `sample`, not `test`: the old local shadowed this function.
            sample = dm[j]
            prediction = nn.predict(sample)
            if dl[j] == prediction:
                rightguesses += 1

        avg += (rightguesses / float(len(indices)))
    return avg / 100

# Load the data sets. Passing the file names lets numpy open and close the
# files itself (the handles from open() were never closed).
dm = np.loadtxt('data.csv', delimiter=',').tolist()
dl = np.loadtxt('dataLabels.csv').tolist()

# Try several hidden-layer sizes and print the measured accuracy of the
# network for each one.
# NOTE(review): `indices` is not defined in the visible part of this file —
# confirm it is provided before this point.
for i in range(5, 100, 10):
    nn = ocr(i, indices, dl, dm, False)
    performance = test(dm, dl, indices, nn)
    # Single-argument print(...) prints the same on Python 2 and 3.
    print(str(i) + " Hidden Nodes --> " + str(performance))


コード例 #47
0
ファイル: kloby.py プロジェクト: zachriggle/pub-whiz
# Download the page and locate the menu image (the <img> whose height
# attribute is "960").
data   = urllib2.urlopen(url).read()
soup   = BeautifulSoup(data)

images = soup.findAll('img')
# .get() so that <img> tags without a height attribute are skipped instead
# of raising KeyError.
want   = [i for i in images if i.attrs.get('height') == '960'][0].attrs['src']


# Relative image paths are resolved against the site root.
if 'http' not in want:
  want = 'http://www.klobysbbq.com/' + want

imageData = urllib2.urlopen(want).read()
# mkstemp creates the file atomically (mktemp only returned a name, which is
# race-prone); the handle is closed before OCR reads the file.
imgFd, imgFile = tempfile.mkstemp(os.path.basename(want))
with os.fdopen(imgFd, 'wb') as imgHandle:
  imgHandle.write(imageData)

ocrData = ocr.ocr(imgFile)

beerNames = []

for line in ocrData.split('\n'):

  # Skip blank lines and lines containing "Today".
  if not line.strip():  continue
  if 'Today' in line:   continue

  # Clean-up patterns are defined outside the visible part of this file.
  line = re.sub(percent, "", line)
  line = re.sub(quoteToEnd, "", line)
  line = re.sub(alphaNumSpace, "", line)
  line = re.sub(lowerUpperLower, "\\1 \\2 \\3", line)
  beerNames.append(line)

results = SearchForBeers(beerNames)