コード例 #1
0
ファイル: doc_extractor.py プロジェクト: brs1977/icr_doc
def get_cell_digits(x, y, table_struct, img):
    """Return the OCR result for the table cell addressed by ``x`` and ``y``.

    The cell image comes from ``cell_table_image(x, y, table_struct, img)``
    and is handed to ``ocr`` with the ``digits`` language and the option
    string ``--oem 1 --psm 7``. The OCR output is returned unmodified
    (no numeric conversion is applied here).
    """
    # NOTE(review): the option string looks like Tesseract CLI flags
    # (psm 7 would be "single text line") -- confirm against ``ocr``.
    cell_img = cell_table_image(x, y, table_struct, img)
    return ocr(cell_img, config='--oem 1 --psm 7', lang='digits')
コード例 #2
0
ファイル: eoz.py プロジェクト: zhp1254/candy
def _get_captcha(session):
    """Request a refreshed captcha from eoz.one and return its OCR result.

    Two GET requests are issued through ``utils.get`` using ``session``:
    the first hits the refresh endpoint, whose JSON body carries the
    site-relative location of the captcha under the ``'url'`` key; the
    second downloads that location. The downloaded body is passed to
    ``utils.ocr`` and whatever that returns is returned to the caller.
    """
    refresh_resp = utils.get(session, 'http://eoz.one/user/captcha?refresh=1')
    # The refresh response gives a path relative to the site root.
    image_url = 'http://eoz.one' + refresh_resp.json()['url']
    image_resp = utils.get(session, image_url)
    return utils.ocr(image_resp.content)
コード例 #3
0
def add_reminders(img_path):
    """Create reminders from the first four menu items found in an image.

    The image at ``img_path`` is opened and OCR'd, the text is parsed with
    ``text_to_menu_items``, and only the first four parsed items are kept.
    ``add_reminder`` is called once per kept ``(date, food)`` pair.

    Returns the list of kept menu items.
    """
    recognised = ocr(open_image(img_path))
    menu_items = text_to_menu_items(recognised)[:4]

    for entry in menu_items:
        date, food = entry
        add_reminder(date, food)

    return menu_items
コード例 #4
0
ファイル: doc_extractor.py プロジェクト: brs1977/icr_doc
def get_cell_text(x, y, table_struct, img):
    """Return the corrected OCR text of the table cell addressed by ``x``/``y``.

    The cell image comes from ``cell_table_image(x, y, table_struct, img)``.
    It is recognised by ``ocr`` with the ``rus`` language and the option
    string ``--oem 1 --psm 6 -c textord_heavy_nr=true``; the recognised text
    is then passed through ``error_correct`` before being returned.
    """
    ocr_options = '--oem 1 --psm 6 -c textord_heavy_nr=true'
    cell_img = cell_table_image(x, y, table_struct, img)
    recognised = ocr(cell_img, config=ocr_options, lang='rus')
    return error_correct(recognised)
コード例 #5
0
    def test_baidu_index(self):
        """Scrape averaged Baidu Index values and write them to a CSV file.

        Opens https://index.baidu.com/ through ``page.BaiduIndexPage``, then
        for every keyword from ``utils.setAllKeywords()`` and every date from
        ``utils.setAllDate()`` runs a search, applies a custom date range,
        and for each of the "Total", "PC" and "Mobile" index types saves a
        screenshot, OCRs it with ``utils.ocr`` and stores the result as one
        row of a DataFrame. The DataFrame is finally written to
        ``Constant.FINAL_RESULT_DIR + Constant.FINAL_RESULT_FILE_NAME``.
        """
        baidu_index_page = page.BaiduIndexPage(self.driver, self.action)
        baidu_index_page.openPage("https://index.baidu.com/")
        # Initial search with a placeholder key; the real keywords are
        # entered inside the loop below.
        baidu_index_page.input_search_key("start")
        baidu_index_page.click_submit_button()
        baidu_index_page.maxWindows()
        # The bare string below is an inert design note left by the author
        # (it is not a docstring). English translation:
        #   Idealized function:
        #   @param1: region: nationwide, Beijing, Hong Kong
        #   @param2: query date: 2018-09
        #   @param3: query keyword: 赵丽颖+zhaoliyig
        #   @output: void; results are converted to CSV during processing
        '''
        理想化的函数:
        @param1: 地区:全国, 北京,香港
        @param2: 查询日期:2018-09
        @param3: 查询关键字: 赵丽颖+zhaoliyig

        @output: void 处理过程中将结果转换成csv
        '''
        # One row per (keyword, date, index type) combination.
        df = pd.DataFrame(
            columns=['key_words', 'date_range', 'index_type', 'baidu_index'])
        # Running row index used with df.loc to append rows.
        i = 0
        for keywords in utils.setAllKeywords():
            for date in utils.setAllDate():
                # try:
                baidu_index_page.input_new_search_key(keywords)
                baidu_index_page.click_new_submit_button()
                time.sleep(1)
                # Start setting the custom date range.
                # NOTE(review): the range passed here is fixed
                # (YEAR_2011/OCT .. YEAR_2012/OCT) and does not depend on
                # ``date``; ``date`` is only used for the file name and the
                # stored row -- confirm this is intended.
                self.inputSelfDefineDate(baidu_index_page, Constant.YEAR_2011,
                                         Constant.OCT, Constant.YEAR_2012,
                                         Constant.OCT)
                baidu_index_page.click_index_average()

                for indexType in ["Total", "PC", "Mobile"]:
                    # Screenshot base name: <keywords>.<indexType>.<date>
                    filename = keywords + '.' + indexType + '.' + date
                    # "Total" needs no extra click; the other two types
                    # switch the view first.
                    if "PC" == indexType:
                        baidu_index_page.click_pc_index_button()
                    if "Mobile" == indexType:
                        baidu_index_page.click_mobile_index_button()
                    baidu_index_page.hoverOnAvgIndex(3)
                    baidu_index_page.hoverOnAvgIndex(4)
                    baidu_index_page.saveThePicture(filename)
                    time.sleep(1)
                    # OCR the saved PNG and strip commas from the result.
                    avg_index = utils.ocr(Constant.IDENTIIFIED_PICTURE_FOLDER +
                                          filename + ".png").replace(",", "")
                    df.loc[i] = [keywords, date, indexType, avg_index]
                    i += 1
            # except Exception as e:
            #     print(keywords+date+"get failed")
            #     print(e)
            #     time.sleep(60*60)
        # Persist all collected rows as a comma-separated file without the
        # DataFrame index column.
        df.to_csv(Constant.FINAL_RESULT_DIR + Constant.FINAL_RESULT_FILE_NAME,
                  index=False,
                  sep=',')
コード例 #6
0
def classify_image(img_path):
    """Classify an image as ``'mosaic'``, ``'food'`` or ``'trustnet'``.

    The image at ``img_path`` is opened and OCR'd. The rules are applied
    in order:

    * if the text contains ``'mosaicscience'`` the result is ``'mosaic'``;
    * otherwise, if more than five space-separated tokens (counted over
      all lines, duplicates included) are members of ``DAYS``, the result
      is ``'food'``;
    * otherwise the result is ``'trustnet'``.
    """
    ocr_text = ocr(open_image(img_path))

    if 'mosaicscience' in ocr_text:
        return 'mosaic'

    # Count every token that names a day; repeated mentions count again.
    day_hits = 0
    for line in ocr_text.split('\n'):
        for token in line.split(' '):
            if token in DAYS:
                day_hits += 1

    return 'food' if day_hits > 5 else 'trustnet'
コード例 #7
0
	def on_status(self, status, firstcall=True):
		"""Reply to a mention with captions for the images attached to a tweet.

		On the first call the ID of the mentioning tweet is stored in
		``self.id`` (all replies are threaded under it) and ``self.more`` is
		set depending on whether the tweet text asks for additional captions.

		If ``status`` carries ``extended_entities``, every media item is sent
		to the DenseCap API and to ``ocr``; the resulting captions are posted
		as a reply, split with ``tweetsplitter`` when longer than 280
		characters. Otherwise, if ``status`` is itself a reply, the parent
		tweet is fetched and processed recursively with ``firstcall=False``.

		Args:
			status: The tweet object to process.
			firstcall: ``True`` for the tweet that triggered the handler,
				``False`` for recursive calls on parent tweets.
		"""
		# Save ID of first tweet with @mention to reply to
		if firstcall:
			self.id = status.id

			# One or multiple captions?
			opts = ['tell me more', 'what else', 'is that all', 'can you do better']
			lowered = status.text.lower()
			self.more = any(substr in lowered for substr in opts)

		# If tweet has an image, tag this one
		if hasattr(status, 'extended_entities'):
			print(status.text)

			# Setup DenseCap
			headers = {
				'api-key': self.key,
			}

			# Iterate through each image in tweet
			caption = []
			for each in status.extended_entities['media']:
				# Get image URL
				img = each['media_url_https']
				files = {
					'image': img,
				}

				# Call DenseCap API
				response = requests.post('https://api.deepai.org/api/densecap', headers=headers, files=files).json()

				# Call tesseract for OCR
				text = ocr(img)

				# Construct caption
				if 'output' in response:
					captions = response['output']['captions']
					# Slice instead of indexing so that a response with fewer
					# captions than expected cannot raise IndexError:
					# first caption by default, the next (up to) three when
					# more detail was requested.
					picked = captions[1:4] if self.more else captions[:1]
					if not picked:
						# Nothing usable for this image; skip it rather
						# than abort the whole reply.
						continue
					cap = '; '.join([entry['caption'] for entry in picked])

					# Add OCR text if detected
					if text is not None:
						cap = 'The image shows ' + cap + ' and it says ' + text

					caption.append(cap)

			if caption:
				# Build tweet
				if len(caption) == 1:
					tweet = caption[0]
				else:
					# Number the captions, one per line
					tweet = ''
					for i, c in enumerate(caption):
						tweet += f'[{i+1}] {c}\n'
				print(tweet)

				# Tweet (reply) the response
				if len(tweet) <= 280:
					self.api.update_status(tweet, in_reply_to_status_id=self.id, auto_populate_reply_metadata=True)
				else:
					# Too long for one tweet: post a thread, each part
					# replying to the previous one
					tweets = tweetsplitter(tweet)
					prev = self.id
					for t in tweets:
						latest = self.api.update_status(t, in_reply_to_status_id=prev, auto_populate_reply_metadata=True)
						prev = latest.id

		# If tweet has no image, try and find parent tweet with image
		else:
			if status.in_reply_to_status_id is not None:
				parent = self.api.get_status(status.in_reply_to_status_id)

				# Recursive call
				self.on_status(parent, False)
コード例 #8
0
def read_title(img):
    """Return the OCR text of ``img`` up to the first blank line.

    The text produced by ``ocr(img)`` is cut at the first occurrence of two
    consecutive newlines; if there is none, the whole text is returned.
    """
    title, _sep, _rest = ocr(img).partition('\n\n')
    return title
コード例 #9
0
ファイル: main.py プロジェクト: wptoux/Chiji
            score, pos = _match(img, tp_startup)

            if score > 0.9:
                utils.tap((pos[0] + pos[1]) / 2, (pos[2] + pos[3]) / 2)
                state.chg_state('run')
                time.sleep(1)
            else:
                time.sleep(0.3)

        elif state.state == 'run':
            score, pos = _match(img, tp_main)
            if score > 0.20:
                simg = img[content_offset:, :, :]
                print('Call ocr...', end=' ')
                st = time.time()
                ocr_rst = utils.ocr(simg, regions)
                et = time.time()
                print('Time: %.2f' % (et - st))
                print('Question:', ocr_rst)

                if '' not in ocr_rst:
                    question, opt1, opt2, opt3, opt4 = ocr_rst

                    with env.begin() as txn:
                        ans = txn.get(question.encode())

                    if ans is None:
                        select = random.randrange(1, 5)
                    else:
                        ans = ans.decode()
                        print('Memory hit!', ans)