async def parse(self, filename):
    """Run OCR on two enhanced variants of an image and return the better item list.

    The image is enhanced twice (with and without blur). Both variants are
    passed to ``sharpen_image_and_run_ocr`` in a two-worker process pool, and
    ``extract_items`` is applied to each OCR text. The list with more items is
    returned; on a tie the blurred variant wins.

    :param filename: image name forwarded to ``_enhance_image``.
    :return: the item list produced by ``extract_items`` for the winning variant.
    """
    behavior_log("Start processing image {name}".format(name=filename))

    blurred_img = self._enhance_image(filename, blur=True)
    non_blurred_img = self._enhance_image(filename, blur=False)

    event_loop = asyncio.get_running_loop()
    with ProcessPoolExecutor(max_workers=2) as pool:
        # Order matters: the blurred variant is always submitted first.
        ocr_jobs = [
            event_loop.run_in_executor(
                pool, partial(self.sharpen_image_and_run_ocr, variant))
            for variant in (blurred_img, non_blurred_img)
        ]
        ocr_texts = await asyncio.gather(*ocr_jobs)

    blurred_items, plain_items = (
        self.extract_items(text.splitlines(True)) for text in ocr_texts
    )
    # The non-blurred result is used only when it is strictly richer.
    return plain_items if len(plain_items) > len(blurred_items) else blurred_items
async def parse_receipt_image_and_send_poll(self, message: types.Message):
    """Download the receipt photo from a message, OCR it and start item validation.

    When no items are recognised the user is asked to retake the photo.
    Otherwise a receipt document holding the raw items is stored and the
    items are sent back for validation.

    :param message: incoming message carrying the photo.
    """
    # Last entry of the photo list (presumably the largest size - confirm).
    photo = message.photo[-1]
    image_name = photo.file_unique_id + ".jpg"
    path_to_image = os.path.join(INPUT_FOLDER, image_name)
    chat_id = message.chat.id

    behavior_log(
        "User: {user}, Trying to fetch receipt image {name}".format(
            user=chat_id, name=image_name))
    await photo.download(path_to_image)
    await message.answer(text="Идет распознавание чека")

    # NOTE: cropping via img_parser.find_receipt_on_image_and_crop_it is
    # currently disabled.
    items = await self.img_parser.parse(image_name)

    if len(items) == 0:
        await self._bot.send_message(
            chat_id=chat_id,
            text="Не удалось распознать чек :( \n"
            "Сфотографируйте его как можно ближе и без вспышки")
        return

    document_data = {
        RAW_ITEMS: items,
        DIALOG_STATE_ID: self.state.ITEMS_VALIDATION,
    }
    receipt_document = self.init_receipt_document(chat_id=chat_id,
                                                  data=document_data)
    self._db.set_receipt(document=receipt_document)
    await self.send_raw_items_for_validation(message, items)
def extract_items(self, receipt_lines):
    """Collect receipt items from OCR text lines.

    Lines are scanned in order. Scanning stops, and the items gathered so far
    are returned, as soon as a line matches any pattern from
    ``self._config.sum_keys``. A line matching ``self._config.item_format``
    with at least three groups is turned into an item unless its name is
    three characters or shorter, or matches one of ``self._config.ignore_keys``.

    :param receipt_lines: iterable of text lines.
    :return: list of items built by ``set_item_attrs``.
    """
    items = []
    for line in receipt_lines:
        # A "sum" keyword ends parsing immediately.
        reached_total = any(
            fnmatch.fnmatch(line, f"*{stop_word}*")
            for stop_word in self._config.sum_keys)
        if reached_total:
            return items

        match = re.search(self._config.item_format, line)
        if match is None or len(match.groups()) < 3:
            continue

        line = line.lower().replace("\n", "")
        behavior_log(
            "Matched line with receipt option regexp: {line}".format(
                line=line))

        name, quantity, price = self.get_item_attrs(regexp_match=match)
        if len(name) <= 3:
            continue

        is_ignored = any(
            fnmatch.fnmatch(name, f"*{word}*")
            for word in self._config.ignore_keys)
        if is_ignored:
            continue

        item = self.set_item_attrs(name, quantity, price)
        if item:
            items.append(item)

    behavior_log("Finish image processing and sending items to bot")
    return items
def get_receipt(self, keys, **kwargs):
    """Look up a receipt document in the RECEIPTS collection.

    :param keys: query passed to ``self.find``.
    :param kwargs: extra keyword arguments forwarded to ``self.find``.
    :return: the result of ``self.find``, or an empty dict when it is falsy.
    """
    lookup_message = "Find document in {coll} by query: {query}".format(
        coll=self.RECEIPTS, query=keys)
    behavior_log(lookup_message)

    found = self.find(collection=self.RECEIPTS, query=keys, **kwargs)

    # NOTE: this line is logged even when nothing was found.
    behavior_log(
        "Document was successfully found: {doc}".format(doc=found))
    return found or {}
def _get_ticket_id(self, qr: str) -> str:
    """POST the QR string to TICKET_URL and return the ticket id.

    :param qr: QR code text sent as the ``qr`` field of the JSON body.
    :return: the ``id`` field of the response, or ``""`` when the response
        is empty.
    """
    behavior_log("Fetch ticket id from {url}".format(url=self.TICKET_URL))
    response = self.request_handling(method=Methods.POST,
                                     url=self.TICKET_URL,
                                     json={"qr": qr},
                                     headers=self.headers_with_session)
    if not response:
        return ""
    return response["id"]
def __init__(self, dispatcher):
    """Wire up the bot's collaborators.

    :param dispatcher: object exposing the bot instance as ``dispatcher.bot``.
    """
    # Telegram bot handle and persistence.
    self._bot = dispatcher.bot
    self._db = ReceiptsDBConnector()

    # Receipt sources: QR code lookup and image OCR.
    self.qr_parser = QRParser()
    self.img_parser = ImageParser()

    # Dialog state identifiers and keyboard markup builders.
    self.state = UserState()
    self.markup = ReplyMarkups()

    class_name = type(self).__name__
    behavior_log("Init {bot}".format(bot=class_name))
def _set_session_id(self) -> None:
    """Authenticate against AUTH_URL and remember the returned session id.

    Stores the ``sessionId`` field of the response, or ``None`` when the
    response is empty.
    """
    behavior_log("Set session id with federal tax service")
    auth_response = self.request_handling(method=Methods.POST,
                                          url=self.AUTH_URL,
                                          json=self.__auth_payload,
                                          headers=self.headers)
    if auth_response:
        self.__session_id = auth_response["sessionId"]
    else:
        self.__session_id = None
async def parse_receipt_qr_and_send_poll(self, message: types.Message):
    """Resolve a QR code text into receipt items and continue the dialog.

    The message text is passed to ``qr_parser.get_ticket_items``. With no
    items the user is told to retry later; otherwise the receipt is saved and
    the user is asked for the voters count.

    :param message: incoming message whose text is the QR code content.
    """
    qr_text = message.text
    behavior_log("User: {user}, Start parsing qr code {code}".format(
        user=message.chat.id, code=qr_text))

    items = await self.qr_parser.get_ticket_items(qr=qr_text)

    if len(items) != 0:
        await message.answer(text="Данные по чеку получены")
        await self.save_receipt_and_ask_for_voters_count(message, items)
        return

    await self._bot.send_message(
        chat_id=message.chat.id,
        text="Не удалось получить чек у налоговой службы, попробуйте позже")
async def close_receipt(self, receipt_id):
    """Compute debts for a receipt and message every user their amount.

    :param receipt_id: identifier used to load the receipt from the database.
    """
    behavior_log("Closing receipt: {id}".format(id=receipt_id))

    receipt = self._db.get_receipt(keys={RECEIPT_ID: receipt_id})
    debts_by_user = self.debt_calculations(receipt)

    for debtor_id, amount in debts_by_user.items():
        behavior_log(
            "User: {user}, Send debt to user: sum = {debt}".format(
                user=debtor_id, debt=amount))
        debt_text = ("Опрос окончен! \n"
                     "Ваш долг по чеку составляет {:.2f} руб".format(amount))
        await self._bot.send_message(chat_id=debtor_id, text=debt_text)
def update_receipt_by_id(self, receipt_id, update):
    """Apply a ``$set`` update to the receipt with the given id.

    Values for which ``self.pickle_check`` is falsy are stored as
    ``pickle.dumps(value)``; all other values are stored unchanged.

    :param receipt_id: id of the receipt to update.
    :param update: mapping of field name to new value.
    """
    fields_to_set = {
        field: (new_value if self.pickle_check(new_value)
                else pickle.dumps(new_value))
        for field, new_value in update.items()
    }
    mongo_update = {"$set": fields_to_set}

    behavior_log("Update document in {coll} by id: {id}".format(
        coll=self.RECEIPTS, id=receipt_id))
    self.update_one(collection=self.RECEIPTS,
                    query={RECEIPT_ID: receipt_id},
                    data=mongo_update)
    behavior_log(
        "Document was successfully updated. Update data: {data}".format(
            data=update))
async def close_inline_poll(self, callback, receipt):
    """Register that one more user finished voting and close the receipt if all did.

    The stored voters count is incremented by one and saved together with a
    fresh access timestamp. When the new count equals the receipt's total
    voters count, ``close_receipt`` is called.

    :param callback: callback query that triggered the poll close.
    :param receipt: receipt document as loaded before this call.
    """
    finished_voters = receipt[VOTERS_COUNT] + 1
    receipt_id = receipt[RECEIPT_ID]

    behavior_log(
        "User: {user}, Closing poll".format(user=callback.from_user.id))

    changes = {
        VOTERS_COUNT: finished_voters,
        ACCESS_TIMESTAMP: time.time(),
    }
    self._db.update_receipt_by_id(receipt_id=receipt_id, update=changes)

    await self._bot.answer_callback_query(callback.id)
    await self._bot.send_message(
        chat_id=callback.message.chat.id,
        text="Спасибо! Ожидате окончания голосования")

    everyone_voted = finished_voters == receipt[TOTAL_VOTERS_COUNT]
    if everyone_voted:
        await self.close_receipt(receipt_id=receipt[RECEIPT_ID])
async def get_ticket_items(self, qr: str):
    """Fetch a ticket from both sources concurrently and return its items.

    The federal tax service lookup and the backup OFD lookup run in a thread
    pool. The first non-empty ticket, with the federal one taking priority,
    is post-processed by ``_ticket_processing`` and its ITEMS entry is
    returned.

    :param qr: QR code text.
    :return: the ticket's items, or ``[]`` when both sources returned nothing.
    """
    loop = asyncio.get_running_loop()
    behavior_log("Get running loop for asynchronous ticket fetch")

    # Order defines priority when both sources succeed.
    fetchers = (self._get_federal_tax_ticket, self._get_backup_ofd_ticket)
    with ThreadPoolExecutor() as pool:
        pending = [
            loop.run_in_executor(pool, partial(fetch, qr=qr))
            for fetch in fetchers
        ]
        tickets = await asyncio.gather(*pending)

    first_ticket = next((candidate for candidate in tickets if candidate), None)
    if first_ticket is None:
        return []

    self._ticket_processing(first_ticket)
    return first_ticket.get(ITEMS)
async def inline_poll_handler(self, callback_query: types.CallbackQuery):
    """Handle a button press in a user's inline poll.

    Loads the most recently accessed receipt in the USERS_VOTE state that
    contains this user, applies the pressed button to the poll, and closes
    the poll for the user when the callback data is CLOSE_POLL.

    :param callback_query: callback query produced by the inline keyboard.
    """
    user_id = str(callback_query.from_user.id)
    behavior_log(
        "User: {user}, Inline poll callback handle".format(user=user_id))

    lookup = {
        self.composite_key(USERS, user_id, USER_ID): user_id,
        DIALOG_STATE_ID: self.state.USERS_VOTE,
    }
    # Sort by access timestamp, descending.
    receipt = self._db.get_receipt(keys=lookup,
                                   sort=[(ACCESS_TIMESTAMP, -1)])

    user_markup = self.unpickle_markup(
        receipt[USERS][user_id][OPTIONS_MARKUP])
    await self.edit_inline_poll(callback_query, user_markup, receipt)

    wants_to_close = (
        callback_query.data == self.markup.callback_data.CLOSE_POLL)
    if wants_to_close:
        await self.close_inline_poll(callback_query, receipt)
async def create_start_deeplink(self, message: types.Message):
    """Store the voters count and send the user a deep link to the poll.

    The receipt waiting in the ENTER_VOTERS_COUNT state for this chat is moved
    to USERS_VOTE with the total voters count taken from the message text
    (converted with ``int``). A start link whose payload is
    DEEP_LINK_TRIGGER followed by the receipt id is then sent to the user.

    :param message: incoming message whose text is the number of voters.
    """
    chat_id = message.chat.id
    behavior_log("User: {user}, Creating receipt deeplink".format(
        user=chat_id))

    receipt = self._db.get_receipt_by_state(
        chat_id=chat_id, state_id=self.state.ENTER_VOTERS_COUNT)

    changes = {
        TOTAL_VOTERS_COUNT: int(message.text),
        DIALOG_STATE_ID: self.state.USERS_VOTE,
        ACCESS_TIMESTAMP: time.time(),
    }
    self._db.update_receipt_by_id(receipt_id=receipt[RECEIPT_ID],
                                  update=changes)

    link_payload = self.DEEP_LINK_TRIGGER + receipt[RECEIPT_ID]
    link = await deep_linking.get_start_link(payload=link_payload)

    await message.answer(text=link)
    behavior_log("User: {user}, Send created deeplink: {link}".format(
        user=chat_id, link=link))
    await message.answer(
        text="Скопируйте ссылку на опрос и отправьте друзьям")
async def edit_inline_poll(self, callback, user_markup, receipt):
    """Apply a pressed poll button to the user and master markups and persist them.

    :param callback: callback query carrying the pressed button's data.
    :param user_markup: the user's current (unpickled) options markup.
    :param receipt: receipt document holding the pickled master markup.
    """
    user_id = str(callback.from_user.id)
    behavior_log("User: {user}, Edit poll with callback {data}".format(
        user=user_id, data=callback.data))

    master_markup = self.unpickle_markup(receipt[RECEIPT_MARKUP])
    new_user_markup, new_master_markup = self.markup.update_options_markup(
        callback_data=callback.data,
        master_markup=master_markup,
        user_markup=user_markup)

    user_markup_key = self.composite_key(USERS, user_id, OPTIONS_MARKUP)
    changes = {
        user_markup_key: new_user_markup,
        RECEIPT_MARKUP: new_master_markup,
        ACCESS_TIMESTAMP: time.time(),
    }
    self._db.update_receipt_by_id(receipt_id=receipt[RECEIPT_ID],
                                  update=changes)

    await self._bot.answer_callback_query(callback.id)
    await self._edit_inline_poll(callback, new_user_markup)
def _get_backup_ofd_ticket(self, qr: str) -> dict:
    """Fetch a receipt for the QR text from the backup OFD service via ``curl``.

    The QR text is POSTed to BACKUP_TICKETS_URL and the JSON reply is expected
    to look like ``{"data": {"json": <receipt>}}``.

    :param qr: QR code text; it originates from user input and is therefore
        passed to curl as a single argv element, never through string
        splitting or a shell.
    :return: the receipt dict, or ``{}`` on any failure (curl missing,
        non-JSON output, unexpected reply shape).
    """
    behavior_log(
        "Fetch ticket description for qr code '{qr}' from backup URL: {url}"
        .format(qr=qr, url=self.BACKUP_TICKETS_URL))

    # Build argv explicitly: splitting a formatted command string let
    # whitespace in `qr` inject extra curl options and also sent the literal
    # quote characters as part of the body. `--data-raw` additionally stops
    # curl from treating a leading "@" as "read this local file".
    argv = ["curl", "--data-raw", qr, self.BACKUP_TICKETS_URL]
    try:
        # run() waits for the child and closes the pipe, unlike a bare Popen.
        completed = subprocess.run(argv,
                                   stdout=subprocess.PIPE,
                                   stderr=sys.stderr)
    except OSError:
        # curl is not installed or could not be started: this source is
        # best-effort, so report an empty result instead of raising.
        behavior_log("Fail to obtain receipt from backup OFD")
        return {}

    try:
        # ValueError covers both invalid UTF-8 and invalid JSON.
        resp = json.loads(completed.stdout.decode())
    except ValueError:
        return {}

    ticket = resp.get("data") if isinstance(resp, dict) else None
    if isinstance(ticket, dict) and "json" in ticket:
        receipt = ticket["json"]
        behavior_log(
            "Successful backup OFD receipt obtaining: receipt={receipt}"
            .format(receipt=receipt))
        return receipt

    behavior_log("Fail to obtain receipt from backup OFD")
    return {}
async def start_inline_poll(self, message: types.Message):
    """Open the inline poll for a user who followed a receipt deep link.

    The receipt id is what remains of the message text after removing
    ``"/start "`` followed by DEEP_LINK_TRIGGER. A per-user document with a
    fresh inline markup is stored in the receipt and the poll is sent.

    :param message: the ``/start`` message produced by the deep link.
    """
    user_id = str(message.from_user.id)
    chat_id = message.chat.id

    start_prefix = "/start {}".format(self.DEEP_LINK_TRIGGER)
    receipt_id = message.text.replace(start_prefix, "")
    behavior_log(
        "User: {user}, Start poll by deeplink: receipt {id}".format(
            user=chat_id, id=receipt_id))

    receipt = self._db.get_receipt(keys={RECEIPT_ID: receipt_id})

    behavior_log("User: {user}, Set inline poll for user".format(
        user=chat_id))
    inline_markup = self.markup.inline_options(
        items=receipt[CLEAN_ITEMS],
        total_voters_count=receipt[TOTAL_VOTERS_COUNT])

    changes = {
        self.composite_key(USERS, user_id):
        self._get_user_document(user_id, inline_markup),
        ACCESS_TIMESTAMP: time.time(),
    }
    self._db.update_receipt_by_id(receipt_id=receipt[RECEIPT_ID],
                                  update=changes)

    behavior_log(
        "User: {user}, Sending inline poll".format(user=chat_id))
    await self._bot.send_message(
        chat_id=chat_id,
        text=
        "Выберите нужные позиции в чеке \nДля общих позиций нажмите на 'шаг' чтобы сделать его дробным",
        reply_markup=inline_markup)
def request_handling(method, url, **kwargs):
    """Send a GET or POST request and return the decoded JSON body.

    :param method: ``Methods.GET`` or ``Methods.POST``; any other value
        results in ``{}`` without a request being sent.
    :param url: target URL.
    :param kwargs: extra keyword arguments forwarded to ``requests``.
    :return: the parsed JSON body for a 200 response; ``{}`` for a request
        failure, a non-200 status, or a body that is not valid JSON.
    """
    timeout = (CONNECT_TIMEOUT, READ_TIMEOUT)
    try:
        if method == Methods.GET:
            response = requests.get(url=url, timeout=timeout, **kwargs)
        elif method == Methods.POST:
            response = requests.post(url=url, timeout=timeout, **kwargs)
        else:
            return {}
    except requests.exceptions.RequestException:
        behavior_log("Request exception occurred",
                     level="ERROR",
                     exc_info=True)
        return {}

    # The result is produced here rather than in a `finally` block: a
    # `return` inside `finally` silently discards any in-flight exception,
    # including KeyboardInterrupt and programming errors.
    behavior_log("Obtain response from {url}: {response}".format(
        url=url, response=response.text))
    if response.status_code != 200:
        return {}

    try:
        return response.json()
    except ValueError:
        # A 200 response with a non-JSON body (e.g. an HTML error page).
        behavior_log("Response body is not valid JSON",
                     level="ERROR",
                     exc_info=True)
        return {}
def _ticket_processing(self, ticket):
    """Normalise a ticket's items in place using the external NLP service.

    A payload built from the ticket is POSTed to TINKOFF_FNS_NLP_URL. For each
    ticket item the name is replaced by the ``look`` field of the service's
    item at the same position when one is available. The price is set to
    ``int(SUM) / 100``, and a non-integer quantity is replaced by 1.

    :param ticket: receipt dict; its ITEMS entries are mutated.
    """

    def _preprocessing_payload():
        # Request skeleton; missing ticket fields fall back to empty values.
        return {
            "user": ticket.get("user", ""),
            "userInn": ticket.get("userInn", "").strip(),
            "retailPlaceAddress": "",
            "kktRegId": ticket.get("kktRegId", "").strip(),
            "fiscalDocumentNumber": ticket.get("fiscalDocumentNumber", 0),
            "fiscalSign": ticket.get("fiscalSign", 0),
            "totalSum": ticket.get("totalSum", 0),
            "dateTime": ticket.get("dateTime", ""),
            "items": []
        }

    def _clean_items():
        # Keep only the four fields sent to the service.
        return [{
            NAME: item[NAME],
            PRICE: item[PRICE],
            QUANTITY: item[QUANTITY],
            SUM: item[SUM]
        } for item in ticket[ITEMS]]

    behavior_log("Start preprocessing receipt options")
    payload = _preprocessing_payload()
    payload[ITEMS] = _clean_items()
    if isinstance(payload["dateTime"], int):
        # An integer timestamp is sent as an ISO-8601 string.
        payload["dateTime"] = datetime.fromtimestamp(
            ticket["dateTime"]).isoformat()

    behavior_log("Sending raw receipt data for preprocessing")
    response = self.request_handling(method=Methods.POST,
                                     url=self.TINKOFF_FNS_NLP_URL,
                                     json=payload)
    # Tolerate a missing or null "result" / items entry.
    processed_items = (response.get("result") or {}).get(ITEMS) or []

    for position, item in enumerate(ticket[ITEMS]):
        # The service may return fewer items than were sent, or items without
        # "look"; keep the original name instead of raising
        # IndexError/KeyError.
        if position < len(processed_items):
            item[NAME] = processed_items[position].get("look", item[NAME])
        item[PRICE] = int(item[SUM]) / 100
        if not isinstance(item[QUANTITY], int):
            item[QUANTITY] = 1
    behavior_log("Finish receipt preprocessing")
def _get_federal_tax_ticket(self, qr: str) -> dict:
    """Fetch a receipt for the QR text from the federal tax service.

    The QR text is first resolved to a ticket id, then the ticket description
    is requested from TICKETS_URL followed by that id.

    :param qr: QR code text.
    :return: ``ticket["document"]["receipt"]`` from the response, or ``{}``
        when no ticket id or no ticket could be obtained.
    """
    ticket_id = self._get_ticket_id(qr)
    if not ticket_id:
        # Without an id there is nothing to look up; skip the request that
        # would otherwise go to the bare TICKETS_URL.
        behavior_log("Fail to obtain receipt from FTS")
        return {}

    ticket_description_url = self.TICKETS_URL + ticket_id
    # Log the URL actually requested (previously TICKET_URL, the id endpoint,
    # was logged here).
    behavior_log("Fetch ticket description by id={id} from {url}".format(
        id=ticket_id, url=ticket_description_url))
    resp = self.request_handling(method=Methods.GET,
                                 url=ticket_description_url,
                                 headers=self.headers_with_session)

    ticket = resp.get("ticket")
    if not ticket:
        behavior_log("Fail to obtain receipt from FTS")
        return {}

    receipt = ticket["document"]["receipt"]
    behavior_log(
        "Successful federal tax service receipt obtaining: receipt={receipt}"
        .format(receipt=receipt))
    return receipt
def __init__(self):
    """Create the parser and obtain a session id right away."""
    # Start without a session; _set_session_id() stores the real value (or
    # None again when authentication yields no response).
    self.__session_id = None
    self._set_session_id()

    parser_name = type(self).__name__
    behavior_log("Init {parser}".format(parser=parser_name))
def set_receipt(self, document):
    """Insert a receipt document into the RECEIPTS collection.

    :param document: document passed to ``self.insert_one``.
    """
    collection = self.RECEIPTS
    behavior_log("Insert in {coll} document: {doc}".format(
        coll=collection, doc=document))
    self.insert_one(collection=collection, data=document)
    behavior_log("Document was successfully inserted")
def __init__(self):
    """Initialise the connector via the parent class and drop RECEIPTS."""
    connector_name = type(self).__name__
    behavior_log("Init {connector}".format(connector=connector_name))
    super().__init__()
    # NOTE(review): this runs on every construction - presumably it wipes
    # previously stored receipts; confirm this is intended outside development.
    self.drop(self.RECEIPTS)
def __init__(self):
    """Load the parser configuration from PARSER_CONFIG_PATH."""
    self._config = read_config(PARSER_CONFIG_PATH)

    parser_name = type(self).__name__
    behavior_log("Init {parser}".format(parser=parser_name))