def __transcribe_chunk(chunk, lang):
    """Send one audio chunk to the wit.ai speech endpoint and return its text.

    Args:
        chunk: audio segment exposing ``raw_data``; those bytes are posted as
            the request body. The content-type header declares them as raw
            signed 16-bit little-endian samples at 8000 Hz.
        lang: key into the "wit" config section selecting the API token.

    Returns:
        The ``_text`` field of the JSON response, or ``None`` when ``lang`` has
        no configured token, the request/parsing fails, or the response
        carries no ``_text`` entry.
    """
    wit_keys = config.get_config_prop("wit")
    if lang not in wit_keys:
        logger.error("Language not found in wit.json %s", lang)
        return None

    # The token is a bearer credential: log which language is used, never the
    # token itself. Also goes through the module logger, not the root logger.
    logger.debug("Using wit.ai key for language %s", lang)

    headers = {
        'authorization': 'Bearer ' + wit_keys[lang],
        'accept': 'application/vnd.wit.20180705+json',
        'content-type': 'audio/raw;encoding=signed-integer;bits=16;rate=8000;endian=little',
    }

    text = None
    try:
        response = requests.request(
            "POST",
            "https://api.wit.ai/speech",
            headers=headers,
            params={'verbose': True},
            data=io.BufferedReader(io.BytesIO(chunk.raw_data)))
        logger.debug("Request response %s", response.text)

        res = response.json()
        if '_text' in res:
            text = res['_text']
    except Exception:
        # Best effort: a failed chunk yields None instead of aborting the
        # whole transcription.
        logger.error("Could not transcribe chunk: %s", traceback.format_exc())

    return text
def translate(bot, update):
    """Handle the /translate command by translating the replied-to message.

    The text after "/translate" is looked up in the configured languages. An
    unknown language, or a command that is not a reply to another message,
    produces an explanatory message in the chat instead of a translation.
    """
    chat_id = get_chat_id(update)

    message = update.message or update.channel_post
    if not message:
        return

    requested = message.text.replace("/translate", "").strip()
    logger.debug("Language %s", requested)

    if requested not in config.get_config_prop("app")["languages"]:
        template = R.get_string_resource(
            "translate_language_not_found", TBDB.get_chat_lang(chat_id))
        bot.send_message(chat_id=chat_id,
                         text=template.format(requested),
                         is_group=chat_id < 0)
        return

    # Configured values look like "xx-YY"; only the part before '-' is used.
    configured_code = config.get_config_prop("app")["languages"][requested]
    target = configured_code.split('-')[0]

    original = message.reply_to_message
    if not original:
        hint = R.get_string_resource(
            "translate_reply_to_message", TBDB.get_chat_lang(chat_id))
        bot.send_message(chat_id=chat_id, text=hint, is_group=chat_id < 0)
        return

    translation = translator.translate(source=TBDB.get_chat_lang(chat_id),
                                       target=target,
                                       text=original.text)
    message.reply_text(translation)
def init():
    """Load the antiflood settings from the "app" config into module globals.

    Each loaded value is logged at INFO level once all five are set.
    """
    global flood_ratio, max_flood_ratio, time_threshold_warning, time_threshold_flood, timeout

    settings = config.get_config_prop("app")["antiflood"]

    flood_ratio = settings["flood_ratio"]
    max_flood_ratio = settings["max_flood_ratio"]
    time_threshold_warning = settings["time_threshold_warning"]
    time_threshold_flood = settings["time_threshold_flood"]
    timeout = settings["timeout"]

    loaded = (
        ("Ratio: %d", flood_ratio),
        ("Max flood ratio: %d", max_flood_ratio),
        ("Thr warning: %d", time_threshold_warning),
        ("Thr flood: %d", time_threshold_flood),
        ("Timeout: %d", timeout),
    )
    for fmt, value in loaded:
        logger.info(fmt, value)
def __pre__hook(self, fn, u, c, **kwargs):
    """Run the common pre-checks for an update, then delegate to ``fn``.

    The update is dropped (returns ``None``) when it carries no message, when
    the message is older than the configured antiflood ``age_threshold``
    (seconds), or when the chat is currently flagged in ``self.floods``.
    Otherwise a default chat entry is created if missing, the chat is marked
    active if it was cached as inactive, and ``fn(bot, update, **kwargs)`` is
    returned.
    """
    bot = c.bot

    message = u.message or u.channel_post
    if not message:
        return

    now = datetime.utcnow()
    sent_at = message.date.replace(tzinfo=None)
    age = (now - sent_at).total_seconds()
    if age > config.get_config_prop("app")["antiflood"]["age_threshold"]:
        return

    chat_id = get_chat_id(u)
    antiflood.on_chat_msg_received(chat_id)

    is_flooding = chat_id in self.floods and self.floods[chat_id] is True
    if is_flooding:
        return

    if not TBDB.get_chat_entry(chat_id):
        # happens when welcome/joined message is not received
        TBDB.create_default_chat_entry(chat_id, 'en-US')

    active_cache = self.mqbot.active_chats_cache
    if chat_id in active_cache and active_cache[chat_id] == 0:
        logger.debug("Marking chat {} as active".format(chat_id))
        active_cache[chat_id] = 1
        TBDB.set_chat_active(chat_id, active_cache[chat_id])

    return fn(bot, u, **kwargs)
def language_handler(bot, update, language):
    """Set the chat's language to ``language`` and confirm it with a reply.

    ``language`` is used as a key into the configured "languages" mapping by
    direct indexing, so a name that is not configured raises.
    """
    chat_id = get_chat_id(update)

    # Language code configured for this name (original note: ISO 639-1 code).
    lang_code = config.get_config_prop("app")["languages"][language]
    TBDB.set_chat_lang(chat_id, lang_code)

    template = R.get_string_resource("language_set", lang_code)
    confirmation = template.replace("{lang}", language)

    target = update.message or update.channel_post
    target.reply_text(confirmation)
def translate(source, target, text):
    """Translate ``text`` into ``target`` via the Yandex translate endpoint.

    The source language is auto-detected from ``text``; the ``source`` argument
    is used only when detection returns nothing. Returns the first translated
    text from the response with a "Powered by Yandex.Translate" footer
    appended.
    """
    detected = detect_language(text)
    if detected is not None:
        source = detected
        print("Autodetected language: {0}".format(detected))

    lang = source + "-" + target
    print(lang)

    endpoint = yandex_translate_url.format(
        config.get_config_prop("yandex")["translate_key"])
    payload = {'lang': lang, 'text': text}
    r = requests.post(endpoint, data=payload)
    print(r)

    res = r.json()
    print(res)

    translated = str(res['text'][0])
    return translated + "\n\nPowered by Yandex.Translate http://translate.yandex.com"
def process_media_voice(bot, update, media, name):
    """Download a voice/audio attachment, transcribe it, and clean up.

    Args:
        bot: bot used to send messages and fetch the file.
        update: incoming update the media belongs to.
        media: media object exposing ``file_size`` and ``file_id``.
        name: label for the media kind, used only in the error log.

    Files of 20 MiB or more are rejected with a "file_too_big" reply. Any error
    while downloading or transcribing is logged rather than raised, and the
    downloaded file is removed afterwards in all cases.
    """
    chat_id = get_chat_id(update)

    # file_size may be absent (None); comparing None with an int would raise,
    # so an unknown size is let through and the download decides.
    file_size = media.file_size
    if file_size is not None and file_size >= 20 * (1024**2):
        message_id = get_message_id(update)
        bot.send_message(chat_id=chat_id,
                         text=R.get_string_resource(
                             "file_too_big", TBDB.get_chat_lang(chat_id)) + "\n",
                         reply_to_message_id=message_id,
                         parse_mode="html",
                         is_group=chat_id < 0)
        return

    file_id = media.file_id
    file_path = os.path.join(
        config.get_config_prop("app")["media_path"], file_id)

    try:
        # The download is inside the try so that a failed or partial download
        # is logged and its leftover file is still cleaned up.
        file = bot.get_file(file_id)
        file.download(file_path)
        transcribe_audio_file(bot, update, file_path)
    except Exception:
        logger.error("Exception handling %s from %d: %s", name, chat_id,
                     traceback.format_exc())
    finally:
        # The file does not exist if the download failed before creating it.
        if os.path.exists(file_path):
            os.remove(file_path)
def start(self, token):
    """Create the worker pools and the bot, register handlers, start polling.

    Blocks in ``self.updater.idle()`` after polling has started.
    """
    app_config = config.get_config_prop("app")

    # Separate pools for voice, photos, and everything else.
    self.voice_thread_pool = ThreadPoolExecutor(
        max_workers=app_config["voice_max_threads"])
    self.photos_thread_pool = ThreadPoolExecutor(
        max_workers=app_config["photos_max_threads"])
    self.misc_thread_pool = ThreadPoolExecutor(max_workers=2)

    self.queue = mq.MessageQueue()
    self.request = Request(con_pool_size=10)
    self.mqbot = self.MQBot(token, request=self.request, mqueue=self.queue)

    self.updater = Updater(bot=self.mqbot)
    self.dispatcher = self.updater.dispatcher
    self.__register_handlers()

    self.updater.start_polling(clean=True)
    self.updater.idle()
def _send_report_email(report_object):
    '''Render the report template with `report_object` and email it.

    The message goes to the recipients in the `notify` config property, with
    `forklift.log` (located next to the config file) passed as the attachment.
    '''
    log_file = join(dirname(config.config_location), 'forklift.log')

    with open(template, 'r') as template_file:
        template_text = template_file.read()
        email_content = pystache.render(template_text, report_object)

        recipients = config.get_config_prop('notify')
        send_email(recipients, 'Forklift Report', email_content, log_file)
def detect_language(text):
    """Ask the Yandex detect endpoint for the language of ``text``.

    Returns the ``lang`` value of the JSON response, or ``None`` when the
    response has no such entry.
    """
    endpoint = yandex_detect_url.format(
        config.get_config_prop("yandex")["translate_key"])
    response = requests.post(endpoint, data={'text': text})

    payload = response.json()
    return payload['lang'] if 'lang' in payload else None
def send_email(to, subject, body, attachment=''):
    '''
    to: string | string[]
    subject: string
    body: string | MIMEMultipart
    attachment: string - the path to a text file to attach.

    Send an email.

    Sender address and SMTP host/port come from the FORKLIFT_FROM_ADDRESS,
    FORKLIFT_SMTP_SERVER and FORKLIFT_SMTP_PORT environment variables; when any
    of them is missing a warning is logged and nothing is sent (returns None).

    A string body is wrapped as the HTML part of a new multipart message; any
    other body is used as the message as-is. If `attachment` is an existing
    file it is attached base64-encoded.

    Returns the (already closed) SMTP object when the `sendEmails` config
    property is truthy, otherwise logs that nothing was sent and returns None.
    '''
    from_address = environ.get('FORKLIFT_FROM_ADDRESS')
    smtp_server = environ.get('FORKLIFT_SMTP_SERVER')
    smtp_port = environ.get('FORKLIFT_SMTP_PORT')

    if None in [from_address, smtp_server, smtp_port]:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        log.warning('Required environment variables for sending emails do not exist. No emails sent. See README.md for more details.')

        return

    if not isinstance(to, basestring):
        to_addresses = ','.join(to)
    else:
        to_addresses = to

    if isinstance(body, basestring):
        message = MIMEMultipart()
        message.attach(MIMEText(body, 'html'))
    else:
        message = body

    message['Subject'] = subject
    message['From'] = from_address
    message['To'] = to_addresses

    if isfile(attachment):
        log_file_attachment = MIMEBase('application', 'octet-stream')
        log_file_attachment.add_header('Content-Disposition', 'attachment; filename="{}"'.format(basename(attachment)))
        with (open(attachment, 'rb')) as log_file:
            log_file_attachment.set_payload(log_file.read())
        encoders.encode_base64(log_file_attachment)
        message.attach(log_file_attachment)

    if get_config_prop('sendEmails'):
        smtp = SMTP(smtp_server, smtp_port)
        try:
            # The envelope recipients are the original `to` (string or list),
            # not the comma-joined header value.
            smtp.sendmail(from_address, to, message.as_string())
        finally:
            # Close the connection even when sending fails.
            smtp.quit()

        return smtp

    log.info('sendEmails is False. No email sent.')
def start_lift(file_path=None, pallet_arg=None):
    '''Run a full lift: update repos, process crates and pallets, copy data, report.

    file_path: passed through to `_sort_pallets` (default None)
    pallet_arg: passed through to `_sort_pallets` (default None)

    Each phase is timed and its duration logged. The report is emailed, logged,
    and returned in its formatted form.
    '''
    def _elapsed_since(started):
        return seat.format_time(clock() - started)

    log.info('starting forklift')

    # The git update runs before the overall timer starts.
    git_errors = git_update()

    run_started = clock()

    pallets_to_lift, all_pallets = _sort_pallets(file_path, pallet_arg)

    phase_started = clock()
    core.init(log)
    lift.process_crates_for(pallets_to_lift, core.update, config.get_config_prop('configuration'))
    log.info('process_crates time: %s', _elapsed_since(phase_started))

    phase_started = clock()
    lift.process_pallets(pallets_to_lift)
    log.info('process_pallets time: %s', _elapsed_since(phase_started))

    phase_started = clock()
    copy_results = lift.copy_data(pallets_to_lift, all_pallets, config.get_config_prop('copyDestinations'))
    log.info('copy_data time: %s', _elapsed_since(phase_started))

    phase_started = clock()
    lift.process_pallets(pallets_to_lift, is_post_copy=True)
    log.info('post_copy_process time: %s', _elapsed_since(phase_started))

    elapsed_time = _elapsed_since(run_started)
    report_object = lift.create_report_object(pallets_to_lift, elapsed_time, copy_results, git_errors)

    _send_report_email(report_object)

    log.info('Finished in {}.'.format(elapsed_time))

    report = _format_dictionary(report_object)
    log.info('%s', report)

    return report
def git_update():
    '''Clone or pull every configured repository into the warehouse folder.

    Each entry of the `repositories` config property is split on '/' and its
    second segment names the local folder under `warehouse`. A missing folder
    is cloned; an existing one is pulled from its first remote.

    Returns a list of error strings, one per repository whose update raised;
    a failure in one repository does not stop the others.
    '''
    warehouse = config.get_config_prop('warehouse')
    errors = []

    for repo_name in config.get_config_prop('repositories'):
        try:
            folder = join(warehouse, repo_name.split('/')[1])

            if not exists(folder):
                log.info('git cloning: {}'.format(repo_name))
                # `folder` already includes `warehouse`. Joining it again put
                # the clone in warehouse/warehouse/<repo> for relative paths,
                # a location the exists() check above never looks at.
                Repo.clone_from(_repo_to_url(repo_name), folder)
            else:
                log.info('git updating: {}'.format(repo_name))
                repo = _get_repo(folder)
                origin = repo.remotes[0]
                fetch_infos = origin.pull()

                if fetch_infos:
                    # NOTE(review): 4 / 32 / 64 presumably correspond to
                    # GitPython's FetchInfo HEAD_UPTODATE / FORCED_UPDATE /
                    # FAST_FORWARD flags - confirm against the library.
                    if fetch_infos[0].flags == 4:
                        log.debug('no updates to pallet')
                    elif fetch_infos[0].flags in [32, 64]:
                        log.info('updated to %s', fetch_infos[0].commit.name_rev)
        except Exception as e:
            errors.append('Git update error for {}: {}'.format(repo_name, e))

    return errors
def document(bot, update):
    """Queue an audio document for transcription if the chat allows it.

    Documents whose extension (without the leading dot) is not listed in the
    "audio_ext" config entry are ignored. Nothing is queued when the chat's
    voice setting is 0 or 2; any other value submits the document to the
    voice thread pool.
    """
    chat_id = get_chat_id(update)
    voice_enabled = TBDB.get_chat_voice_enabled(chat_id)

    m = update.message or update.channel_post
    if not m or not m.document:
        return

    # file_name can be missing; splitext(None) would raise, so treat it as a
    # name without an extension, which is then rejected below.
    file_name = m.document.file_name or ''
    _, file_ext = os.path.splitext(file_name)

    if file_ext[1:] not in config.get_config_prop("app")["audio_ext"]:
        logger.info('extension %s not recognized', file_ext)
        return

    # 0 and 2 both result in no transcription here.
    # NOTE(review): 0 presumably means "disabled"; the meaning of 2 is not
    # visible from this function - confirm against the settings code.
    if voice_enabled in (0, 2):
        return

    TranscriberBot.get().voice_thread_pool.submit(
        process_media_voice, bot, update, m.document, 'audio_document')
def transcribe_audio_file(bot, update, path):
    """Transcribe the audio file at ``path`` and stream the text into the chat.

    A "transcribing" placeholder message is sent first and then edited as each
    piece of recognized speech arrives, with a "Stop" button attached. When the
    accumulated text would reach 4000 characters, a new message is started as
    a reply to the previous one. After the last piece, the message is edited to
    the final text, or to the "transcription_failed" string if no piece was
    ever delivered.

    If the chat language has no wit API key configured, an "unknown_api_key"
    message is sent and nothing is transcribed. If the transcription is marked
    as stopped while running, the function returns early.
    """
    chat_id = get_chat_id(update)
    lang = TBDB.get_chat_lang(chat_id)
    message_id = get_message_id(update)
    is_group = chat_id < 0

    api_key = config.get_config_prop("wit").get(lang, None)
    if api_key is None:
        logger.error("Language not found in wit.json %s", lang)
        # .result() waits for the queued send to complete.
        bot.send_message(
            chat_id=chat_id,
            text=R.get_string_resource("unknown_api_key", lang).format(language=lang) + "\n",
            reply_to_message_id=message_id,
            parse_mode="html",
            is_group=is_group).result()
        return

    # The key is a credential: log that one was found, not its value.
    logger.debug("Using configured wit.ai key for lang %s", lang)

    message = bot.send_message(
        chat_id=chat_id,
        text=R.get_string_resource("transcribing", lang) + "\n",
        reply_to_message_id=message_id,
        parse_mode="html",
        is_group=is_group).result()

    # The running transcription is tracked under the id of the triggering
    # message; the "Stop" button carries that same id as callback data.
    TranscriberBot.get().start_thread(message_id)
    logger.debug("Starting thread %d", message_id)

    keyboard = InlineKeyboardMarkup(
        [[InlineKeyboardButton("Stop", callback_data=message_id)]])

    text = ""
    if is_group:
        text = R.get_string_resource("transcription_text", lang) + "\n"
    success = False

    for speech in audiotools.transcribe(path, api_key):
        logger.debug("Thread %d running: %r", message_id,
                     TranscriberBot.get().thread_running(message_id))

        if TranscriberBot.get().thread_running(message_id) is False:
            TranscriberBot.get().del_thread(message_id)
            return

        retry = True
        retry_num = 0

        while retry and TranscriberBot.get().thread_running(message_id):
            try:
                if len(text + " " + speech) >= 4000:
                    # Too long for the current message: continue in a new one.
                    text = R.get_string_resource("transcription_continues", lang) + "\n"
                    message = bot.send_message(
                        chat_id=chat_id,
                        text=text + " " + speech + " <b>[...]</b>",
                        reply_to_message_id=message.message_id,
                        parse_mode="html",
                        is_group=is_group,
                        reply_markup=keyboard).result()
                else:
                    message = bot.edit_message_text(
                        text=text + " " + speech + " <b>[...]</b>",
                        chat_id=chat_id,
                        message_id=message.message_id,
                        parse_mode="html",
                        is_group=is_group,
                        reply_markup=keyboard).result()

                text += " " + speech
                retry = False
                success = True

            except telegram.error.TimedOut:
                # Timeouts are retried at most three times per piece.
                logger.error("Timeout error %s", traceback.format_exc())
                retry_num += 1
                if retry_num >= 3:
                    retry = False

            except telegram.error.RetryAfter as r:
                # Rate limited: wait as instructed, then try again.
                logger.warning("Retrying after %d", r.retry_after)
                time.sleep(r.retry_after)

            except telegram.error.TelegramError:
                logger.error("Telegram error %s", traceback.format_exc())
                retry = False

            except Exception:
                logger.error("Exception %s", traceback.format_exc())
                retry = False

    # Final edit: replace the in-progress text (and its keyboard) with the
    # finished transcription or the failure notice.
    retry = True
    retry_num = 0
    while retry and TranscriberBot.get().thread_running(message_id):
        try:
            if success:
                bot.edit_message_text(text=text,
                                      chat_id=chat_id,
                                      message_id=message.message_id,
                                      parse_mode="html",
                                      is_group=is_group)
            else:
                bot.edit_message_text(R.get_string_resource(
                    "transcription_failed", lang),
                                      chat_id=chat_id,
                                      message_id=message.message_id,
                                      parse_mode="html",
                                      is_group=is_group)
            retry = False

        except telegram.error.TimedOut:
            logger.error("Timeout error %s", traceback.format_exc())
            retry_num += 1
            if retry_num >= 3:
                retry = False

        except telegram.error.RetryAfter as r:
            logger.warning("Retrying after %d", r.retry_after)
            time.sleep(r.retry_after)

        except telegram.error.TelegramError:
            logger.error("Telegram error %s", traceback.format_exc())
            retry = False

        except Exception:
            logger.error("Exception %s", traceback.format_exc())
            retry = False

    TranscriberBot.get().del_thread(message_id)
def process_media_photo(bot, update, photo, chat):
    """Download a photo and reply with its QR code content and/or OCR text.

    Args:
        bot: bot used to send/edit messages and fetch the file.
        update: incoming update the photo belongs to.
        photo: sequence of photo sizes; the last entry is the one downloaded.
        chat: mapping with the chat's "photos_enabled", "qr_enabled" and
            "lang" settings.

    When photo recognition is enabled a "photo_recognizing" placeholder is
    sent first and later edited with the result. Telegram timeouts are retried
    up to three times and rate-limit errors are retried after the requested
    delay; other errors are logged. The downloaded file is removed at the end.
    """
    chat_id = get_chat_id(update)
    message_id = get_message_id(update)
    is_group = chat_id < 0

    message = None
    if chat["photos_enabled"] == 1:
        message = bot.send_message(chat_id=chat_id,
                                   text=R.get_string_resource(
                                       "photo_recognizing", chat["lang"]),
                                   reply_to_message_id=message_id,
                                   parse_mode="html",
                                   is_group=is_group).result()

    file_id = photo[-1].file_id
    file_path = os.path.join(
        config.get_config_prop("app")["media_path"], file_id)
    bot.get_file(file_id).download(file_path)

    def process(message):
        # One recognition pass: a QR result takes precedence over OCR.
        if chat["qr_enabled"] == 1:
            qr = phototools.read_qr(file_path, chat["lang"])
            if qr is not None:
                if is_group:
                    qr = R.get_string_resource("qr_result", chat["lang"]) + "\n" + qr

                if message is not None:
                    bot.edit_message_text(text=qr,
                                          chat_id=chat_id,
                                          message_id=message.message_id,
                                          parse_mode="html",
                                          is_group=is_group)
                    return
                else:
                    message = bot.send_message(chat_id=chat_id,
                                               text=qr,
                                               reply_to_message_id=message_id,
                                               parse_mode="html",
                                               is_group=is_group).result()

        if chat["photos_enabled"] == 1:
            text = phototools.image_ocr(file_path, chat["lang"])
            if text is not None:
                if is_group:
                    text = R.get_string_resource("ocr_result", chat["lang"]) + "\n" + text
                bot.edit_message_text(text=text,
                                      chat_id=chat_id,
                                      message_id=message.message_id,
                                      parse_mode="html",
                                      is_group=is_group)
                return

            # NOTE(review): placed inside the photos_enabled branch, since the
            # placeholder message only exists in that case - confirm against
            # the original layout.
            bot.edit_message_text(text=R.get_string_resource(
                "photo_no_text", chat["lang"]),
                                  chat_id=chat_id,
                                  message_id=message.message_id,
                                  parse_mode="html",
                                  is_group=is_group)

    retry = True
    retry_num = 0
    try:
        while retry:
            # The try must sit inside the loop: with it around the loop, the
            # first exception left the loop and nothing was ever retried.
            try:
                process(message)
                retry = False

            except telegram.error.TimedOut:
                logger.error("Timeout error %s", traceback.format_exc())
                retry_num += 1
                if retry_num >= 3:
                    retry = False

            except telegram.error.RetryAfter as r:
                logger.warning("Retrying after %d", r.retry_after)
                time.sleep(r.retry_after)

            except telegram.error.TelegramError:
                logger.error("Telegram error %s", traceback.format_exc())
                retry = False

            except Exception:
                logger.error("Exception %s", traceback.format_exc())
                retry = False
    finally:
        os.remove(file_path)
def _get_db():
    """Return a ``Database`` built from the "database" entry of the app config."""
    db_location = config.get_config_prop("app")["database"]
    return Database(db_location)
def setup_function(function):
    """Per-test setup: load the config and create the schema in ``tmp.db``.

    ``function`` is the test about to run; it is not used here.
    """
    config_dir = os.path.abspath('config')
    config.init(config_dir)

    # Point the app at a throwaway database for the duration of the test.
    config.get_config_prop("app")["database"] = "tmp.db"

    db_path = config.get_config_prop("app")["database"]
    database.init_schema(db_path)
def teardown_function(function):
    """Per-test teardown: delete the database file named in the app config.

    ``function`` is the test that just ran; it is not used here.
    """
    db_path = config.get_config_prop("app")["database"]
    os.remove(db_path)
def filter(self, message):
    """Return True when the message's chat id is one of the configured admins.

    Admin entries are compared against the chat id converted to a string.
    """
    admins = config.get_config_prop('telegram')['admins']
    return any(admin == str(message.chat.id) for admin in admins)
# Entry-point script: sets up logging and, when run directly, initializes the
# project modules and starts the bot.
import coloredlogs, logging
import config
import resources
import database
import antiflood
from telegram.ext import Filters
import transcriberbot
from transcriberbot import TranscriberBot

# Install colored log output at DEBUG level; each record shows the time,
# logger name, level, file, function and line number before the message.
coloredlogs.install(
    level='DEBUG',
    fmt=
    '%(asctime)s - %(name)s - %(levelname)s - %(filename)s [%(funcName)s:%(lineno)d] - %(message)s'
)
logger = logging.getLogger(__name__)

if __name__ == '__main__':
    # Config is initialized first; the database and token lookups below read
    # from it.
    # NOTE(review): '../config' and '../values' are relative paths, presumably
    # resolved against the working directory - confirm how init() uses them.
    config.init('../config')
    resources.init("../values")
    antiflood.init()
    transcriberbot.init()
    database.init_schema(config.get_config_prop("app")["database"])
    # Start the bot with the configured Telegram token.
    TranscriberBot.get().start(config.get_config_prop("telegram")["token"])
def get_language_list():
    """Return the keys of the "languages" mapping from the app config."""
    languages = config.get_config_prop("app")["languages"]
    return languages.keys()
def _get_repos():
    '''Return the value of the `repositories` config property.'''
    repositories = config.get_config_prop('repositories')
    return repositories
def list_pallets():
    '''Return the pallets found in the folder named by the `warehouse` config property.'''
    warehouse = config.get_config_prop('warehouse')
    return _get_pallets_in_folder(warehouse)