Example #1
0
def __transcribe_chunk(chunk, lang):
    """Send one audio chunk to the wit.ai speech endpoint and return its text.

    Args:
        chunk: Object exposing a ``raw_data`` bytes attribute. The request
            declares raw 16-bit signed little-endian audio at 8000 Hz, so the
            chunk is presumably already in that format -- TODO confirm
            against the caller.
        lang: Key into the "wit" configuration section that selects the API
            token to authenticate with.

    Returns:
        The ``_text`` field of the JSON response, or ``None`` when ``lang``
        has no configured token, the request fails, or the response has no
        ``_text`` field.
    """
    wit_keys = config.get_config_prop("wit")
    if lang not in wit_keys:
        logger.error("Language not found in wit.json %s", lang)
        return None

    api_key = wit_keys[lang]
    # Only the language is logged: the token is a credential and must not end
    # up in log files. The module logger is used so the record follows the
    # same handlers/format as the other messages of this function.
    logger.debug("Using wit key for lang %s", lang)

    headers = {
        'authorization': 'Bearer ' + api_key,
        'accept': 'application/vnd.wit.20180705+json',
        'content-type':
        'audio/raw;encoding=signed-integer;bits=16;rate=8000;endian=little',
    }

    text = None
    try:
        response = requests.request("POST",
                                    "https://api.wit.ai/speech",
                                    headers=headers,
                                    params={'verbose': True},
                                    data=io.BufferedReader(
                                        io.BytesIO(chunk.raw_data)))

        logger.debug("Request response %s", response.text)
        res = response.json()

        if '_text' in res:
            text = res['_text']

    except Exception:
        # Best effort: a failed chunk is logged and reported as None so the
        # caller can carry on with the remaining chunks.
        logger.error("Could not transcribe chunk: %s", traceback.format_exc())

    return text
def translate(bot, update):
    """Handle the /translate command for a replied-to message.

    The text following the command is looked up in the configured
    "languages" table. If it is unknown, or the command is not a reply to
    another message, a localized notice is sent to the chat. Otherwise the
    replied-to text is translated from the chat language into the requested
    one and sent back as a reply.
    """
    chat_id = get_chat_id(update)
    message = update.message or update.channel_post
    if not message:
        return

    def localized(resource_key):
        # Resolve a string resource in the language stored for this chat.
        return R.get_string_resource(resource_key,
                                     TBDB.get_chat_lang(chat_id))

    requested = message.text.replace("/translate", "").strip()
    logger.debug("Language %s", requested)

    languages = config.get_config_prop("app")["languages"]
    if requested not in languages:
        bot.send_message(
            chat_id=chat_id,
            text=localized("translate_language_not_found").format(requested),
            is_group=chat_id < 0)
        return

    # Configured values are split on '-' and only the first part is used as
    # the translation target.
    target_lang = languages[requested].split('-')[0]

    if not message.reply_to_message:
        bot.send_message(chat_id=chat_id,
                         text=localized("translate_reply_to_message"),
                         is_group=chat_id < 0)
        return

    translated = translator.translate(source=TBDB.get_chat_lang(chat_id),
                                      target=target_lang,
                                      text=message.reply_to_message.text)
    message.reply_text(translated)
Example #3
0
def init():
  """Load the antiflood settings from the "app" configuration.

  The five values are stored in module-level globals and then logged.
  """
  global flood_ratio, max_flood_ratio, time_threshold_warning, time_threshold_flood, timeout

  settings = config.get_config_prop("app")["antiflood"]
  flood_ratio = settings["flood_ratio"]
  max_flood_ratio = settings["max_flood_ratio"]
  time_threshold_warning = settings["time_threshold_warning"]
  time_threshold_flood = settings["time_threshold_flood"]
  timeout = settings["timeout"]

  report = (
    ("Ratio: %d", flood_ratio),
    ("Max flood ratio: %d", max_flood_ratio),
    ("Thr warning: %d", time_threshold_warning),
    ("Thr flood: %d", time_threshold_flood),
    ("Timeout: %d", timeout),
  )
  for template, value in report:
    logger.info(template, value)
Example #4
0
  def __pre__hook(self, fn, u, c, **kwargs):
    """Run the common checks for an update, then call the handler ``fn``.

    The update is dropped (returns None) when it carries no message or
    channel post, when the message is older than the configured
    ``age_threshold``, or when the chat is currently marked in
    ``self.floods``. Otherwise a default chat entry is created if missing,
    the chat is flagged active if its cached flag is 0, and the result of
    ``fn(bot, u, **kwargs)`` is returned.
    """
    bot = c.bot

    message = u.message or u.channel_post
    if not message:
      return

    # Compare as naive datetimes: tzinfo is stripped from the message date.
    sent_at = message.date.replace(tzinfo=None)
    age = (datetime.utcnow() - sent_at).total_seconds()
    max_age = config.get_config_prop("app")["antiflood"]["age_threshold"]
    if age > max_age:
      return

    chat_id = get_chat_id(u)
    antiflood.on_chat_msg_received(chat_id)

    is_flooding = chat_id in self.floods and self.floods[chat_id] is True
    if is_flooding:
      return

    chat_entry = TBDB.get_chat_entry(chat_id)
    if not chat_entry:
      # happens when welcome/joined message is not received
      TBDB.create_default_chat_entry(chat_id, 'en-US')

    active_cache = self.mqbot.active_chats_cache
    if chat_id in active_cache and active_cache[chat_id] == 0:
      logger.debug("Marking chat {} as active".format(chat_id))
      active_cache[chat_id] = 1
      TBDB.set_chat_active(chat_id, active_cache[chat_id])

    return fn(bot, u, **kwargs)
Example #5
0
def language_handler(bot, update, language):
  """Store the selected language for the chat and confirm it with a reply.

  ``language`` is the key looked up in the configured "languages" table; the
  mapped value is what gets stored for the chat and used to pick the
  confirmation string.
  """
  chat_id = get_chat_id(update)

  # NOTE(review): the stored value is presumably a language/locale code
  # (e.g. 'en-US' is used as a default elsewhere) -- confirm the exact format.
  lang_code = config.get_config_prop("app")["languages"][language]
  TBDB.set_chat_lang(chat_id, lang_code)

  confirmation = R.get_string_resource("language_set", lang_code)
  confirmation = confirmation.replace("{lang}", language)

  target = update.message or update.channel_post
  target.reply_text(confirmation)
Example #6
0
def translate(source, target, text):
    """Translate ``text`` into ``target`` through the Yandex translate URL.

    The language reported by ``detect_language`` replaces ``source`` whenever
    detection returns a value. The returned string is the first entry of the
    response's ``text`` list followed by the Yandex attribution footer.
    """
    detected = detect_language(text)
    if detected is not None:
        source = detected
        print("Autodetected language: {0}".format(detected))

    direction = source + "-" + target
    print(direction)

    endpoint = yandex_translate_url.format(
        config.get_config_prop("yandex")["translate_key"])
    payload = {'lang': direction, 'text': text}
    response = requests.post(endpoint, data=payload)

    print(response)
    body = response.json()
    print(body)

    translated = str(body['text'][0])
    return translated + "\n\nPowered by Yandex.Translate http://translate.yandex.com"
def process_media_voice(bot, update, media, name):
    """Download an audio attachment, transcribe it and delete the local copy.

    Args:
        bot: Bot used to send messages and fetch the file.
        update: Incoming update the media belongs to.
        media: Media object exposing ``file_size`` and ``file_id``.
        name: Label for the media kind, used only in the error log message.

    Files of 20 MiB or more are rejected with a "file_too_big" reply and are
    not downloaded. Any failure while downloading or transcribing is logged;
    nothing is raised to the caller.
    """
    chat_id = get_chat_id(update)
    file_size = media.file_size

    # file_size may be missing; only reject when the size is actually known.
    if file_size is not None and file_size >= 20 * (1024**2):
        message_id = get_message_id(update)
        bot.send_message(chat_id=chat_id,
                         text=R.get_string_resource(
                             "file_too_big", TBDB.get_chat_lang(chat_id)) +
                         "\n",
                         reply_to_message_id=message_id,
                         parse_mode="html",
                         is_group=chat_id < 0)
        return

    file_id = media.file_id
    file_path = os.path.join(
        config.get_config_prop("app")["media_path"], file_id)

    try:
        # The download is inside the try block so that a failed or partial
        # download is logged and cleaned up like any transcription error.
        bot.get_file(file_id).download(file_path)
        transcribe_audio_file(bot, update, file_path)
    except Exception:
        logger.error("Exception handling %s from %d: %s", name, chat_id,
                     traceback.format_exc())
    finally:
        # The file does not exist if the download failed before creating it.
        if os.path.exists(file_path):
            os.remove(file_path)
Example #8
0
    def start(self, token):
        """Create the worker pools and the bot, then poll until idle returns.

        Pool sizes for voice and photo processing come from the "app"
        configuration; the miscellaneous pool is fixed at two workers.
        """
        app_settings = config.get_config_prop("app")
        self.voice_thread_pool = ThreadPoolExecutor(
            max_workers=app_settings["voice_max_threads"])
        self.photos_thread_pool = ThreadPoolExecutor(
            max_workers=app_settings["photos_max_threads"])
        self.misc_thread_pool = ThreadPoolExecutor(max_workers=2)

        # Bot wiring: message queue + HTTP request object -> MQBot -> Updater.
        self.queue = mq.MessageQueue()
        self.request = Request(con_pool_size=10)
        self.mqbot = self.MQBot(token,
                                request=self.request,
                                mqueue=self.queue)
        self.updater = Updater(bot=self.mqbot)
        self.dispatcher = self.updater.dispatcher

        self.__register_handlers()

        self.updater.start_polling(clean=True)
        self.updater.idle()
Example #9
0
File: cli.py Project: agrc/forklift
def _send_report_email(report_object):
    '''Render the report template with `report_object` and email the result.

    The message goes to the addresses configured under `notify`, with the
    forklift.log file next to the config file passed as the attachment path.
    '''
    log_file = join(dirname(config.config_location), 'forklift.log')

    with open(template, 'r') as template_file:
        template_text = template_file.read()

    email_content = pystache.render(template_text, report_object)
    recipients = config.get_config_prop('notify')

    send_email(recipients, 'Forklift Report', email_content, log_file)
Example #10
0
def detect_language(text):
    """Ask the Yandex detect URL for the language of ``text``.

    Returns the ``lang`` value of the JSON response, or ``None`` when the
    response has no such entry.
    """
    endpoint = yandex_detect_url.format(
        config.get_config_prop("yandex")["translate_key"])
    response = requests.post(endpoint, data={'text': text})
    body = response.json()

    return body['lang'] if 'lang' in body else None
Example #11
0
def send_email(to, subject, body, attachment=''):
    '''
    to: string | string[]
    subject: string
    body: string | MIMEMultipart
    attachment: string - the path to a text file to attach.

    Send an email.

    Returns the SMTP object used for sending, or None when nothing was sent
    (required environment variables missing, or the `sendEmails` config
    property is falsy).
    '''
    from_address = environ.get('FORKLIFT_FROM_ADDRESS')
    smtp_server = environ.get('FORKLIFT_SMTP_SERVER')
    smtp_port = environ.get('FORKLIFT_SMTP_PORT')

    if None in [from_address, smtp_server, smtp_port]:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        log.warning('Required environment variables for sending emails do not exist. No emails sent. See README.md for more details.')
        return

    if not isinstance(to, basestring):
        to_addresses = ','.join(to)
    else:
        to_addresses = to

    # A plain string body is wrapped as the HTML part of a new multipart
    # message; anything else is used as the message itself.
    if isinstance(body, basestring):
        message = MIMEMultipart()
        message.attach(MIMEText(body, 'html'))
    else:
        message = body

    message['Subject'] = subject
    message['From'] = from_address
    message['To'] = to_addresses

    if isfile(attachment):
        log_file_attachment = MIMEBase('application', 'octet-stream')
        log_file_attachment.add_header('Content-Disposition', 'attachment; filename="{}"'.format(basename(attachment)))

        with open(attachment, 'rb') as log_file:
            log_file_attachment.set_payload(log_file.read())

        encoders.encode_base64(log_file_attachment)
        message.attach(log_file_attachment)

    if not get_config_prop('sendEmails'):
        log.info('sendEmails is False. No email sent.')
        return

    smtp = SMTP(smtp_server, smtp_port)
    try:
        smtp.sendmail(from_address, to, message.as_string())
    finally:
        # Always end the session so a failed send does not leak the socket.
        smtp.quit()

    return smtp
Example #12
0
File: cli.py Project: agrc/forklift
def start_lift(file_path=None, pallet_arg=None):
    '''Run a full lift and return the formatted report.

    Steps, in order: update the pallet repositories, sort the pallets,
    process crates, process pallets, copy data, run the post-copy pallet
    processing, then build, email and log the report. Each timed step logs
    its own duration.
    '''
    log.info('starting forklift')

    git_errors = git_update()

    # The overall timer starts after the git update has finished.
    lift_started = clock()

    pallets_to_lift, all_pallets = _sort_pallets(file_path, pallet_arg)

    crates_started = clock()
    core.init(log)
    lift.process_crates_for(pallets_to_lift, core.update, config.get_config_prop('configuration'))
    log.info('process_crates time: %s', seat.format_time(clock() - crates_started))

    pallets_started = clock()
    lift.process_pallets(pallets_to_lift)
    log.info('process_pallets time: %s', seat.format_time(clock() - pallets_started))

    copy_started = clock()
    copy_results = lift.copy_data(pallets_to_lift, all_pallets, config.get_config_prop('copyDestinations'))
    log.info('copy_data time: %s', seat.format_time(clock() - copy_started))

    post_copy_started = clock()
    lift.process_pallets(pallets_to_lift, is_post_copy=True)
    log.info('post_copy_process time: %s', seat.format_time(clock() - post_copy_started))

    elapsed_time = seat.format_time(clock() - lift_started)
    report_object = lift.create_report_object(pallets_to_lift, elapsed_time, copy_results, git_errors)

    _send_report_email(report_object)

    log.info('Finished in {}.'.format(elapsed_time))

    formatted_report = _format_dictionary(report_object)
    log.info('%s', formatted_report)

    return formatted_report
Example #13
0
File: cli.py Project: agrc/forklift
def git_update():
    '''Clone or pull every configured repository into the warehouse folder.

    Repository names are expected in `owner/name` form; the `name` part is
    used as the folder name inside the warehouse.

    Returns a list of error message strings, one for each repository whose
    clone or pull raised an exception.
    '''
    warehouse = config.get_config_prop('warehouse')
    errors = []
    for repo_name in config.get_config_prop('repositories'):
        try:
            folder = join(warehouse, repo_name.split('/')[1])
            if not exists(folder):
                log.info('git cloning: {}'.format(repo_name))
                # `folder` already contains the warehouse prefix. Joining it
                # with the warehouse again would clone into
                # warehouse/warehouse/<name> for a relative warehouse path.
                Repo.clone_from(_repo_to_url(repo_name), folder)
                continue

            log.info('git updating: {}'.format(repo_name))
            repo = _get_repo(folder)
            origin = repo.remotes[0]
            fetch_infos = origin.pull()

            if len(fetch_infos) > 0:
                # NOTE(review): these look like GitPython FetchInfo flag
                # values (4 HEAD_UPTODATE, 32 FORCED_UPDATE, 64 FAST_FORWARD)
                # -- confirm against the installed GitPython version.
                flags = fetch_infos[0].flags
                if flags == 4:
                    log.debug('no updates to pallet')
                elif flags in [32, 64]:
                    log.info('updated to %s', fetch_infos[0].commit.name_rev)
        except Exception as e:
            # One failing repository must not stop the others; collect the
            # error so it can be reported by the caller.
            errors.append('Git update error for {}: {}'.format(repo_name, e))

    return errors
def document(bot, update):
    """Queue an audio document for transcription when the chat allows it.

    Documents whose extension is not listed under the configured
    ``audio_ext`` are ignored (and logged). Nothing is queued when the chat's
    voice setting is 0 or 2.
    """
    chat_id = get_chat_id(update)
    voice_enabled = TBDB.get_chat_voice_enabled(chat_id)

    message = update.message or update.channel_post
    extension = os.path.splitext(message.document.file_name)[1]

    # The extension is compared without its leading dot.
    if extension[1:] not in config.get_config_prop("app")["audio_ext"]:
        logger.info('extension %s not recognized', extension)
        return

    if voice_enabled == 0 or voice_enabled == 2:
        return

    pool = TranscriberBot.get().voice_thread_pool
    pool.submit(process_media_voice, bot, update, message.document,
                'audio_document')
def transcribe_audio_file(bot, update, path):
    """Transcribe the audio file at ``path`` and stream the text to the chat.

    Flow:
      1. Look up the chat language and the matching key in the "wit"
         configuration; if there is none, send the "unknown_api_key" notice
         and stop.
      2. Send a "transcribing" placeholder reply and register the job with
         ``TranscriberBot`` under the id of the triggering message.
      3. For every piece of text yielded by ``audiotools.transcribe``, edit
         the current message (or start a follow-up message once the text
         would reach 4000 characters), retrying on Telegram errors.
      4. Replace the last message with the final text, or with the
         "transcription_failed" string if no piece was ever delivered.

    The job is aborted as soon as ``thread_running(message_id)`` is False;
    the "Stop" button attached to the messages carries ``message_id`` as its
    callback data.

    Args:
        bot: Bot used to send and edit messages.
        update: Incoming update that carried the audio.
        path: Path of the downloaded audio file.
    """
    chat_id = get_chat_id(update)
    lang = TBDB.get_chat_lang(chat_id)
    message_id = get_message_id(update)
    # Negative chat ids are treated as groups throughout this function.
    is_group = chat_id < 0

    api_key = config.get_config_prop("wit").get(lang, None)
    if api_key is None:
        logger.error("Language not found in wit.json %s", lang)
        # The returned message is not used; .result() is still called on the
        # value returned by send_message.
        message = bot.send_message(
            chat_id=chat_id,
            text=R.get_string_resource("unknown_api_key",
                                       lang).format(language=lang) + "\n",
            reply_to_message_id=message_id,
            parse_mode="html",
            is_group=is_group).result()
        return
    # NOTE(review): this writes the API key itself to the debug log --
    # consider logging only the language.
    logger.debug("Using key %s for lang %s", api_key, lang)

    # Placeholder reply that later edits will fill with the transcription.
    message = bot.send_message(
        chat_id=chat_id,
        text=R.get_string_resource("transcribing", lang) + "\n",
        reply_to_message_id=message_id,
        parse_mode="html",
        is_group=is_group).result()

    TranscriberBot.get().start_thread(message_id)
    logger.debug("Starting thread %d", message_id)

    # Single "Stop" button whose callback data identifies this job.
    keyboard = InlineKeyboardMarkup(
        [[InlineKeyboardButton("Stop", callback_data=message_id)]])

    # Accumulated text of the message currently being edited; in groups it
    # starts with the "transcription_text" header.
    text = ""
    if is_group:
        text = R.get_string_resource("transcription_text", lang) + "\n"
    # Becomes True once at least one piece of text was delivered.
    success = False

    for speech in audiotools.transcribe(path, api_key):
        logger.debug("Thread %d running: %r", message_id,
                     TranscriberBot.get().thread_running(message_id))
        # Job was stopped: unregister it and leave without a final edit.
        if TranscriberBot.get().thread_running(message_id) is False:
            TranscriberBot.get().del_thread(message_id)
            return

        # Retry state is reset for every piece of text.
        retry = True
        retry_num = 0

        while retry and TranscriberBot.get().thread_running(message_id):
            try:
                if len(text + " " + speech) >= 4000:
                    # Start a follow-up message, replying to the previous one.
                    # NOTE(review): `text` is reset before the send succeeds;
                    # if the send raises and is retried, the length check
                    # above is evaluated against the reset text -- confirm
                    # this is intended.
                    text = R.get_string_resource("transcription_continues",
                                                 lang) + "\n"
                    message = bot.send_message(
                        chat_id=chat_id,
                        text=text + " " + speech + " <b>[...]</b>",
                        reply_to_message_id=message.message_id,
                        parse_mode="html",
                        is_group=is_group,
                        reply_markup=keyboard).result()
                else:
                    # Still fits: edit the current message in place.
                    message = bot.edit_message_text(
                        text=text + " " + speech + " <b>[...]</b>",
                        chat_id=chat_id,
                        message_id=message.message_id,
                        parse_mode="html",
                        is_group=is_group,
                        reply_markup=keyboard).result()

                text += " " + speech
                retry = False
                success = True

            except telegram.error.TimedOut as t:
                # Give up on this piece after three timeouts.
                logger.error("Timeout error %s", traceback.format_exc())
                retry_num += 1
                if retry_num >= 3:
                    retry = False

            except telegram.error.RetryAfter as r:
                # Wait for the requested delay and try again; there is no cap
                # on the number of these retries.
                logger.warning("Retrying after %d", r.retry_after)
                time.sleep(r.retry_after)

            except telegram.error.TelegramError as te:
                logger.error("Telegram error %s", traceback.format_exc())
                retry = False

            except Exception as e:
                logger.error("Exception %s", traceback.format_exc())
                retry = False

    # Final edit: drops the "[...]" marker and the Stop keyboard by sending
    # the text without reply_markup, or reports the failure.
    retry = True
    retry_num = 0
    while retry and TranscriberBot.get().thread_running(message_id):
        try:
            # NOTE(review): unlike the calls above, .result() is not invoked
            # here; if edit_message_text returns a deferred result, the
            # except clauses below may never see its errors -- confirm.
            if success:
                bot.edit_message_text(text=text,
                                      chat_id=chat_id,
                                      message_id=message.message_id,
                                      parse_mode="html",
                                      is_group=is_group)
            else:
                bot.edit_message_text(R.get_string_resource(
                    "transcription_failed", lang),
                                      chat_id=chat_id,
                                      message_id=message.message_id,
                                      parse_mode="html",
                                      is_group=is_group)
            retry = False
        except telegram.error.TimedOut as t:
            logger.error("Timeout error %s", traceback.format_exc())
            retry_num += 1
            if retry_num >= 3:
                retry = False

        except telegram.error.RetryAfter as r:
            logger.warning("Retrying after %d", r.retry_after)
            time.sleep(r.retry_after)

        except telegram.error.TelegramError as te:
            logger.error("Telegram error %s", traceback.format_exc())
            retry = False

        except Exception as e:
            logger.error("Exception %s", traceback.format_exc())
            retry = False

    TranscriberBot.get().del_thread(message_id)
def process_media_photo(bot, update, photo, chat):
    """Download a photo and reply with its QR code content and/or OCR text.

    Args:
        bot: Bot used to send and edit messages and to fetch the file.
        update: Incoming update that carried the photo.
        photo: Sequence of photo sizes; the last entry is the one downloaded.
        chat: Mapping with the chat settings read here: ``photos_enabled``,
            ``qr_enabled`` and ``lang``.

    Telegram timeouts are retried up to three times and ``RetryAfter`` errors
    are retried after the requested delay; any other error is logged and
    ends the processing. The downloaded file is always removed.
    """
    chat_id = get_chat_id(update)
    message_id = get_message_id(update)
    is_group = chat_id < 0

    message = None

    # With OCR enabled a placeholder reply is sent first and edited later.
    if chat["photos_enabled"] == 1:
        message = bot.send_message(chat_id=chat_id,
                                   text=R.get_string_resource(
                                       "photo_recognizing", chat["lang"]),
                                   reply_to_message_id=message_id,
                                   parse_mode="html",
                                   is_group=is_group).result()

    file_id = photo[-1].file_id
    file_path = os.path.join(
        config.get_config_prop("app")["media_path"], file_id)

    def process(message):
        # QR codes take precedence: when one is found and a placeholder
        # exists, the placeholder is replaced and OCR is skipped.
        if chat["qr_enabled"] == 1:
            qr = phototools.read_qr(file_path, chat["lang"])
            if qr is not None:
                if is_group:
                    qr = R.get_string_resource("qr_result",
                                               chat["lang"]) + "\n" + qr

                if message is not None:
                    bot.edit_message_text(text=qr,
                                          chat_id=chat_id,
                                          message_id=message.message_id,
                                          parse_mode="html",
                                          is_group=is_group)
                    return
                else:
                    message = bot.send_message(chat_id=chat_id,
                                               text=qr,
                                               reply_to_message_id=message_id,
                                               parse_mode="html",
                                               is_group=is_group).result()

        if chat["photos_enabled"] == 1:
            text = phototools.image_ocr(file_path, chat["lang"])
            if text is not None:
                if is_group:
                    text = R.get_string_resource("ocr_result",
                                                 chat["lang"]) + "\n" + text
                bot.edit_message_text(text=text,
                                      chat_id=chat_id,
                                      message_id=message.message_id,
                                      parse_mode="html",
                                      is_group=is_group)
                return

            bot.edit_message_text(text=R.get_string_resource(
                "photo_no_text", chat["lang"]),
                                  chat_id=chat_id,
                                  message_id=message.message_id,
                                  parse_mode="html",
                                  is_group=is_group)

    retry = True
    retry_num = 0
    try:
        bot.get_file(file_id).download(file_path)

        while retry:
            # The handlers live inside the loop so that a timeout or a
            # RetryAfter actually triggers another attempt instead of
            # leaving the loop.
            try:
                process(message)
                retry = False

            except telegram.error.TimedOut:
                logger.error("Timeout error %s", traceback.format_exc())
                retry_num += 1
                if retry_num >= 3:
                    retry = False

            except telegram.error.RetryAfter as r:
                logger.warning("Retrying after %d", r.retry_after)
                time.sleep(r.retry_after)

            except telegram.error.TelegramError:
                logger.error("Telegram error %s", traceback.format_exc())
                retry = False

            except Exception:
                logger.error("Exception %s", traceback.format_exc())
                retry = False

    finally:
        # The file does not exist if the download failed before creating it.
        if os.path.exists(file_path):
            os.remove(file_path)
Example #17
0
 def _get_db():
   """Return a Database built from the configured app "database" value."""
   db_location = config.get_config_prop("app")["database"]
   return Database(db_location)
Example #18
0
def setup_function(function):
    """Point the configuration at a throwaway database and create its schema."""
    config.init(os.path.abspath('config'))

    app_settings = config.get_config_prop("app")
    app_settings["database"] = "tmp.db"

    # Read the value back from the configuration, as the code under test does.
    database.init_schema(config.get_config_prop("app")["database"])
Example #19
0
def teardown_function(function):
    """Delete the database file named in the app configuration."""
    db_file = config.get_config_prop("app")["database"]
    os.remove(db_file)
Example #20
0
 def filter(self, message):
     """Return True when the message's chat id is one of the configured admins.

     Admin entries are compared against the chat id converted to a string.
     """
     admins = config.get_config_prop('telegram')['admins']
     return any(admin == str(message.chat.id) for admin in admins)
Example #21
0
import coloredlogs, logging
import config
import resources
import database
import antiflood

from telegram.ext import Filters

import transcriberbot
from transcriberbot import TranscriberBot

# Install coloured console logging at DEBUG level for the whole process.
coloredlogs.install(
    fmt='%(asctime)s - %(name)s - %(levelname)s - %(filename)s [%(funcName)s:%(lineno)d] - %(message)s',
    level='DEBUG',
)
logger = logging.getLogger(__name__)

if __name__ == '__main__':
    # Configuration and string resources are loaded before the modules that
    # are initialised afterwards.
    config.init('../config')
    resources.init("../values")
    antiflood.init()
    transcriberbot.init()

    app_settings = config.get_config_prop("app")
    database.init_schema(app_settings["database"])

    TranscriberBot.get().start(config.get_config_prop("telegram")["token"])
Example #22
0
def get_language_list():
  """Return the keys of the "languages" table from the app configuration."""
  languages = config.get_config_prop("app")["languages"]
  return languages.keys()
Example #23
0
File: cli.py Project: agrc/forklift
def _get_repos():
    '''Return the value of the `repositories` config property.'''
    repositories = config.get_config_prop('repositories')
    return repositories
Example #24
0
File: cli.py Project: agrc/forklift
def list_pallets():
    '''Return the pallets found in the configured `warehouse` folder.'''
    warehouse = config.get_config_prop('warehouse')
    return _get_pallets_in_folder(warehouse)