Esempio n. 1
0
 def get_cities(self):
     """
     Return the entries of the configured region as a name -> name mapping.

     The country and region are read from the "wwis_weather" section of
     the profile and used, in that order, to index self.locations.
     """
     by_region = self.locations[profile.get_profile_var(
         ["wwis_weather", "country"])]
     region_entry = by_region[profile.get_profile_var(
         ["wwis_weather", "region"])]
     # Each key maps to itself.
     return {name: name for name in region_entry}
Esempio n. 2
0
 def __init__(self, *args):
     """
     Set up a WebRTC based voice activity detector from profile settings.

     Reads "timeout", "minimum_capture" and "aggressiveness" from the
     "webrtc_vad" section of the profile, passes the first two to the
     parent class and uses the third to create the webrtcvad.Vad object.

     Arguments:
     args -- positional arguments; only args[0], the input device, is used

     Raises:
     ValueError -- if self._chunktime is not 0.01, 0.02 or 0.03 seconds
     """
     self._logger = logging.getLogger(__name__)
     input_device = args[0]
     timeout = profile.get_profile_var(["webrtc_vad", "timeout"], 1)
     minimum_capture = profile.get_profile_var(
         ["webrtc_vad", "minimum_capture"], 0.25)
     aggressiveness = profile.get_profile_var(
         ["webrtc_vad", "aggressiveness"], 1)
     self._logger.info("timeout: {}".format(timeout))
     self._logger.info("minimum_capture: {}".format(minimum_capture))
     self._logger.info("aggressiveness: {}".format(aggressiveness))
     super(WebRTCPlugin, self).__init__(input_device, timeout,
                                        minimum_capture)
     # Anything other than a recognised aggressiveness level falls back
     # to 1.
     if aggressiveness not in [0, 2, 3]:
         aggressiveness = 1
     self._vad = webrtcvad.Vad(aggressiveness)
     # NOTE(review): self._chunktime is not assigned in this method;
     # presumably the parent constructor sets it - confirm.
     if (self._chunktime not in [0.01, 0.02, 0.03]):
         # From the website:
         #
         # https://github.com/wiseman/py-webrtcvad
         #
         # The WebRTC VAD only accepts 16-bit mono PCM audio, sampled at
         # 8000, 16000, 32000 or 48000 Hz. A frame must be either 10, 20,
         # or 30 ms in duration:
         rate = input_device._input_rate
         # Report chunk sizes as whole sample counts so the suggested
         # values can be copied into the profile as they are.
         chunk_sizes = [
             int(round(rate * seconds)) for seconds in (0.01, 0.02, 0.03)
         ]
         raise ValueError("\n".join([
             "When using WebRTCVAD, chunks are limited to 10, 20,",
             "or 30 milliseconds in length.",
             "At current input rate of {}, the allowed chunk sizes are",
             "{} (10ms), {} (20ms) or {} (30 ms).",
             "Please adjust the value of", "audio: ", "  input_chunksize:",
             "in your ~/.config/naomi/configs/profile.yml file."
         ]).format(rate, *chunk_sizes))
Esempio n. 3
0
    def special_mode(self, name, phrases):
        """
        Temporarily replace the active STT engine with a special-mode one.

        This is a generator: it builds an engine from the plugin registered
        under self.special_stt_slug, installs it as self.active_stt_engine,
        yields once, and always restores the previous engine afterwards.

        Arguments:
        name -- passed through to the plugin class constructor
        phrases -- passed through to the plugin class constructor
        """
        info = self.plugins.get_plugin(self.special_stt_slug,
                                       category='stt')
        config = profile.get_profile()

        previous_engine = self.active_stt_engine

        try:
            engine = info.plugin_class(name, phrases, info, config)
            if not profile.check_profile_var_exists(['special_stt']):
                # No special_stt section in the profile: inherit the audio
                # settings of the engine that is being replaced.
                engine._samplerate = previous_engine._samplerate
                engine._volume_normalization = (
                    previous_engine._volume_normalization)
            else:
                # Apply only the special_stt values that are actually set.
                has_rate = profile.check_profile_var_exists(
                    ['special_stt', 'samplerate'])
                if has_rate:
                    rate = profile.get_profile_var(
                        ['special_stt', 'samplerate'])
                    engine._samplerate = int(rate)
                has_normalization = profile.check_profile_var_exists(
                    ['special_stt', 'volume_normalization'])
                if has_normalization:
                    normalization = profile.get_profile_var(
                        ['special_stt', 'volume_normalization'])
                    engine._volume_normalization = float(normalization)
            self.active_stt_engine = engine
            yield
        finally:
            self.active_stt_engine = previous_engine
Esempio n. 4
0
 def city_isactive(self):
     """
     Tell whether the configured region holds a dictionary of entries.

     Returns:
     True when a country is configured, self.locations has a dictionary
     for it, and the configured region maps to a dictionary inside that
     one. False in every other case, including a country or region that
     is missing from self.locations (this used to raise KeyError).
     """
     country = profile.get_profile_var(["wwis_weather", "country"])
     if not country:
         return False
     regions = self.locations.get(country)
     if not isinstance(regions, dict):
         return False
     region = profile.get_profile_var(["wwis_weather", "region"])
     # .get() so that an unset or unknown region is simply "not active".
     return isinstance(regions.get(region), dict)
Esempio n. 5
0
 def __init__(self, *args, **kwargs):
     """
     Set up the SNR voice activity detector from profile settings.

     Reads "timeout", "minimum_capture" and "threshold" from the
     "snr_vad" section of the profile.

     Arguments:
     args -- positional arguments; only args[0], the input device, is used
     kwargs -- accepted but not used here
     """
     device = args[0]
     vad_timeout = profile.get_profile_var(["snr_vad", "timeout"], 1)
     shortest_capture = profile.get_profile_var(
         ["snr_vad", "minimum_capture"], 0.5
     )
     level_threshold = profile.get_profile_var(
         ["snr_vad", "threshold"], 30
     )
     super(SNRPlugin, self).__init__(device, vad_timeout, shortest_capture)
     # Audio whose decibel level exceeds this value counts as a voice.
     self._threshold = level_threshold
     # Bookkeeping of audio levels; starts out empty.
     self.distribution = {}
Esempio n. 6
0
def mark_read(msg):
    """
        Sets the \\Seen flag on the first message in the mailbox whose
        Message-ID header matches that of msg.

        Arguments:
        msg -- email object; only msg['Message-ID'] is read

        Nothing is changed when the search finds no matching message.
        The mailbox is closed and the connection logged out even if an
        IMAP command fails.
    """
    host = profile.get_profile_var(['email', 'imap', 'server'])
    port = int(profile.get_profile_var(['email', 'imap', 'port'], "993"))
    conn = imaplib.IMAP4_SSL(host, port)
    try:
        conn.debug = 0

        conn.login(profile.get_profile_password(['email', 'username']),
                   profile.get_profile_password(['email', 'password']))
        conn.select(readonly=False)
        try:
            (retcode, messages) = conn.search(
                None, "(HEADER Message-ID {})".format(msg['Message-ID']))
            # An empty result arrives as [b''] (one empty element), so the
            # element has to be checked, not the length of the list.
            matches = []
            if retcode == 'OK' and messages and messages[0]:
                matches = messages[0].split()
            if matches:
                conn.store(matches[0], '+FLAGS', r'\Seen')
        finally:
            conn.close()
    finally:
        conn.logout()
Esempio n. 7
0
    def handle(self, intent, mic):
        """
        Responds to a shutdown request by saying goodbye and then
        exiting the program.

        Arguments:
        intent -- the matched intent (not used by this handler)
        mic -- used to interact with the user (for both input and output)

        Raises:
        SystemExit -- always, once the goodbye message has been spoken
        """
        name = profile.get_profile_var(['first_name'], '')

        messages = [
            self.gettext("I'm shutting down."),
            self.gettext("Shutting down now."),
            self.gettext("Bye Bye."),
            self.gettext("Goodbye, {}").format(name)
        ]

        message = random.choice(messages)

        mic.say(message)
        # specifically wait for Naomi to finish talking
        # here, otherwise it will exit before getting to
        # speak.
        if (profile.get_arg('listen_while_talking', False)
                and hasattr(mic, "current_thread")):
            while (mic.current_thread.is_alive()):
                time.sleep(1)

        # quit() is a helper that the site module installs for interactive
        # sessions and is not guaranteed to exist; raising SystemExit is
        # what it does underneath.
        raise SystemExit
Esempio n. 8
0
 def communicate(self):
     """
     Return canned (stdout, stderr) bytes imitating phonetisaurus output.

     When the profile's pocketsphinx "phonetisaurus_executable" is
     "phonetisaurus-g2p", each pronunciation is wrapped in "<s> ... </s>";
     otherwise the bare pronunciation is used. stderr is always empty.
     """
     wrapped = profile.get_profile_var([
         'pocketsphinx',
         'phonetisaurus_executable'
     ]) == "phonetisaurus-g2p"
     line_format = "{}\t{}\t<s> {} </s>" if wrapped else "{}\t{}\t{}"
     # (word, score, pronunciation)
     entries = (
         ("GOOD", "9.20477", "G UH D"),
         ("GOOD", "14.4036", "G UW D"),
         ("GOOD", "16.0258", "G UH D IY"),
         ("BAD", "0.7416", "B AE D"),
         ("BAD", "12.5495", "B AA D"),
         ("BAD", "13.6745", "B AH D"),
         ("UGLY", "12.572", "AH G L IY"),
         ("UGLY", "17.9278", "Y UW G L IY"),
         ("UGLY", "18.9617", "AH G L AY"),
     )
     stdout = '\n'.join(line_format.format(*entry) for entry in entries)
     return (stdout.encode("utf-8"), "".encode("utf-8"))
Esempio n. 9
0
 def get_location_data(self):
     """
     Download the WMO country list and index it in self.locations.

     The parsed document is stored in self.locationdata. self.locations
     maps each member name to a dictionary whose values are either a
     city id (for names without ", ") or a nested dictionary of
     city name -> city id (for names of the form "City, Region").
     """
     # Set the language used for the location data
     language = profile.get_profile_var(["language"], "en")[:2]
     url = "https://worldweather.wmo.int/en/json/Country_{}.xml".format(
         language)
     response = requests.get(url, timeout=2)
     self.locationdata = json.loads(str(response.content, 'utf-8'))
     # Make a list of locations
     self.locations = {}
     # The keys of "member" are just indexes; entries that are not
     # dictionaries are skipped.
     for member in self.locationdata["member"].values():
         if not isinstance(member, dict):
             continue
         places = self.locations[member["memName"]] = {}
         for city in member["city"]:
             # Split on the LAST ", " only: a name holding more than one
             # separator used to raise ValueError when unpacked into
             # exactly two names. Everything before the last separator
             # is kept as the city name.
             city_name, separator, region_name = (
                 city["cityName"].rpartition(", "))
             if separator:
                 places.setdefault(region_name, {})[city_name] = (
                     city["cityId"])
             else:
                 places[city["cityName"]] = city["cityId"]
Esempio n. 10
0
 def __init__(self, *args, **kwargs):
     """
     Read the passive-listen flag and the wake keyword from the profile.

     Arguments:
     args -- accepted but not used here
     kwargs -- accepted but not used here
     """
     self.passive_listen = profile.get_profile_flag(["passive_listen"])
     configured = profile.get_profile_var(['keyword'], 'NAOMI')
     # When a list of keywords is configured only the first one is kept.
     self._keyword = (
         configured[0] if isinstance(configured, list) else configured
     )
Esempio n. 11
0
 def get_city_id(self):
     """
     Look up the configured location in self.locations.

     Returns:
     A (city, cityId) tuple. When the configured region maps to a
     dictionary, the configured city is looked up inside it. Otherwise
     the region entry itself is the id and the region name is returned
     as the city. (None, None) is returned when the country, region or
     city cannot be found.
     """
     country = profile.get_profile_var(['wwis_weather', 'country'], "")
     region = profile.get_profile_var(['wwis_weather', 'region'], "")
     city = profile.get_profile_var(['wwis_weather', 'city'], "")
     # This first lookup used to sit outside any try block, so an unset
     # country or region raised KeyError instead of reporting "not found".
     try:
         entry = self.locations[country][region]
     except KeyError:
         return None, None
     # check if we have a city or region
     if not isinstance(entry, dict):
         return region, entry
     try:
         return city, entry[city]
     except KeyError:
         return None, None
Esempio n. 12
0
    def _regenerate_config(self):
        """
        Rebuild self._config, the recognition config for the speech client.

        The profile's "keyword" value (one string or a list of strings)
        is supplied as speech context phrases; no speech context is sent
        when there are no phrases.
        """
        keyword = profile.get_profile_var(["keyword"], "Naomi")
        if isinstance(keyword, str):
            # list.extend() on a string would add one phrase per
            # character ("N", "a", "o", ...), so wrap it instead.
            phrases = [keyword] if keyword else []
        else:
            phrases = list(keyword or [])

        self._config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            language_code=profile.get(['language'], 'en-US'),
            speech_contexts=[speech.types.SpeechContext(
                phrases=phrases)] if phrases else None,
            model="command_and_search")
Esempio n. 13
0
 def get_regions(self):
     """
     Return the entries of the configured country as a name -> name mapping.

     Prints a notice when the configured country has no entries at all.
     """
     country = profile.get_profile_var(['wwis_weather', 'country'])
     regions = {name: name for name in self.locations[country]}
     if len(regions) == 0:
         print(
             "Weather information is not available in {}.".format(country))
         print("Please check nearby cities in other countries")
     return regions
Esempio n. 14
0
def fetch_emails(since=None, email_filter="", markRead=False, limit=None):
    """
        Fetches a list of email objects matching email_filter from a
        user's Email inbox.

        Arguments:
        since -- if provided, no emails before this date will be returned
        email_filter -- search criterion passed to the IMAP SEARCH command
        markRead -- if False, the mailbox is opened read-only
        limit -- if provided and more messages than this match, the
                 number of matching messages is returned instead of
                 the messages themselves

        Returns:
        A list of email objects, or an integer count when limit is
        exceeded.
    """
    host = profile.get_profile_var(['email', 'imap', 'server'])
    port = int(profile.get_profile_var(['email', 'imap', 'port'], "993"))
    conn = imaplib.IMAP4_SSL(host, port)
    # The try/finally pairs make sure the mailbox is closed and the
    # session logged out on every exit path, including the early return
    # below, which used to leave the connection open.
    try:
        conn.debug = 0

        conn.login(profile.get_profile_password(['email', 'username']),
                   profile.get_profile_password(['email', 'password']))
        conn.select(readonly=(not markRead))
        try:
            msgs = []
            (retcode, messages) = conn.search(None, email_filter)
            if retcode == 'OK' and messages != [b'']:
                message_nums = messages[0].split(b' ')
                numUnread = len(message_nums)
                if limit and numUnread > limit:
                    return numUnread

                for num in message_nums:
                    # parse email RFC822 format
                    logging.info("num = {}".format(num))
                    (retcode, data) = conn.fetch(num, '(RFC822)')
                    raw_email = data[0][1]
                    # Mail is not guaranteed to be valid UTF-8; replace
                    # undecodable bytes rather than abort the whole fetch.
                    raw_email_str = raw_email.decode("utf-8",
                                                     errors="replace")
                    msg = email.message_from_string(raw_email_str)
                    if not since or get_date(msg) > since:
                        msgs.append(msg)
            return msgs
        finally:
            conn.close()
    finally:
        conn.logout()
Esempio n. 15
0
    def __init__(self, *args, **kwargs):
        """
        Create the speech client and build the recognition config.

        When the environment variable named by google_env_var is set, the
        client is created without explicit credentials; otherwise the
        service account file named by the profile's "google" ->
        "credentials_json" value is loaded and passed to the client.

        Arguments:
        args, kwargs -- forwarded unchanged to plugin.STTPlugin.__init__
        """
        plugin.STTPlugin.__init__(self, *args, **kwargs)
        # FIXME: get init args from config

        if google_env_var not in os.environ:
            key_file = profile.get_profile_var(
                ["google", "credentials_json"])
            self._client = speech.SpeechClient(
                credentials=service_account.Credentials
                .from_service_account_file(key_file))
        else:
            self._client = speech.SpeechClient()
        self._regenerate_config()
Esempio n. 16
0
    def __init__(self, *args, **kwargs):
        """
        Create the text-to-speech client, voice selection and audio config.

        When the environment variable named by google_env_var is set, the
        client is created without explicit credentials; otherwise the
        service account file named by the profile's "google" ->
        "credentials_json" value is loaded and passed to the client.

        Arguments:
        args, kwargs -- forwarded unchanged to plugin.TTSPlugin.__init__
        """
        plugin.TTSPlugin.__init__(self, *args, **kwargs)

        self._logger = logging.getLogger(__name__)

        self.language = profile.get_profile_var(['language'], 'en-US')

        if google_env_var not in os.environ:
            key_file = profile.get_profile_var(
                ["google", "credentials_json"])
            self.client = texttospeech.TextToSpeechClient(
                credentials=service_account.Credentials
                .from_service_account_file(key_file))
        else:
            self.client = texttospeech.TextToSpeechClient()

        # Voice request: the profile language with a neutral voice gender.
        neutral = texttospeech.enums.SsmlVoiceGender.NEUTRAL
        self.voice = texttospeech.types.VoiceSelectionParams(
            language_code=self.language, ssml_gender=neutral)

        # Audio is requested in LINEAR16 encoding.
        linear16 = texttospeech.enums.AudioEncoding.LINEAR16
        self.audio_config = texttospeech.types.AudioConfig(
            audio_encoding=linear16)
Esempio n. 17
0
 def __init__(self, *args, **kwargs):
     """
     Work out the directories used by the plugin and create the
     directory holding the standard model for the profile language.

     Only <pocketsphinx>/standard/<language> (with its parents) is
     created here; the working, model, adapt and audiolog paths are
     only recorded as attributes.

     Arguments:
     args, kwargs -- forwarded unchanged to the parent constructor
     """
     self._logger = logging.getLogger(__name__)
     self.language = profile.get_profile_var(['language'])
     self.base_working_dir = paths.sub("pocketsphinx")
     self.standard_dir = os.path.join(self.base_working_dir, "standard",
                                      self.language)
     # One makedirs call replaces three isdir()/mkdir() pairs: it also
     # creates missing parents and does not fail if another process
     # creates a directory between the check and the mkdir.
     os.makedirs(self.standard_dir, exist_ok=True)
     self.working_dir = os.path.join(self.base_working_dir, "working")
     self.model_dir = os.path.join(self.working_dir, self.language)
     self.adapt_dir = os.path.join(self.base_working_dir, "adapt",
                                   self.language)
     self.formatteddict_path = os.path.join(self.adapt_dir,
                                            "cmudict.formatted.dict")
     self.audiolog_dir = paths.sub("audiolog")
     self.audiolog_db = os.path.join(self.audiolog_dir, "audiolog.db")
     super(PocketsphinxAdaptPlugin, self).__init__(*args, **kwargs)
Esempio n. 18
0
    def setUp(self):
        """
        Prepare the audio clips and the Google STT engines for the tests.

        The test is skipped when no credentials are configured (neither
        through the environment variable nor the profile), when the
        credentials file does not exist, or when creating the plugin
        raises ImportError.
        """
        self.naomi_clip = paths.data('audio', 'naomi.wav')
        self.time_clip = paths.data('audio', 'time.wav')

        # google_env_var = "GOOGLE_APPLICATION_CREDENTIALS"
        google_env_var = google.google_env_var
        if (google_env_var in os.environ):
            credentials_json = os.getenv(google_env_var)
        elif profile.check_profile_var_exists(["google", "credentials_json"]):
            credentials_json = profile.get_profile_var(
                ["google", "credentials_json"])
        else:
            self.skipTest("Please set " + google_env_var)
        if (not os.path.isfile(os.path.expanduser(credentials_json))):
            # The method is skipTest (capital T); the lower-case spelling
            # raised AttributeError instead of skipping the test.
            self.skipTest(
                "Credentials file {} does not exist".format(credentials_json))

        try:
            self.passive_stt_engine = testutils.get_plugin_instance(
                google.GoogleSTTPlugin, 'unittest-passive', ['NAOMI'])
            self.active_stt_engine = testutils.get_plugin_instance(
                google.GoogleSTTPlugin, 'unittest-active', ['TIME'])
        except ImportError:
            self.skipTest("Google STT not installed!")
Esempio n. 19
0
 def settings(self):
     """
     Describe the settings of the weather plugin.

     Refreshes the location data first, then returns an OrderedDict that
     maps each ('wwis_weather', <name>) key to the definition of the
     country, region, city and temperature settings, in that order.
     """
     self.get_location_data()
     _ = self.gettext

     def country_selected():
         # True once a non-empty country is stored in the profile.
         if not profile.check_profile_var_exists(
                 ['wwis_weather', 'country']):
             return False
         country = profile.get_profile_var(["wwis_weather", "country"])
         return len(country) > 0

     country_setting = {
         'type': 'listbox',
         'title': _('Please select your country from the list'),
         'description': "".join([
             _('This value is being used to help locate your Area ID, which will be used to provide weather information')
         ]),
         'options': self.get_countries,
     }
     region_setting = {
         'type': 'listbox',
         'title': _('Please select your region or city from the list'),
         'description': _('Please select your region or city from the list, which will be used to provide weather information'),
         'options': self.get_regions,
         'active': country_selected,
     }
     city_setting = {
         'type': 'listbox',
         'title': _('Please select your city from the list'),
         'description': _('Please select your city from the list. This will be used as the default location when providing weather information'),
         'options': self.get_cities,
         # Whether this setting is offered is decided by
         # self.city_isactive.
         'active': self.city_isactive,
     }
     temperature_setting = {
         'type': 'listbox',
         'title': _("Would you prefer weather in Celcius or Fahrenheit?"),
         'description': _("Allows you to receive your weather forecast in either Celcius or Fahrenheit"),
         'options': ['Celcius', 'Fahrenheit'],
         'active': country_selected,
         'default': 'Fahrenheit',
     }
     return OrderedDict([
         (('wwis_weather', 'country'), country_setting),
         (('wwis_weather', 'region'), region_setting),
         (('wwis_weather', 'city'), city_setting),
         (('wwis_weather', 'temperature'), temperature_setting),
     ])
Esempio n. 20
0
    def handle(self, intent, mic):
        """
        Speaks the weather forecast for the configured location.

        If the input contains "today" or "tomorrow" only that day's
        weather is spoken; otherwise every day of the forecast is spoken
        together with its low and high temperatures. When nothing could
        be reported a fallback remark is spoken instead.

        Arguments:
        intent -- the matched intent; only intent['input'] is read
        mic -- used to speak the response
        """
        # Ideally, we could use our list of countries to check if any country
        # appears in the input, then check for regions in the current country,
        # and finally cities in the selected region, so I should be able to
        # ask for the weather in Paris, France and have it tell me even if my
        # base location is Hoboken, New Jersey.
        # For now we just check to see if "Today" or "Tomorrow" appear
        # in the text, and return the requested day's weather.
        # First, establish the cityId
        _ = self.gettext
        text = intent['input']
        city, cityId = self.get_city_id()
        country = profile.get_profile_var(["wwis_weather", "country"])
        snark = True
        if (cityId):
            # Next, pull the weather data for City
            language = profile.get_profile_var(["language"], "en")[:2]
            url = "https://worldweather.wmo.int/en/json/{}_{}.xml".format(
                cityId, language)
            # Without a timeout an unresponsive server would block this
            # call indefinitely.
            reply = requests.get(url, timeout=10)
            weatherdata = json.loads(str(reply.content, 'utf-8'))

            # The unit preference is the same for every day, so it is
            # looked up once rather than once per forecast day.
            use_fahrenheit = profile.get(['wwis_weather', 'temperature'],
                                         "f")[:1].lower() == "f"
            if use_fahrenheit:
                high_key, low_key = "maxTempF", "minTempF"
            else:
                high_key, low_key = "maxTemp", "minTemp"

            forecast = {}
            for day in weatherdata["city"]["forecast"]["forecastDay"]:
                forecast[day["forecastDate"]] = {
                    "weather": day["weather"],
                    "high": day[high_key],
                    "low": day[low_key],
                }
            if (not forecast):
                mic.say(
                    _("Sorry, forecast information is not currently available for {} in {}"
                      ).format(weatherdata["city"]["cityName"], country))
            today = datetime.date.today()
            tomorrow = today + datetime.timedelta(days=1)
            # Forecast dates are keyed as YYYY-MM-DD
            todaydate = today.isoformat()
            tomorrowdate = tomorrow.isoformat()
            lowered = text.lower()
            if (_("today") in lowered):
                if (todaydate in forecast):
                    mic.say(
                        _("The weather today in {} is {}").format(
                            city, forecast[todaydate]["weather"]))
                    snark = False
            elif (_("tomorrow") in lowered):
                if (tomorrowdate in forecast):
                    mic.say(
                        _("The weather tomorrow in {} will be {}").format(
                            city, forecast[tomorrowdate]["weather"]))
                    snark = False
            else:
                first = True
                for day in sorted(forecast):
                    if (day == todaydate):
                        DOW = _("today")
                    elif (day == tomorrowdate):
                        DOW = _("tomorrow")
                    else:
                        DOW = WEEKDAY_NAMES[datetime.datetime.strptime(
                            day, "%Y-%m-%d").weekday()]
                    low = forecast[day]["low"]
                    high = forecast[day]["high"]
                    if (first):
                        # Only the first sentence names the city
                        response = _(
                            "{} in {}, the weather will be {}").format(
                                DOW, city, forecast[day]["weather"])
                        first = False
                    else:
                        response = _("{}, the weather will be {}").format(
                            DOW, forecast[day]["weather"])
                    if (low and high):
                        response += _(
                            " with a low of {} and a high of {} degrees"
                        ).format(low, high)
                    elif (low):
                        response += _(" with a low of {} degrees").format(
                            low)
                    elif (high):
                        response += _(" with a high of {} degrees").format(
                            high)
                    mic.say(response)
                    snark = False
        if snark:
            mic.say(_("I don't know. Why don't you look out the window?"))
Esempio n. 21
0
    def settings(self):
        """
        Work out default values for the pocketsphinx settings and return
        the settings definitions.

        The hmm directory and FST model are taken from the profile or,
        failing that, from a list of well-known locations. If either is
        still missing, the standard model for the profile language is
        cloned with git, the g2p dictionary is formatted and an FST model
        is trained with phonetisaurus-train.

        Returns:
        An OrderedDict describing the 'hmm_dir', 'fst_model' and
        'phonetisaurus_executable' settings of the 'pocketsphinx' section.
        """
        # Get the defaults for settings
        # hmm_dir
        hmm_dir = profile.get(
            ['pocketsphinx', 'hmm_dir']
        )
        if(not hmm_dir):
            # Make a list of possible paths to check
            hmm_dir_paths = [
                os.path.join(
                    os.path.expanduser("~"),
                    "pocketsphinx-python",
                    "pocketsphinx",
                    "model",
                    "en-us",
                    "en-us"
                ),
                os.path.join(
                    os.path.expanduser("~"),
                    "pocketsphinx",
                    "model",
                    "en-us",
                    "en-us"
                ),
                os.path.join(
                    "/",
                    "usr",
                    "share",
                    "pocketsphinx",
                    "model",
                    "en-us",
                    "en-us"
                ),
                os.path.join(
                    "/usr",
                    "local",
                    "share",
                    "pocketsphinx",
                    "model",
                    "hmm",
                    "en_US",
                    "hub4wsj_sc_8k"
                )
            ]
            # see if any of these paths exist; there is no break, so the
            # last existing path in the list is the one that is kept
            for path in hmm_dir_paths:
                if os.path.isdir(path):
                    hmm_dir = path
        # fst_model
        fst_model = profile.get_profile_var(["pocketsphinx", "fst_model"])
        if not fst_model:
            # Make a list of possible paths to check
            fst_model_paths = [
                os.path.join(
                    paths.sub(
                        os.path.join(
                            "pocketsphinx",
                            "adapt",
                            "en-US",
                            "train",
                            "model.fst"
                        )
                    )
                ),
                os.path.join(
                    os.path.expanduser("~"),
                    "pocketsphinx-python",
                    "pocketsphinx",
                    "model",
                    "en-us",
                    "train",
                    "model.fst"
                ),
                os.path.join(
                    os.path.expanduser("~"),
                    "cmudict",
                    "train",
                    "model.fst"
                ),
                os.path.join(
                    os.path.expanduser("~"),
                    "CMUDict",
                    "train",
                    "model.fst"
                ),
                os.path.join(
                    os.path.expanduser("~"),
                    "phonetisaurus",
                    "g014b2b.fst"
                )
            ]
            # as above, the last existing path in the list is kept
            for path in fst_model_paths:
                if os.path.isfile(path):
                    fst_model = path
        # If either the hmm dir or fst model is missing, then
        # download the standard model
        have_hmm_dir = bool(hmm_dir) and os.path.isdir(hmm_dir)
        have_fst_model = bool(fst_model) and os.path.isfile(fst_model)
        if not (have_hmm_dir and have_fst_model):
            # Start by checking to see if we have a copy of the standard
            # model for this user's chosen language and download it if not.
            # Check for the files we need
            language = profile.get_profile_var(['language'])
            standard_dir = os.path.join(
                paths.sub("pocketsphinx"),
                "standard",
                language
            )
            # Creates the pocketsphinx and standard directories as well
            # when they are missing
            os.makedirs(standard_dir, exist_ok=True)
            hmm_dir = standard_dir
            fst_model = os.path.join(hmm_dir, "train", "model.fst")
            formatteddict_path = os.path.join(
                hmm_dir,
                "cmudict.formatted.dict"
            )
            if(not check_pocketsphinx_model(hmm_dir)):
                # Check and see if we already have a copy of the standard
                # language model
                print("Downloading and installing the {} pocketsphinx language model".format(language))
                cmd = [
                    'git',
                    'clone',
                    '-b',
                    language,
                    'https://github.com/NaomiProject/CMUSphinx_standard_language_models.git',
                    hmm_dir
                ]
                completedprocess = run_command(cmd)
                self._logger.info(process_completedprocess(completedprocess))
            if(not os.path.isfile(formatteddict_path)):
                print("Formatting the g2p dictionary")
                source_dict = os.path.join(standard_dir, "cmudict.dict")
                with open(source_dict, "r") as in_file, \
                        open(formatteddict_path, "w+") as out_file:
                    for line in in_file:
                        # Remove whitespace at beginning and end
                        line = line.strip()
                        # remove the number in parentheses (if there is one)
                        line = re.sub(r'([^\(]+)\(\d+\)', r'\1', line)
                        # compress all multiple whitespaces into a single
                        # whitespace (raw string: '\s' in a normal string
                        # is an invalid escape sequence)
                        line = re.sub(r'\s+', ' ', line)
                        # replace the first whitespace with a tab
                        line = line.replace(' ', '\t', 1)
                        print(line, file=out_file)
            if(not os.path.isfile(fst_model)):
                # Use phonetisaurus to prepare an fst model
                print("Training an FST model")
                cmd = [
                    "phonetisaurus-train",
                    "--lexicon", formatteddict_path,
                    "--seq2_del",
                    "--dir_prefix", os.path.join(hmm_dir, "train")
                ]
                completedprocess = run_command(cmd)
                self._logger.info(process_completedprocess(completedprocess))

        phonetisaurus_executable = profile.get_profile_var(
            ['pocketsphinx', 'phonetisaurus_executable']
        )
        if(not phonetisaurus_executable):
            if(check_program_exists('phonetisaurus-g2pfst')):
                phonetisaurus_executable = 'phonetisaurus-g2pfst'
            else:
                phonetisaurus_executable = 'phonetisaurus-g2p'
        _ = self.gettext
        return OrderedDict(
            [
                (
                    ('pocketsphinx', 'hmm_dir'), {
                        'title': _('PocketSphinx hmm file'),
                        'description': "".join([
                            _('PocketSphinx hidden markov model directory')
                        ]),
                        'default': hmm_dir
                    }
                ),
                (
                    ('pocketsphinx', 'fst_model'), {
                        'title': _('PocketSphinx FST file'),
                        'description': "".join([
                            _('PocketSphinx finite state transducer file')
                        ]),
                        'default': fst_model
                    }
                ),
                (
                    ('pocketsphinx', 'phonetisaurus_executable'), {
                        'title': _('Phonetisaurus executable'),
                        'description': "".join([
                            _('Phonetisaurus is used to build custom dictionaries')
                        ]),
                        'default': phonetisaurus_executable
                    }
                ),
            ]
        )
Esempio n. 22
0
    def HandleCommand(self, command, description):
        """
        Run a single step of the Pocketsphinx model adaptation.

        The adaptation is a chain of steps. Each call performs the step
        named by ``command`` and returns the name of the step that should
        be requested next:

            "" -> checkenviron -> prepareworkingdir -> featureextraction
               -> buildweights -> mllr -> map -> sendump -> updateprofile

        Arguments:
            command -- name of the step to run ("" starts the chain)
            description -- list of strings describing this training run.
                           It is appended to in place and, in the final
                           step, joined with spaces and stored in the
                           ``trainings`` table.

        Returns a tuple ``(response, nextcommand, description)`` where
        ``response`` is a list of html fragments to show the user and
        ``nextcommand`` is the next step to run, or "" when the chain is
        finished or a step failed.

        Any exception raised while running a step is logged and reported
        in ``response`` instead of being propagated to the caller.
        """
        # These are initialised before the try block so that the except
        # handler and the return statement can always use them, even when
        # opening the database itself fails.
        response = []
        continue_next = True
        nextcommand = ""
        conn = None

        def error_text(exc):
            # Python 3 exceptions normally have no "message" attribute, so
            # fall back to the string form of the exception.
            return getattr(exc, "message", None) or str(exc) or "Unknown"

        def run_step(cmd):
            # Run an external command, add its output to the response as
            # html and report whether it exited successfully.
            completedprocess = run_command(cmd)
            response.append(
                process_completedprocess(completedprocess, output='html'))
            return completedprocess.returncode == 0

        def sphinxtrain_tool(name):
            # Prefer a locally built sphinxtrain binary over the packaged
            # one.
            local_tool = '/usr/local/libexec/sphinxtrain/{}'.format(name)
            if os.path.isfile(local_tool):
                return local_tool
            return '/usr/lib/sphinxtrain/{}'.format(name)

        try:
            conn = sqlite3.connect(self.audiolog_db)
            c = conn.cursor()
            if command == "":
                response.append(
                    "<h2>Preparing to adapt Pocketsphinx model</h2>")
                description.append(
                    "Adapting standard {} pocketsphinx model".format(
                        self.language))
                nextcommand = "checkenviron"
            elif command == "checkenviron":
                # Start by checking to see if we have a copy of the standard
                # model for this user's chosen language and download it if
                # not.
                if not check_pocketsphinx_model(self.standard_dir):
                    cmd = [
                        'git', 'clone', '-b', self.language,
                        'https://github.com/NaomiProject/CMUSphinx_standard_language_models.git',
                        self.standard_dir
                    ]
                    if not run_step(cmd):
                        continue_next = False
                # Only claim success when the clone (if any) succeeded
                if continue_next:
                    response.append("Environment configured")
                nextcommand = "prepareworkingdir"
            elif command == "prepareworkingdir":
                # At this point, we should have the standard model we need
                if check_pocketsphinx_model(self.standard_dir):
                    # FIXME It might be safest to remove the working dir at
                    # this point if it already exists
                    if not os.path.isdir(self.model_dir):
                        # Copy the sphinx model into model_dir
                        shutil.copytree(self.standard_dir, self.model_dir)
                    if check_pocketsphinx_model(self.model_dir):
                        # Select every reviewed active/passive recording
                        # along with its cleaned up transcription
                        query = " ".join([
                            "select", " rowid,", " case",
                            "  when length(trim(verified_transcription))>0",
                            "   then (length(trim(verified_transcription))-length(replace(trim(verified_transcription),' ','')))+1",
                            "  else 0", " end as WordCount,", " filename,",
                            " upper(trim(replace(replace(verified_transcription,'?',''),',',''))) as transcription",
                            "from audiolog",
                            "where type in('active','passive') and reviewed!=''"
                        ])
                        df = pd.read_sql_query(query, conn)
                        # Take the above and create naomi.fileids and
                        # naomi.transcription
                        description.append("on {} wav files".format(
                            str(df.shape[0])))
                        response.append("Adapting on {} wav files".format(
                            df.shape[0]))
                        fileids_path = os.path.join(
                            self.working_dir, "naomi.fileids")
                        with open(fileids_path, "w+") as f:
                            for filename in df['filename']:
                                # No need to copy the file, just leave it
                                # in audiolog. The extension is dropped.
                                f.write("{}\n".format(
                                    filename.rsplit(".", 1)[0]))
                        transcription_path = os.path.join(
                            self.working_dir, "naomi.transcription")
                        with open(transcription_path, "w+") as f:
                            for t in df['transcription']:
                                f.write("<s> {} </s>\n".format(t.lower()))
                        nextcommand = "featureextraction"
                    else:
                        response.append(
                            "Error: failed to populate working model")
                else:
                    # Without the standard model there is nothing to adapt;
                    # say so instead of silently stopping.
                    response.append(
                        "Error: standard model not found in {}".format(
                            self.standard_dir))
            elif command == "featureextraction":
                cmd = [
                    'sphinx_fe', '-argfile',
                    os.path.join(self.model_dir, 'feat.params'),
                    '-samprate', '16000', '-c',
                    os.path.join(self.working_dir, 'naomi.fileids'), '-di',
                    self.audiolog_dir, '-do', self.working_dir, '-ei', 'wav',
                    '-eo', 'mfc', '-mswav', 'yes'
                ]
                if not run_step(cmd):
                    continue_next = False
                nextcommand = "buildweights"
            elif command == "buildweights":
                cmd = [
                    sphinxtrain_tool('bw'), '-hmmdir', self.model_dir,
                    '-moddeffn',
                    os.path.join(self.model_dir, 'mdef.txt'),
                    '-ts2cbfn', '.ptm.', '-feat',
                    '1s_c_d_dd', '-svspec', '0-12/13-25/26-38', '-cmn',
                    'current', '-agc', 'none', '-dictfn',
                    os.path.join(self.model_dir, 'cmudict.dict'), '-ctlfn',
                    os.path.join(self.working_dir, 'naomi.fileids'), '-lsnfn',
                    os.path.join(self.working_dir, 'naomi.transcription'),
                    '-cepdir', self.working_dir, '-accumdir', self.working_dir
                ]
                if not run_step(cmd):
                    continue_next = False
                nextcommand = "mllr"
            elif command == "mllr":
                # MLLR is a cheap adaptation method that is suitable when
                # the amount of data is limited. It's good for online
                # adaptation. MLLR works best for a continuous model. Its
                # effect for semi-continuous models is limited.
                cmd = [
                    sphinxtrain_tool('mllr_solve'), '-meanfn',
                    os.path.join(self.model_dir, 'means'), '-varfn',
                    os.path.join(self.model_dir, 'variances'), '-outmllrfn',
                    os.path.join(self.model_dir, 'mllr_matrix'), '-accumdir',
                    self.working_dir
                ]
                if not run_step(cmd):
                    continue_next = False
                nextcommand = "map"
            elif command == "map":
                # Update the acoustic model files with MAP. In this case,
                # unlike MLLR, we don't create a generic transform, but
                # update each parameter in the model. We copy the acoustic
                # model directory and overwrite the new directory with the
                # adapted model files.
                if os.path.isdir(self.adapt_dir):
                    # Remove the adapt dir
                    shutil.rmtree(self.adapt_dir)
                    response.append("Cleared adapt directory {}".format(
                        self.adapt_dir))
                shutil.copytree(self.model_dir, self.adapt_dir)
                cmd = [
                    sphinxtrain_tool('map_adapt'), '-moddeffn',
                    os.path.join(self.model_dir, 'mdef.txt'),
                    '-ts2cbfn', '.ptm.', '-meanfn',
                    os.path.join(self.model_dir, 'means'), '-varfn',
                    os.path.join(self.model_dir, 'variances'), '-mixwfn',
                    os.path.join(self.model_dir, 'mixture_weights'), '-tmatfn',
                    os.path.join(self.model_dir, 'transition_matrices'),
                    '-accumdir', self.working_dir, '-mapmeanfn',
                    os.path.join(self.adapt_dir, 'means'), '-mapvarfn',
                    os.path.join(self.adapt_dir, 'variances'), '-mapmixwfn',
                    os.path.join(self.adapt_dir, 'mixture_weights'),
                    '-maptmatfn',
                    os.path.join(self.adapt_dir, 'transition_matrices')
                ]
                if not run_step(cmd):
                    continue_next = False
                nextcommand = "sendump"
            elif command == "sendump":
                # Recreate the adapted sendump file. A sendump file saves
                # space and is supported by pocketsphinx.
                cmd = [
                    sphinxtrain_tool('mk_s2sendump'), '-pocketsphinx', 'yes',
                    '-moddeffn',
                    os.path.join(self.adapt_dir, 'mdef.txt'), '-mixwfn',
                    os.path.join(self.adapt_dir, 'mixture_weights'),
                    '-sendumpfn',
                    os.path.join(self.adapt_dir, 'sendump')
                ]
                if not run_step(cmd):
                    continue_next = False
                nextcommand = "updateprofile"
            elif command == "updateprofile":
                # Format the dictionary:
                #  - remove whitespace at the beginning and end of each line
                #  - remove the (#) after the first word
                #  - collapse multiple whitespaces into a single space
                #  - separate the word from its phonemes with a tab
                with open(os.path.join(self.adapt_dir, "cmudict.dict"),
                          "r") as in_file:
                    with open(self.formatteddict_path, "w+") as out_file:
                        for line in in_file:
                            line = line.strip()
                            # remove the number in parentheses (if any)
                            line = re.sub(r'([^\(]+)\(\d+\)', r'\1', line)
                            # compress runs of whitespace into one space
                            line = re.sub(r'\s+', ' ', line)
                            # replace the first whitespace with a tab
                            line = line.replace(' ', '\t', 1)
                            print(line, file=out_file)
                # Use phonetisaurus to prepare an fst model
                cmd = [
                    "phonetisaurus-train", "--lexicon",
                    self.formatteddict_path, "--seq2_del", "--dir_prefix",
                    os.path.join(self.adapt_dir, "train")
                ]
                if run_step(cmd):
                    # Now set the values in profile
                    profile.set_profile_var(['pocketsphinx', 'fst_model'],
                                            os.path.join(
                                                self.adapt_dir, "train",
                                                "model.fst"))
                    profile.set_profile_var(['pocketsphinx', 'hmm_dir'],
                                            self.adapt_dir)
                    profile.save_profile()
                    # Also run through the list of words that have been used
                    # that are not the wake word or a command word and make
                    # sure they are all identified so pocketsphinx can match
                    # them and not get confused on word boundaries.
                    # Pull a list of all words spoken from verified
                    # transcriptions by recursively splitting on spaces.
                    query = " ".join([
                        "with recursive split(", " word,", " rest", ") as (",
                        " select", "  '',"
                        "  upper(replace(replace(verified_transcription,'?',''),',','')) || ' '",
                        " from audiolog",
                        " where type in ('active','passive') and reviewed!=''",
                        " union all select",
                        "  substr(rest, 0, instr(rest,' ')),",
                        "  substr(rest,instr(rest,' ')+1)",
                        " from split where rest <> ''"
                        ")",
                        "select word from split where word!='' group by word"
                    ])
                    c.execute(query)
                    words_used = [x[0].upper() for x in c.fetchall()]
                    # Pull the list of words from the local standard phrases.
                    # Same default keyword as the rest of the application so
                    # an unset keyword does not abort the step.
                    keywords = profile.get_profile_var(['keyword'], ['Naomi'])
                    if isinstance(keywords, str):
                        keywords = [keywords]
                    phrases = [keyword.upper() for keyword in keywords]
                    custom_standard_phrases_dir = paths.sub(
                        os.path.join("data", "standard_phrases"))
                    custom_standard_phrases_file = os.path.join(
                        custom_standard_phrases_dir,
                        "{}.txt".format(self.language))
                    if os.path.isfile(custom_standard_phrases_file):
                        with open(custom_standard_phrases_file, mode="r") as f:
                            for line in f:
                                phrase = line.strip().upper()
                                if phrase:
                                    phrases.append(phrase)
                    # Get all the phrases that the plugins are looking for
                    ps = pluginstore.PluginStore()
                    ps.detect_plugins("speechhandler")
                    for info in ps.get_plugins_by_category("speechhandler"):
                        try:
                            plugin = info.plugin_class(info,
                                                       profile.get_profile())
                            # get_phrases is vestigial now
                            if hasattr(plugin, "get_phrases"):
                                for phrase in plugin.get_phrases():
                                    phrases.extend([
                                        word.upper()
                                        for word in phrase.split()
                                    ])
                            # get the phrases from the plugin intents
                            if hasattr(plugin, "intents"):
                                intents = plugin.intents()
                                for intent in intents:
                                    for template in intents[intent]['locale'][
                                            self.language]['templates']:
                                        phrases.extend([
                                            word.upper()
                                            for word in template.split()
                                        ])
                        except Exception as e:
                            # A broken plugin should not abort the training
                            message = error_text(e)
                            response.append(
                                "Plugin {} skipped! (Reason: {})".format(
                                    info.name, message))
                            self._logger.warning(
                                "Plugin '{}' skipped! (Reason: {})".format(
                                    info.name, message),
                                exc_info=True)

                    # Get the set of all words in words_used that do not
                    # appear in phrases
                    self._logger.debug("Phrases: {}".format(phrases))
                    known_phrases = set(phrases)
                    new_phrases = [
                        word for word in words_used
                        if word not in known_phrases
                    ]
                    response.append("{} new phrases detected".format(
                        len(new_phrases)))
                    description.append("adding {} new phrases".format(
                        len(new_phrases)))
                    if new_phrases:
                        table = "<table><tr><th>new phrase</th></tr>"
                        # Append the new phrases to the custom
                        # standard_phrases/{language}.txt file
                        if not os.path.isdir(custom_standard_phrases_dir):
                            os.makedirs(custom_standard_phrases_dir)
                        with open(custom_standard_phrases_file,
                                  mode="a+") as f:
                            for word in new_phrases:
                                table += "<tr><td>{}</td></tr>".format(word)
                                print(word, file=f)
                        table += "</table>"
                        response.append(table)
                    # Finally, force naomi to regenerate all of the
                    # pocketsphinx vocabularies by deleting all the
                    # vocabularies/{language}/sphinx/{}/revision
                    # files:
                    for revision_file in glob.glob(
                            paths.sub('vocabularies', self.language, 'sphinx',
                                      "*", "revision")):
                        os.remove(revision_file)
                    # Add the description
                    c.execute('''insert into trainings values(?,?,?)''',
                              (datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                               'Adapt Pocketsphinx', " ".join(description)))
                    conn.commit()
                else:
                    continue_next = False
        except Exception as e:
            # Top level boundary: report the failure to the user rather than
            # letting it escape.
            continue_next = False
            message = error_text(e)
            self._logger.error("Error: {}".format(message), exc_info=True)
            response.append('<span class="failure">{}</span>'.format(message))
        finally:
            # Always release the database connection
            if conn is not None:
                conn.close()
        if not continue_next:
            nextcommand = ""
        return response, nextcommand, description
Esempio n. 23
0
def application(environ, start_response):
    keyword = profile.get_profile_var(["keyword"], ["Naomi"])
    if (isinstance(keyword, list)):
        keyword = keyword[0]
    print("PATH_INFO=%s" % environ["PATH_INFO"])
    if (environ["PATH_INFO"] == "/favicon.ico"):
        start_response('404 Not Found',
                       [('content-type', 'text/plain;charset=utf-8')])
        ret = ["404 Not Found"]
        return [line.encode("UTF-8") for line in ret]
    else:
        audiolog_dir = paths.sub("audiolog")
        audiolog_db = os.path.join(audiolog_dir, "audiolog.db")
        wavfile = ""
        rowID = ""
        first_rowID = ""
        prev_rowID = ""
        next_rowID = ""
        result = ""
        speaker = ""
        verified_transcription = ""
        post_data = ""
        engine = ""
        verified_intent = ""
        description = []
        reQS = re.compile("([^=]+)=([^&]*)&?")

        # gather parameters from GET
        if (environ["QUERY_STRING"]):
            for namevalue in reQS.findall(environ["QUERY_STRING"]):
                if (namevalue[0].lower() == "wavfile"):
                    wavfile = os.path.join(audiolog_dir, namevalue[1])
                if (namevalue[0].lower() == "rowid"):
                    rowID = namevalue[1]

        # gather parameters from POST
        content_length = 0
        if (environ['CONTENT_LENGTH']):
            content_length = int(environ['CONTENT_LENGTH'])
            post_data = environ['wsgi.input'].read(content_length).decode(
                "UTF-8")
            # Parse it out
            for namevalue in reQS.findall(post_data):
                if (namevalue[0].lower() == "rowid"):
                    rowID = namevalue[1].lower()
                if (namevalue[0].lower() == "result"):
                    result = namevalue[1].lower()
                if (namevalue[0].lower() == "verified_transcription"):
                    verified_transcription = unquote(namevalue[1].replace(
                        '+', ' '))
                if (namevalue[0].lower() == "engine"):
                    engine = unquote(namevalue[1])
                if (namevalue[0].lower() == "command"):
                    command = unquote(namevalue[1].lower())
                if (namevalue[0].lower() == "description"):
                    description.append(unquote(namevalue[1]))
                if (namevalue[0].lower() == "speaker"):
                    speaker = namevalue[1].replace('+', ' ')
                if (namevalue[0].lower() == "verified_intent"):
                    verified_intent = namevalue[1].replace('+', ' ')

        # Handle the request
        # serve a .wav file
        ErrorMessage = None
        if (len(wavfile) and os.path.isfile(wavfile)):
            start_response('200 OK', [('content-type', 'audio/wav')])
            with open(wavfile, "rb") as w:
                ret = [w.read()]
            return ret
        # open a connection to the database
        try:
            conn = sqlite3.connect(audiolog_db)
        except sqlite3.OperationalError:
            ret = []
            start_response('200 OK',
                           [('content-type', 'text/html;charset=utf-8')])
            ret.append(
                "<html><head><title>Could not open database</title></head>")
            ret.append("<body><h2>Could not open database file {}</h2>".format(
                audiolog_db))
            ret.append(
                "<p>Try adding the following lines to your profile ({}) and then asking me a few questions:<br />"
                .format(profile.profile_file))
            ret.append("<pre>\taudiolog:\n\t\tsave_audio\n</pre>")
            return [line.encode("UTF-8") for line in ret]
        c = conn.cursor()
        # Check and make sure the speaker field exists
        c.execute("select distinct speaker from audiolog order by 1")
        # fetchall returns all rows as tuples, take the first (and only)
        # element of each tuple
        speakers = [speaker[0] for speaker in c.fetchall()]
        # Start the html response
        ret = []
        # serve a train response. We will put this in a div on the Train
        # tab, so we don't have to regenerate everything.
        if (len(engine)):
            start_response('200 OK',
                           [('content-type', 'text/json;charset=utf-8')])
            continue_next = True
            nextcommand = ""
            response = []
            found_plugin = False
            for info in plugins.get_plugins_by_category('stt_trainer'):
                if (info.name == engine):
                    found_plugin = True
                    try:
                        plugin = info.plugin_class(info, profile.get_profile())
                        print("plugin.HandleCommand({}, {})".format(
                            command, description))
                        response, nextcommand, description = plugin.HandleCommand(
                            command, description)
                    except Exception as e:
                        _logger.warn(
                            "Plugin '{}' skipped! (Reason: {})".format(
                                info.name, e.message
                                if hasattr(e, 'message') else 'Unknown'),
                            exc_info=True)
            if (not found_plugin):
                response = ["Unknown STT Trainer: {}".format(engine)]
            # Prepare the json response
            messagetext = "<br /><br />\n".join(response)
            if (not continue_next):
                nextcommand = ""
            jsonstr = json.dumps({
                'message': messagetext,
                'engine': engine,
                'command': nextcommand,
                'description': description
            })
            ret.append(jsonstr)
        else:
            start_response('200 OK',
                           [('content-type', 'text/html;charset=utf-8')])
            ret.append(
                '<html><head><title>{} STT Training</title>'.format(keyword))
            # Return the main page
            try:
                # If we are performing an update,
                # do so and fetch the next row id
                if (result and rowID):
                    print("Result: {}".format(result))
                    # rowid should have been passed in
                    # if the rowid that was passed in does not exist,
                    # the following lines will have no effect
                    # FIXME: in this case, an error should be returned.
                    Update_record = Get_row(c, rowID)
                    now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
                    if (Update_record):
                        # Since an audio file can be associated with more
                        # than one transcription (since both a passive and
                        # active transcription can be run on the same audio
                        # file) we need to update each record with the same
                        # transcription with its own WER.
                        # Get a list of records that have the same filename
                        result = result.lower()
                        if (result == "correct"):
                            # if the current result matches the transcription,
                            # then verified transcription = transcription
                            # and the type should remain unchanged
                            verified_transcription = Update_record[
                                'Transcription']
                        if (result == "noise"):
                            # If the user selected "noise" then the verified
                            # transcription is blank
                            verified_transcription = ""
                        if (result == "unclear"):
                            # Unclear means that there is no verified
                            # transcription and the sample is unusable
                            # for either training the speech recognition
                            # or the noise detection. We can go ahead
                            # and run WER on it, but it is unlikely to
                            # be used.
                            verified_transcription = ""
                            recording_type = "unclear"
                        verified_transcription = verified_transcription.strip()
                        filename = Update_record["Filename"]
                        c.execute(
                            " ".join([
                                "select", " RowID,", " transcription,",
                                " type", "from audiolog",
                                "where RowID=:RowID or (",
                                " filename=:filename and reviewed=''", ")"
                            ]), {
                                'RowID': rowID,
                                'filename': filename
                            })
                        for row in c.fetchall():
                            rowID = row[0]
                            transcription = row[1]
                            recording_type = row[2]
                            if (len(verified_transcription) == 0):
                                recording_type = "noise"
                                print("Setting recording_type to noise")
                                if (result == "unclear"):
                                    recording_type = "unclear"
                                    print("Setting recording_type to unclear")
                            # calculate the word error rate
                            WER = 0
                            if (len(transcription) > 0):
                                WER = wer(transcription,
                                          verified_transcription)
                            c.execute(
                                " ".join([
                                    "update audiolog set ", " type=:type,",
                                    " verified_transcription=:vt,",
                                    " speaker=:speaker,"
                                    " reviewed=:reviewed,", " wer=:wer,",
                                    " verified_intent=:verified_intent",
                                    "where RowID=:RowID"
                                ]), {
                                    "type": recording_type,
                                    "vt": verified_transcription,
                                    "speaker": speaker,
                                    "reviewed": now,
                                    "wer": WER,
                                    "verified_intent": verified_intent,
                                    "RowID": rowID
                                })
                            conn.commit()
                        # fetch the next unreviewed rowid
                        rowID = fetch_next_unreviewed_rowID(c, rowID)
                    else:
                        ErrorMessage = "Row ID {} does not exist".format(
                            str(rowID))
                # get the first rowID
                first_rowID = fetch_first_rowID(c)
                # get the current rowID
                try:
                    rowID = fetch_current_rowID(c, rowID)
                except ValueError:
                    ErrorMessage = "Row {} not found".format(rowID)
                    rowID = fetch_current_rowID(c, None)
                # get the previous rowid
                prev_rowID = fetch_prev_rowID(c, rowID)
                # get the next rowid
                next_rowID = fetch_next_rowID(c, rowID)
                totalRows = fetch_total_rows(c)

                if (len(first_rowID)):
                    ret.append("""
<meta charset="utf-8"/>
<style type="text/css">
 /* Style the tab */
.tab {
  overflow: hidden;
  border: 1px solid #ccc;
  background-color: #f1f1f1;
}
/* Style the buttons that are used to open the tab content */
.tab button {
  background-color: inherit;
  float: left;
  border: none;
  outline: none;
  cursor: pointer;
  padding: 14px 16px;
  transition: 0.3s;
}
/* Change background color of buttons on hover */
.tab button:hover {
  background-color: #ddd;
}
/* Create an active/current tablink class */
.tab button.active {
  background-color: #ccc;
}
/* Style the tab content */
.tabcontent {
  display: none;
  padding: 6px 12px;
  border: 1px solid #ccc;
  border-top: none;
}
.tabcontent.active {
  display: block;
}
.success {
  color: #0f0; /* green */
}
.failure {
  color: #f00; /* red */
}
</style>
<script language="javascript">
    var spin=0; // global spinner control
    var spintimer;
    function startSpinner(){
        // kill any old spinner
        window.clearTimeout(spintimer);
        spin=1;
        spintimer=window.setTimeout(function(){moveSpinner(0)},250);
    }
    function moveSpinner(position){
        var s=document.getElementById("spinner");
        switch(position){
            case 0:
                s.innerHTML="-";
                break;
            case 1:
                s.innerHTML="\\\\";
                break;
            case 2:
                s.innerHTML="|";
                break;
            case 3:
                s.innerHTML="/";
                break;
        }
        if(spin){
            spintimer=window.setTimeout(function(){moveSpinner((position+1)%4)},250);
        }else{
            s.innerHTML="";
        }
    }
    function stopSpinner(){
        window.clearTimeout(spintimer);
        spin=0;
    }
    function openTab(evt, tabName) {
        // Declare all variables
        var i, tabcontent, tablinks;
        // Get all elements with class="tabcontent" and hide them
        tabcontent = document.getElementsByClassName("tabcontent");
        for (i = 0; i < tabcontent.length; i++) {
            tabcontent[i].className = "tabcontent";
        }
        // Get all elements with class="tablinks" and remove the class "active"
        tablinks = document.getElementsByClassName("tablinks");
        for (i = 0; i < tablinks.length; i++) {
            tablinks[i].className = tablinks[i].className.replace(" active", "");
        }
        // Show the current tab, and add an "active" class to the button that opened the tab
        document.getElementById(tabName).className = "tabcontent active";
        evt.currentTarget.className += " active";
    }

    // Submit an updated transcription to the server. Upon success,
    // make the "revert" button inactive
    function UpdateTranscription(RowID){
        var Transcription=document.getElementById("transcription_"+RowID).value;
        alert( "Transcription="+Transcription );
        var xhttp=new XMLHttpRequest();
        xhttp.onreadystatechange=function(){
            if( this.readyState==4 && this.status==200 ){
                // Check this.responseText
                var message=JSON.parse(this.responseText).message;
                if( message=="SUCCESS;Updated "+RowID ){
                    // disable reset button
                    document.getElementById("reset_"+RowID).disabled=true;
                }else{
                    //alert( "message="+message );
                }
            }else{
                //alert( "responseText="+this.responseText );
            }
        }
        xhttp.open("POST",window.location.href.split(/[?#]/)[0],true);
        var request=JSON.stringify({"action":"update","RowID":RowID,"Transcription":Transcription});
        xhttp.send(request);
    }

    // Delete a line from the database and, if the response is success,
    // delete the line from the page also.
    function DeleteAudio(RowID){
        var xhttp=new XMLHttpRequest();
        xhttp.onreadystatechange=function(){
            if( this.readyState==4 && this.status==200 ){
                // Check this.responseText to make sure it contains a success message
                var message=JSON.parse(this.responseText).message;
                if( message=="SUCCESS;Deleted "+RowID ){
                    document.getElementById("r"+RowID).parentNode.removeChild(document.getElementById("r"+RowID));
                }else{
                    //alert(message);
                }
            }
        };
        xhttp.open("POST",window.location.href.split(/[?#]/)[0],true);
        var request='{"action":"delete","RowID":"'+RowID+'"}';
        xhttp.send(request);
    }

    function GoRowID(RowID){
        document.location.href="http://"+window.location.host+window.location.pathname+"?RowID="+RowID;
    }

    function ValidateForm(){
        var Checked=document.querySelector("input[name='result']:checked");
        var Ret=true;
        if( !Checked ){
            Ret=false;
            alert("Please select an option");
        }
        return Ret;
    }

    function Train(clear, engine, command, description){
        stopSpinner();
        if(clear){
            document.getElementById("Result").innerHTML = "";
        }
        var xhttp = new XMLHttpRequest();
        xhttp.onreadystatechange = function(){
            if(this.readyState==4){
                stopSpinner();
                if(this.status==200){
                    var response=JSON.parse(this.responseText);
                    document.getElementById("Result").innerHTML += response.message + '<br /><br />';
                    if(response.command){
                        var description = "";
                        if(response.description){
                            description = response.description;
                        }
                        Train(false,response.engine,response.command,description);
                    }else{
                        document.getElementById("Result").innerHTML += "<h2>Training Complete</h2>";
                    }
                }else{
                    document.getElementById("Result").innerHTML += "An error occurred. ReadyState: "+this.readyState+" Status: "+this.status+"<br />"+this.responseText;
                }
            }
        };
        url = location.toString().replace(location.search, "");

        xhttp.open("POST",url,true);
        xhttp.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
        xhttp.send("engine="+encodeURIComponent(engine)+"&command="+encodeURIComponent(command)+"&description="+encodeURIComponent(description));
        startSpinner();
    }
</script>""")
                    ret.append('''
</head>
<body>
<!-- Tab links -->
<div class="tab">
  <button class="tablinks active" onclick="openTab(event, 'Verify')">Verify transciptions</button>
  <button class="tablinks" onclick="openTab(event, 'Train')">Train STT Engines</button>
</div>
<!-- Tab content -->
<div id="Verify" class="tabcontent active">
                    ''')

                    Current_record = Get_row(c, rowID)

                    # if this has been reviewed, figure out
                    # what option was selected
                    Checked = 'checked="checked"'
                    Unchecked = ''
                    Disabled = 'disabled="disabled"'
                    Enabled = ''
                    Result_correct = Unchecked
                    Result_update = Unchecked
                    Result_nothing = Unchecked
                    Result_unclear = Unchecked
                    Verified_transcription_state = Disabled
                    if (len(Current_record["Reviewed"])):
                        if (Current_record["Verified_transcription"]):
                            if (Current_record["Transcription"] ==
                                    Current_record["Verified_transcription"]):
                                Result_correct = Checked
                            else:
                                Result_update = Checked
                                Verified_transcription_state = Enabled
                        else:
                            if (Current_record["Type"] == "noise"):
                                Result_nothing = Checked
                            else:
                                Result_unclear = Checked

                    if (not Current_record["Verified_transcription"]):
                        Current_record[
                            "Verified_transcription"] = Current_record[
                                "Transcription"]

                    # Serve the body of the page
                    if (Debug):
                        # Debug info
                        ret.append("""<ul>""")
                        ret.append(
                            """<li>post_data: {}</li>""".format(post_data))
                        ret.append("""<li>Result: {}</li>""".format(result))

                        if (result == "update"):
                            ret.append(
                                "<li>Verified_transcription: {}</li>".format(
                                    verified_transcription))
                        ret.append("</ul>")

                        ret.append("<ul>")
                        ret.append("<li>Recorded: {}</li>".format(
                            Current_record["Recorded"]))
                        ret.append("<li>Filename: {}</li>".format(
                            Current_record["Filename"]))
                        ret.append("<li>Type: {}</li>".format(
                            Current_record["Type"]))
                        ret.append("<li>Transcription: {}</li>".format(
                            Current_record["Transcription"]))
                        ret.append(
                            "<li>Verified_transcription: {}</li>".format(
                                Current_record["Verified_transcription"]))
                        ret.append("<li>Speaker: {}</li>".format(
                            Current_record["Speaker"]))
                        ret.append("<li>Speaker: {}</li>".format(
                            Current_record["Speaker"]))
                        ret.append("<li>Reviewed: {}</li>".format(
                            Current_record["Reviewed"]))
                        ret.append("<li>Wer: {}</li>".format(
                            Current_record["WER"]))
                        ret.append("<li>Result_correct: {}</li>".format(
                            Result_correct))
                        ret.append("""<li>Result_update: {}</li>""".format(
                            Result_update))
                        ret.append("""<li>Result_nothing: {}</li>""".format(
                            Result_nothing))
                        ret.append("""</ul>""")

                    ret.append(
                        """<h1>{} transcription {} of {} ({})</h1>""".format(
                            keyword, rowID, totalRows, Current_record["Type"]))
                    if (ErrorMessage):
                        ret.append(
                            """<p class="Error">{}</p>""".format(ErrorMessage))
                    ret.append(" ".join([
                        '<audio', 'controls="controls"', 'type="audio/wav"',
                        'style="width:100%%">', '<source src="?wavfile={}" />',
                        '</audio><br />'
                    ]).format(Current_record["Filename"]))
                    ret.append(' '.join([
                        '{} heard',
                        '"<span style="font-weight:bold">{}</span>"<br />'
                    ]).format(keyword, Current_record["Transcription"]))
                    ret.append("What did you hear?<br />")
                    ret.append(' '.join([
                        '<form method="POST"',
                        'onsubmit="return ValidateForm()">'
                    ]))
                    ret.append(
                        '<input type="hidden" name="RowID" value="{}"/>'.
                        format(rowID))
                    ret.append(
                        """<input type="radio" id="update_result_correct" name="result" value="correct" {} onclick="document.getElementById('update_verified_transcription').disabled=true"/> <label for="update_result_correct">The transcription is correct. I heard the same thing</label><br />"""
                        .format(Result_correct))
                    ret.append(
                        """<input type="radio" id="update_result_update" name="result" value="update" {} onclick="document.getElementById('update_verified_transcription').disabled=false"/> <label for="update_result_update">The transcription is not correct. This is what I heard:</label><br /><textarea id="update_verified_transcription" name="verified_transcription" style="margin-left: 20px" {}>{}</textarea><br />"""
                        .format(Result_update, Verified_transcription_state,
                                Current_record["Verified_transcription"]))
                    ret.append(
                        """<input type="radio" id="update_result_nothing" name="result" value="noise" {} onclick="document.getElementById('update_verified_transcription').disabled=true"/> <label for="update_result_nothing">This was just noise with no voices.</label><br />"""
                        .format(Result_nothing))
                    ret.append(
                        """<input type="radio" id="update_result_unclear" name="result" value="unclear" {} onclick="document.getElementById('update_verified_transcription').disabled=true"/> <label for="update_result_unclear">This was not directed to {} or was too unclear to understand.</label><br />"""
                        .format(Result_unclear, keyword))
                    ret.append(
                        """<label for="Speaker">Speaker</label><br /><input type="text" id="Speaker" name="Speaker" value="{}" list="speakerList"><datalist id="speakerList">"""
                        .format(Current_record["Speaker"] if len(
                            Current_record["Speaker"]) else speaker))
                    for speaker in speakers:
                        ret.append("""<option value="{}">""".format(speaker))
                    ret.append("""</datalist><br /><br />""")
                    if (Current_record["Type"] == 'active'):
                        Verified_intent = Current_record["verified_intent"]
                        if (Verified_intent == "None"):
                            Verified_intent = Current_record["intent"]
                        ret.append("""Intent: {} ({})<br />""".format(
                            Current_record["intent"], Current_record["score"]))
                        ret.append(
                            """Correct intent: <select name="Verified_Intent">"""
                        )
                        ret.append(
                            """<option value="unclear">unclear</option>""")
                        for intent in fetch_intents(c):
                            selected = ""
                            if (intent == Verified_intent):
                                selected = " selected"
                            ret.append("""<option{}>{}</option>""".format(
                                selected, intent))
                        ret.append("""</select><br /><br />""")
                    ret.append('<input type="submit" value="Submit"/><br />')
                    if (prev_rowID):
                        ret.append(' '.join([
                            '<input type="button" value="Prev"',
                            'onclick="GoRowID({})"/>'
                        ]).format(prev_rowID))
                    if (next_rowID):
                        ret.append(' '.join([
                            '<input type="button" value="Next"',
                            'onclick="GoRowID({})"/>'
                        ]).format(next_rowID))
                    else:
                        ret.append("""All transcriptions verified""")
                    ret.append('''
</div><!-- Verify -->
<div id="Train" class="tabcontent">
<form name="Train">
                    ''')
                    for info in plugins.get_plugins_by_category('stt_trainer'):
                        ret.append(
                            '''<input type="button" value="{plugin_name}" onclick="Train(true,'{plugin_name}','','')"><br />'''
                            .format(plugin_name=info.name))
                    ret.append('''
</form>
<div id="Result">
</div>
<div id="spinner">
</div>
</div><!-- Train -->
                    ''')
                    ret.append("""</body></html>""")
                else:
                    ret = [
                        "".join([
                            "<html>",
                            "<head><title>Nothing to validate</title></head>",
                            "<body><h1>Nothing to validate</h1></body></html>"
                        ])
                    ]
            except sqlite3.OperationalError as e:
                ret.append("".join(
                    ['</head>', '<body>SQLite error: {}</body>',
                     '</html>']).format(e))
        # Save (commit) the changes
        conn.commit()
        conn.close()
        return [line.encode("UTF-8") for line in ret]