def check_wiktionary_language(target_lang):
    """Assert that the language names known for *target_lang* map to
    distinct codes.

    Every name in ``LANGUAGE_NAMES[target_lang]`` is looked up with
    ``find_name('language', name, target_lang)``. A leading ``'Proto-'`` is
    dropped from the name before the lookup. If two names produce the same
    code string, an ``AssertionError`` naming both of them is raised.
    """
    proto_prefix = 'Proto-'
    name_for_code = {}
    for listed_name in LANGUAGE_NAMES[target_lang]:
        # Look up the name without its 'Proto-' prefix, if it has one.
        name = (
            listed_name[len(proto_prefix):]
            if listed_name.startswith(proto_prefix)
            else listed_name
        )
        code = str(find_name('language', name, target_lang))
        assert code not in name_for_code, \
            "%r and %r have the same code" % (name_for_code[code], name)
        name_for_code[code] = name
def CQS_match_query_phrase(self, phrase):
    """Analyze phrase to see if it can be answered by this skill.

    The phrase is run through ``self.intents.calc_intent``; the entities it
    extracts (country, language, region) are validated and normalized, and
    the answer is produced by ``self.intent2answer``.

    Arguments:
        phrase (str): User phrase, "What is an aardwark"

    Returns:
        (phrase, CQSMatchLevel, response, callback_data) or None:
            Tuple containing the original phrase, the match level, the
            answer text and the data dict collected while matching.
            None is returned when the intent confidence is 0.3 or lower,
            when an extracted language name cannot be resolved to a
            language code, or when no response could be produced.
    """
    response = None
    match = self.intents.calc_intent(phrase)
    level = CQSMatchLevel.CATEGORY
    # NOTE: data aliases match.matches, so everything stored below is also
    # visible through the intent match object.
    data = match.matches
    intent = match.name
    score = match.conf
    data["intent"] = intent
    data["score"] = score

    # Map the intent confidence onto a match level; too low means no intent.
    if score > 0.8:
        level = CQSMatchLevel.EXACT
    elif score > 0.5:
        level = CQSMatchLevel.CATEGORY
    elif score > 0.3:
        level = CQSMatchLevel.GENERAL
    else:
        intent = None

    if intent:
        # Validate extracted entities
        country = data.get("country")
        region = data.get("region")
        language = data.get("language")

        if country:
            data["query"] = country
            # ensure we really have a country name; this dialog is the
            # fallback answer if intent2answer yields nothing
            response = self.dialog_renderer.render("bad_country", {})
            country_match, country_score = match_one(
                country.lower(), list(self.countries_data))
            self.log.debug("Country fuzzy match: {n}, Score: {s}".format(
                n=country_match, s=country_score))
            if country_score > 0.5:
                country = country_match
                data.update(self.countries_data[country])
            else:
                countries = self.search_country(country)
                if not countries:
                    level = CQSMatchLevel.GENERAL
                else:
                    country = countries[0]["name"]
                    data.update(countries[0])
                    # TODO disambiguation
                    if len(countries) > 1:
                        data["disambiguation"] = countries[1:]
                        self.log.debug(
                            "multiple matches found: " +
                            str([c["name"] for c in countries]))
            data["country"] = country  # normalized from match

        if language:
            data["query"] = language
            # ensure we really have a language name
            words = language.split(" ")
            clean_up = ["is"]
            # remove words commonly caught by mistake in padatious
            language = " ".join(
                [word for word in words if word not in clean_up])
            try:
                lang_code = langcodes.find_name(
                    'language', language,
                    langcodes.standardize_tag(self.lang))
            except LookupError:
                # find_name raises LookupError (it does not return a falsy
                # value) when the name is unknown; treat that as "no match".
                self.log.debug("Unknown language name: " + language)
                return None
            lang_code = str(lang_code)
            self.log.debug("Detected lang code: " + lang_code)
            if not lang_code:
                # guard kept from the original implementation
                return None
            data["lang_code"] = lang_code
            # TODO
            countries = self.search_country_by_language(lang_code)
            data["country_list"] = countries

        if region:
            data["query"] = region
            # ensure we really have a region name; this dialog is the
            # fallback answer if intent2answer yields nothing
            response = self.dialog_renderer.render("bad_region")
            countries = None
            region_match, region_score = match_one(region, self.regions)
            data["region_score"] = region_score

            if region_score > 0.5:
                region = region_match
                countries = self.search_country_by_region(region)

            # A better-scoring subregion takes precedence over the region.
            subregion_match, subregion_score = match_one(
                region, self.subregions)
            data["subregion_score"] = subregion_score
            if subregion_score > region_score:
                region = subregion_match
                countries = self.search_country_by_subregion(region)

            # Without a country, the region score decides the match level.
            if region_score > 0.8 and not country:
                level = CQSMatchLevel.EXACT
            elif region_score > 0.5 and not country:
                level = CQSMatchLevel.CATEGORY
            elif region_score > 0.3 and not country:
                level = CQSMatchLevel.GENERAL

            data["region"] = region
            self.log.debug("Detected region: " + region)
            data["country_list"] = countries

        # Get response from intents
        response = self.intent2answer(intent, data) or response
        if response:
            return (phrase, level, response, data)
    return None
def _language_name_to_code(name, name_language_code):
    """Return the language code for *name*, or None if it is not recognized.

    *name* is looked up with ``langcodes.find_name`` as a language name
    written in the language identified by *name_language_code*. The result
    is converted to ``str``. A ``LookupError`` from the lookup is turned
    into a ``None`` return value.
    """
    try:
        match = langcodes.find_name('language', name, name_language_code)
    except LookupError:
        return None
    else:
        return str(match)