def query_entity(query: Query, session: Session, name: str) -> AnswerTuple:
    """Answer a query for an entity by name.

    Looks up the definitions and the article list for ``name`` and builds
    a written answer plus a voice answer from the first definition.

    Args:
        query: The query being answered; receives the source, the context
            and, for unanswered voice queries, an error code.
        session: Database session passed through to the lookup helpers.
        name: The entity name as given in the query.

    Returns:
        A ``(response, answer, voice_answer)`` tuple, where ``response``
        holds the definitions under "answers" and the articles under
        "sources".
    """
    definitions = _query_entity_definitions(session, name)
    articles = _query_article_list(session, name)
    response: ResponseDict = dict(answers=definitions, sources=articles)

    if definitions and "answer" in definitions[0]:
        best = definitions[0]
        # 'Mál og menning er bókmenntafélag.'
        definition = best["answer"]
        answer = cap_first(definition)
        uc_name = cap_first(name)
        # An all-uppercase word is probably an abbreviation, such as
        # 'FME' or 'BSÍ': spell it out as 'F M E' for the voice answer
        spoken = [
            " ".join(word) if len(word) > 1 and word.isupper() else word
            for word in definition.split()
        ]
        voice_answer = uc_name + " er " + " ".join(spoken) + "."
        # Only set a source if the list is present AND non-empty;
        # indexing an empty list would raise IndexError
        sources = best["sources"] if "sources" in best else None
        if sources:
            query.set_source(sources[0]["domain"])
        # Store the entity name so subsequent queries can refer to it
        query.set_context({"entity_name": uc_name})
    else:
        answer = "Engin skilgreining finnst á nafninu '" + name + "'."
        voice_answer = "Ég veit ekki hvað " + name + " er."
        if query.is_voice:
            # Rather than accept this as a voice query
            # for an entity that is not found, return an
            # error and thereby give other query handlers
            # a chance to parse this
            query.set_error("E_ENTITY_NOT_FOUND")

    return response, answer, voice_answer
def _which_country_query(subject: str, q: Query):
    """Answer a question about which country a placename is located in.

    Returns True if an answer was set on the query ``q``, or False if
    the placename or its country could not be determined.
    """
    place_info = location_info(subject, "placename")
    country_code = place_info.get("country") if place_info else None
    if not country_code:
        # Unknown placename, or no country registered for it
        return False

    # Country name with preposition ("í Þýskalandi")
    country_phrase = country_desc(country_code)

    # Format and register the answer
    answer = cap_first(country_phrase)
    voice = f"{subject} er {country_phrase}"
    q.set_answer(dict(answer=answer), answer, voice)
    q.set_key(subject)

    # Remember the country for follow-up queries, if it has a known name
    country_name = country_name_for_isocode(country_code)
    if country_name is not None:
        q.set_context(dict(subject=country_name))
    return True
def answer_for_location(loc: Tuple):
    """Describe the address at the given coordinates.

    Args:
        loc: A coordinate pair; ``loc[0]`` and ``loc[1]`` are passed, in
            that order, to the geocoding API.

    Returns:
        A ``(response, answer, voice)`` tuple, or None if the geocoding
        API returned no usable result.
    """
    # Send API request
    res = query_geocode_api_coords(loc[0], loc[1])

    # Verify that we have at least one valid result
    if not res or "results" not in res or not res["results"] or not res["results"][0]:
        return None

    # Grab top result from API call
    top = res["results"][0]
    # TODO: Fall back on lower-ranked results from the API
    # if the top result doesn't even contain a locality.

    # Extract address info from top result
    street, num, locality, _postcode, country_code = _addrinfo_from_api_result(top)

    descr = ""

    # Special handling of Icelandic locations since we have more info
    # about them and street/locality names need to be declined.
    if country_code == "IS":
        if street:
            # We received a street name from the API
            descr = street_desc(street, num, locality)
        elif locality:
            # We at least have a locality (e.g. "Reykjavík")
            descr = _locality_desc(locality)
        else:
            # Only country
            descr = country_desc("IS")
    else:
        # The provided location is abroad.
        sdesc = ("á " + street) if street else ""
        if num and street:
            sdesc += " " + num
        locdesc = (
            "{0} {1}".format(iceprep_for_placename(locality), locality)
            if locality
            else ""
        )
        # "[á Boulevard St. Germain] [í París] [í Frakklandi]"
        # Join only the parts that are present, so that a missing street
        # or locality does not leave a double space in the description
        parts = (sdesc, locdesc, country_desc(country_code))
        descr = " ".join(part for part in parts if part).strip()

    if not descr:
        # Fall back on the formatted address string provided by Google
        formatted = top.get("formatted_address")
        if not formatted:
            # Nothing at all to describe: treat it as no result
            return None
        descr = "á " + formatted

    answer = cap_first(descr)
    response = dict(answer=answer)
    voice = "Þú ert {0}".format(_addr4voice(descr))

    return response, answer, voice
def _addr2nom(address: str) -> str: """ Convert location name to nominative form. """ if address is None or address == "": return address try: nom = NounPhrase(cap_first(address)).nominative or address except Exception: nom = address return nom
def query_title(query: Query, session: Session, title: str) -> AnswerTuple:
    """Answer a query for a person (or entity) by title.

    Collects names from the persons table whose lowercase title equals
    ``title`` or starts with it followed by a space, plus names from the
    entities table whose definition equals ``title``, most recent
    articles first.

    Returns:
        A ``(response, answer, voice_answer)`` tuple.
    """
    # !!! Consider doing a LIKE '%title%', not just LIKE 'title%'

    # We impose a LIMIT of 1024 on each query result,
    # since the query may return many names (for instance 'Hver er formaður?'),
    # and getting more name mentions than this is not likely to significantly
    # affect the outcome.
    QUERY_LIMIT = 1024

    def name_of(row):
        """Pick the name column out of a result row."""
        return row.name

    register: RegisterType = defaultdict(dict)
    title_lc = title.lower()

    # Names from the persons table, matched by lowercase title
    title_matches = Person.title_lc.like(title_lc + " %") | (
        Person.title_lc == title_lc
    )
    person_rows = (
        session.query(
            Person.name,
            Article.id,
            Article.timestamp,
            Article.heading,
            Root.domain,
            Article.url,
        )
        .filter(title_matches)
        .filter(Root.visible == True)  # noqa: E712 (column expression)
        .join(Article, Article.url == Person.article_url)
        .join(Root)
        .order_by(desc(cast(Column, Article.timestamp)))
        .limit(QUERY_LIMIT)
        .all()
    )
    append_names(register, person_rows, prop_func=name_of)

    # Names from the entities table whose definition is the title, if any
    entity_rows = (
        session.query(
            Entity.name,
            Article.id,
            Article.timestamp,
            Article.heading,
            Root.domain,
            Article.url,
        )
        .filter(Entity.definition == title)
        .filter(Root.visible == True)  # noqa: E712 (column expression)
        .join(Article, Article.url == Entity.article_url)
        .join(Root)
        .order_by(desc(cast(Column, Article.timestamp)))
        .limit(QUERY_LIMIT)
        .all()
    )
    append_names(register, entity_rows, prop_func=name_of)

    response = make_response_list(register)

    found = bool(response) and bool(title) and "answer" in response[0]
    if not found:
        answer = f"Ekkert nafn finnst með titilinn '{title}'."
        voice_answer = f"Ég veit ekki hver er {title}."
        return response, answer, voice_answer

    top = response[0]
    # Return 'Seðlabankastjóri er Már Guðmundsson.'
    answer = top["answer"]
    voice_answer = cap_first(title) + " er " + answer + "."
    # Store the person name in the query context
    # so it can be referred to in subsequent queries
    query.set_context({"person_name": answer})
    if top.get("sources"):
        query.set_source(top["sources"][0]["domain"])
    return response, answer, voice_answer
def query_which_route(query: Query, session: Session, result: Result):
    """Answer a query about which bus routes stop at a given bus stop.

    Returns:
        A ``(response, answer, voice_answer)`` tuple.
    """
    stop_name = cast(str, result.stop_name)  # 'Einarsnes', 'Fiskislóð'...

    if stop_name in {"þar", "þangað"}:
        # Referring to a bus stop mentioned earlier
        ctx = query.fetch_context()
        if not (ctx and "bus_stop" in ctx):
            answer = voice_answer = "Ég veit ekki við hvaða stað þú átt."
            return dict(answer=answer), answer, voice_answer
        stop_name = cast(str, ctx["bus_stop"])
        result.qkey = stop_name

    bus_noun = result.bus_noun  # 'strætó', 'vagn', 'leið'...
    stops = straeto.BusStop.named(stop_name, fuzzy=True)

    if stops:
        if query.location:
            straeto.BusStop.sort_by_proximity(stops, query.location)
        stop = stops[0]
        route_numbers = {
            straeto.BusRoute.lookup(route_id).number
            for route_id in stop.visits.keys()
        }
        va = [bus_noun, "númer"]
        a = list(va)
        last_index = len(route_numbers) - 1
        ordered = sorted(route_numbers, key=lambda t: int(t))
        for index, route_number in enumerate(ordered):
            if index:
                # Comma between routes, 'og' before the final one
                separator = "og" if index == last_index else ","
                va.append(separator)
                a.append(separator)
            # We convert inflectable numbers to their text equivalents
            # since the speech engine can't be relied upon to get the
            # inflection of numbers right
            va.append(numbers_to_neutral(route_number))
            a.append(route_number)
        ending = ["stoppar á", to_dative(stop.name)]
        va.extend(ending)
        a.extend(ending)
        # Store a location coordinate and a bus stop name in the context
        query.set_context({"location": stop.location, "bus_stop": stop.name})
    else:
        # No bus stop matches the given name
        a = [stop_name, "þekkist ekki."]
        va = ["Ég", "þekki", "ekki", "biðstöðina", stop_name.capitalize()]

    voice_answer = correct_spaces(" ".join(va) + ".")
    answer = cap_first(correct_spaces(" ".join(a)))
    return dict(answer=answer), answer, voice_answer
def when_answ(q: Query, result):
    """Answer a question of the form "Hvenær er(u) [hátíðardagur]?" etc.

    Builds the answer from ``result.desc``, the verb and the date in
    ``result.target``, then sets the key and the answer on the query
    ``q``. Nothing is returned.
    """
    # TODO: Fix this so it includes weekday, e.g.
    # "Sunnudaginn 1. október"

    # Use plural 'eru' for 'páskar', 'jól' etc.
    is_verb = result.is_verb if "is_verb" in result else "er"
    target = result.target
    # Format the day number from the date itself: the '%-d' strftime
    # directive is a platform-specific extension that is not portable
    date_part = "{0}. {1}".format(target.day, target.strftime("%B"))
    date_str = result.desc + " " + is_verb + " " + date_part
    answer = voice = cap_first(date_str)
    # Put a spelled-out ordinal number instead of the numeric one,
    # in accusative case
    voice = re.sub(r"\d+\. ", _DAY_INDEX_ACC[target.day] + " ", voice)
    response = dict(answer=answer)
    q.set_key("WhenSpecialDay")
    q.set_answer(response, answer, voice)
def _which_continent_query(subject: str, q: Query):
    """Answer a question about which continent a country or placename is on.

    Returns True if an answer was set on the query ``q``, or False if
    no country could be determined for ``subject``.
    """
    # First try the subject as a country name
    country_code = isocode_for_country_name(subject)
    is_placename = not country_code
    if is_placename:
        # OK, the subject is not a country
        # Let's see if it's a placename
        place_info = location_info(subject, "placename")
        if not place_info:
            return False  # We don't know where it is
        country_code = place_info.get("country")
    if not country_code:
        return False

    continent_code = continent_for_country(country_code)
    if continent_code is not None:
        continent = ISO_TO_CONTINENT[continent_code]
        continent_dat = nom2dat(continent)
    else:
        continent = "óþekkt heimsálfa"
        continent_dat = "óþekktri heimsálfu"

    # Format answer. The response carries the bare dative continent
    # name, also when the textual answer below is extended with the
    # country description.
    answer = continent_dat
    response = dict(answer=answer)
    if is_placename:
        country_phrase = country_desc(country_code)
        voice = (
            f"Staðurinn {subject} er {country_phrase}, "
            f"sem er land í {continent_dat}"
        )
        answer = f"{cap_first(country_phrase)}, {continent_dat}"
    else:
        voice = f"Landið {subject} er í {continent_dat}"

    q.set_answer(response, answer, voice)
    q.set_key(subject)
    q.set_context(dict(subject=continent))
    return True
def get_wiki_summary(subject_nom: str) -> str:
    """Fetch a summary of the subject from the Icelandic Wikipedia.

    Returns the cleaned-up page extract, or a "nothing found" message
    when no matching page exists.
    """

    def has_entry(reply):
        """Check whether an API reply contains pages and no '-1' entry."""
        if not reply or "query" not in reply:
            return False
        query_part = reply["query"]
        return "pages" in query_part and "-1" not in query_part["pages"]

    not_found = "Ég fann ekkert um '{0}' í Wikipedíu".format(subject_nom)

    # Wiki pages always start with an uppercase character
    capitalized = cap_first(subject_nom)
    res = _query_wiki_api(capitalized)

    if not has_entry(res):
        # OK, Wikipedia doesn't have anything with current capitalization
        # or lack thereof. Try uppercasing first character of each word.
        titled = subject_nom.title()
        if titled != capitalized:
            res = _query_wiki_api(titled)

    if not has_entry(res):
        return not_found

    pages = res["query"]["pages"]
    if not pages or "-1" in pages:
        return not_found

    # Pick the entry with the lowest-sorting key
    first_key = min(pages.keys())
    return _clean_answer(pages[first_key].get("extract", ""))
def test_query_utility_functions():
    """Check the text-formatting utility functions used by query modules."""

    from queries import (
        natlang_seq,
        nom2dat,
        numbers_to_neutral,
        is_plural,
        sing_or_plur,
        country_desc,
        cap_first,
        time_period_desc,
        distance_desc,
        krona_desc,
        strip_trailing_zeros,
        iceformat_float,
        icequote,
        timezone4loc,
        # parse_num,
    )

    def check(func, expected, *args, **kwargs):
        """Assert that func(*args, **kwargs) equals the expected value."""
        assert func(*args, **kwargs) == expected

    # Natural-language sequences
    check(natlang_seq, "Jón og Gunna", ["Jón", "Gunna"])
    check(natlang_seq, "Jón, Gunna og Siggi", ["Jón", "Gunna", "Siggi"])
    check(
        natlang_seq,
        "Jón, Gunna, og Siggi",
        ["Jón", "Gunna", "Siggi"],
        oxford_comma=True,
    )

    # Nominative to dative
    for nominative, dative in (
        ("hestur", "hesti"),
        ("Hvolsvöllur", "Hvolsvelli"),
    ):
        check(nom2dat, dative, nominative)

    # assert parse_num("11") == 11
    # assert parse_num("17,33") == 17.33

    # Numbers spelled out in neuter
    for text, spelled in (
        ("Öldugötu 4", "Öldugötu fjögur"),
        ("Fiskislóð 31", "Fiskislóð þrjátíu og eitt"),
    ):
        check(numbers_to_neutral, spelled, text)

    # Grammatical number of numeric values
    for plural_value in (22, 11, "76,3", 27.6, "19,11"):
        assert is_plural(plural_value)
    for singular_value in ("276,1", 22.1, 22.41):
        assert not is_plural(singular_value)

    for count, singular, plural, expected in (
        (21, "maður", "menn", "21 maður"),
        (11, "köttur", "kettir", "11 kettir"),
        (2.11, "króna", "krónur", "2,11 krónur"),
        (172, "einstaklingur", "einstaklingar", "172 einstaklingar"),
        (72.1, "gráða", "gráður", "72,1 gráða"),
    ):
        check(sing_or_plur, expected, count, singular, plural)

    # Country descriptions with preposition
    for isocode, described in (
        ("DE", "í Þýskalandi"),
        ("es", "á Spáni"),
        ("IS", "á Íslandi"),
        ("us", "í Bandaríkjunum"),
    ):
        check(country_desc, described, isocode)

    # Capitalization of the first character only
    for raw, capitalized in (
        ("yolo", "Yolo"),
        ("YOLO", "YOLO"),
        ("Yolo", "Yolo"),
    ):
        check(cap_first, capitalized, raw)

    # Time periods
    check(time_period_desc, "1 klukkustund og 3 mínútur", 3751)
    check(
        time_period_desc,
        "1 klukkustund, 2 mínútur og 31 sekúnda",
        3751,
        omit_seconds=False,
    )
    check(time_period_desc, "10 mínútur", 601)
    check(time_period_desc, "10 mínútur og 10 sekúndur", 610, omit_seconds=False)
    check(time_period_desc, "1 mínúta og 1 sekúnda", 61, omit_seconds=False)
    check(
        time_period_desc,
        "2 mínútum og 1 sekúndu",
        121,
        omit_seconds=False,
        case="þgf",
    )

    # Distances
    check(distance_desc, "1,1 kílómetri", 1.1)
    check(distance_desc, "1,2 kílómetrar", 1.2)
    check(distance_desc, "700 metrar", 0.7)
    check(distance_desc, "20 metrar", 0.021)
    check(distance_desc, "41 kílómetra", 41, case="þf")
    check(distance_desc, "220 metrum", 0.215, case="þgf")

    # Currency amounts
    for amount, described in (
        (361, "361 króna"),
        (28, "28 krónur"),
        (4264.2, "4.264,2 krónur"),
        (2443681.1, "2.443.681,1 króna"),
    ):
        check(krona_desc, described, amount)

    # Trailing zeros
    for number_text, stripped in (
        ("17,0", "17"),
        ("219.117,0000", "219.117"),
        ("170", "170"),
        ("170,0", "170"),
    ):
        check(strip_trailing_zeros, stripped, number_text)

    # Icelandic float formatting
    check(iceformat_float, "666", 666.0)
    check(iceformat_float, "666,00", 666, strip_zeros=False)
    check(iceformat_float, "217,3", 217.296)
    check(iceformat_float, "2.528.963,9", 2528963.9)
    check(iceformat_float, "123,1234", 123.12341, decimal_places=4)
    check(iceformat_float, "123,1", 123.1000, strip_zeros=True)
    check(iceformat_float, "123,0000", 123.0, decimal_places=4, strip_zeros=False)

    # Icelandic quotation marks
    check(icequote, "„sæll“", "sæll")
    check(icequote, "„Góðan daginn“", " Góðan daginn ")

    # Time zone lookup by coordinates
    check(timezone4loc, "Atlantic/Reykjavik", (64.157202, -21.948536))
    check(timezone4loc, "Asia/Ashgabat", (40.093368, 57.000067))
def query_arrival_time(query: Query, session: Session, result: Result):
    """Answer a query for the arrival time of a bus.

    Examples of queries handled:
        'Hvenær kemur strætó númer 12?'
        'Hvenær kemur leið sautján á Hlemm?'
        'Hvenær kemur næsti strætó í Einarsnes?'

    Args:
        query: The query being answered. Its location and context are
            read; its context and beautified query string are updated.
        session: Not used in this function.
        result: The parse result; "stop_name", "bus_number" and
            "bus_name" are read from it.

    Returns:
        A ``(response, answer, voice_answer)`` tuple, where ``response``
        is a dict with the written answer under "answer".
    """

    # Retrieve the client location, if available, and the name
    # of the bus stop, if given
    stop_name: Optional[str] = result.get("stop_name")
    stop: Optional[straeto.BusStop] = None
    location: Optional[Tuple[float, float]] = None

    if stop_name in {"þar", "þangað"}:
        # Referring to a bus stop mentioned earlier
        ctx = query.fetch_context()
        if ctx and "bus_stop" in ctx:
            stop_name = cast(str, ctx["bus_stop"])
        else:
            answer = voice_answer = "Ég veit ekki við hvaða stað þú átt."
            response = dict(answer=answer)
            return response, answer, voice_answer

    if not stop_name:
        # No stop name given: we need the client location instead
        location = query.location
        if location is None:
            answer = "Staðsetning óþekkt"
            response = dict(answer=answer)
            voice_answer = "Ég veit ekki hvar þú ert."
            return response, answer, voice_answer

    # Obtain today's bus schedule; the module-level schedule is
    # checked and replaced while holding SCHEDULE_LOCK
    global SCHEDULE_TODAY
    with SCHEDULE_LOCK:
        if SCHEDULE_TODAY is None or not SCHEDULE_TODAY.is_valid_today:
            # We don't have today's schedule: create it
            SCHEDULE_TODAY = straeto.BusSchedule()

    # Obtain the set of stops that the user may be referring to
    stops: List[straeto.BusStop] = []
    if stop_name:
        stops = straeto.BusStop.named(stop_name, fuzzy=True)
        if query.location is not None:
            # If we know the location of the client, sort the
            # list of potential stops by proximity to the client
            straeto.BusStop.sort_by_proximity(stops, query.location)
    else:
        # Obtain the closest stops (at least within 400 meters radius)
        assert location is not None
        stops = cast(
            List[straeto.BusStop],
            straeto.BusStop.closest_to_list(location, n=2, within_radius=0.4),
        )
        if not stops:
            # This will fetch the single closest stop, regardless of distance
            stops = [
                cast(straeto.BusStop, straeto.BusStop.closest_to(location))
            ]

    # Handle the case where no bus number was specified (i.e. is 'Any')
    if result.bus_number == "Any" and stops:
        stop = stops[0]
        # All route numbers visiting the first stop, in numeric order
        routes = sorted(
            (straeto.BusRoute.lookup(rid).number for rid in stop.visits.keys()),
            key=lambda r: int(r),
        )
        if len(routes) != 1:
            # More than one route possible: ask user to clarify
            # NOTE(review): this branch is also taken when there are
            # no routes at all - confirm that is intended
            route_seq = natlang_seq(list(map(str, routes)))
            answer = (" ".join(
                ["Leiðir", route_seq, "stoppa á", to_dative(stop.name)])
                + ". Spurðu um eina þeirra.")
            voice_answer = (" ".join([
                "Leiðir",
                numbers_to_neutral(route_seq),
                "stoppa á",
                to_dative(stop.name),
            ]) + ". Spurðu um eina þeirra.")
            response = dict(answer=answer)
            return response, answer, voice_answer
        # Only one route: use it as the query subject
        bus_number = routes[0]
        bus_name = "strætó númer {0}".format(bus_number)
    else:
        # Use the bus number and name from the query, with fallbacks
        bus_number = result.bus_number if "bus_number" in result else 0
        bus_name = result.bus_name if "bus_name" in result else "Óþekkt"

    # Prepare results: 'va' collects the parts of the voice answer,
    # 'a' the parts of the written answer
    bus_name = cap_first(bus_name)
    va = [bus_name]
    a = []
    arrivals = []
    arrivals_dict = {}
    arrives = False
    route_number = str(bus_number)

    # First, check the closest stop
    # !!! TODO: Prepare a different area_priority parameter depending
    # !!! on the user's location; i.e. if she is in Eastern Iceland,
    # !!! route '1' would mean 'AL.1' instead of 'ST.1'.
    if stops:
        # Stop at the first candidate stop where the route arrives;
        # if there is none, 'stop' is left as the last candidate
        for stop in stops:
            arrivals_dict, arrives = SCHEDULE_TODAY.arrivals(
                route_number, stop)
            if arrives:
                break
        arrivals = list(arrivals_dict.items())
        a = ["Á", to_accusative(stop.name), "í átt að"]

    if arrivals:
        # Get a predicted arrival time for each direction from the
        # real-time bus location server
        prediction = SCHEDULE_TODAY.predicted_arrival(route_number, stop)
        # NOTE(review): naive UTC time - presumably the schedule times
        # are comparable to UTC wall-clock time; confirm
        now = datetime.utcnow()
        # Current time as (hour, minute, second), with the minute bumped
        # by one when 30 or more seconds have passed.
        # NOTE(review): this can yield a minute value of 60 - confirm
        # that hms_diff() handles that
        hms_now = (now.hour, now.minute + (now.second // 30), 0)
        first = True

        # We may get three (or more) arrivals if there are more than two
        # endpoints for the bus route in the schedule. To minimize
        # confusion, we only include the two endpoints that have the
        # earliest arrival times and skip any additional ones.
        arrivals = sorted(arrivals, key=lambda t: t[1][0])[:2]

        for direction, times in arrivals:
            if not first:
                va.append(", og")
                a.append(". Í átt að")
            va.extend(["í átt að", to_dative(direction)])
            a.append(to_dative(direction))
            # Optional remark about a deviation from the schedule
            deviation = []
            if prediction and direction in prediction:
                # We have a predicted arrival time
                hms_sched = times[0]
                hms_pred = prediction[direction][0]
                # Calculate the difference between the prediction and
                # now, and skip it if it is 1 minute or less
                diff = hms_diff(hms_pred, hms_now)
                if abs(diff) <= 1:
                    deviation = [", en er að fara núna"]
                else:
                    # Calculate the difference in minutes between the
                    # schedule and the prediction, with a positive number
                    # indicating a delay
                    diff = hms_diff(hms_pred, hms_sched)
                    if diff < -1:
                        # More than one minute ahead of schedule
                        if diff < -5:
                            # More than 5 minutes ahead
                            deviation = [
                                ", en kemur sennilega fyrr, eða",
                                hms_fmt(hms_pred),
                            ]
                        else:
                            # Two to five minutes ahead
                            deviation = [
                                ", en er",
                                str(-diff),
                                "mínútum á undan áætlun",
                            ]
                    elif diff >= 3:
                        # 3 minutes or more behind schedule
                        deviation = [
                            ", en kemur sennilega ekki fyrr en",
                            hms_fmt(hms_pred),
                        ]
            if first:
                # The stop name is only mentioned once in the voice
                # answer, for the first direction
                assert stop is not None
                if deviation:
                    va.extend(["á að koma á", to_accusative(stop.name)])
                else:
                    va.extend(["kemur á", to_accusative(stop.name)])
            va.append("klukkan")
            a.append("klukkan")
            if len(times) == 1 or (len(times) > 1 and hms_diff(times[0], hms_now) >= 10):
                # Either we have only one arrival time, or the next arrival is
                # at least 10 minutes away: only pronounce one time
                hms = times[0]
                time_text = hms_fmt(hms)
            else:
                # Return two or more times
                time_text = " og ".join(hms_fmt(hms) for hms in times)
            va.append(time_text)
            a.append(time_text)
            va.extend(deviation)
            a.extend(deviation)
            first = False

    elif arrives:
        # The given bus has already completed its scheduled halts at this stop today
        assert stops
        stop = stops[0]
        reply = ["kemur ekki aftur á", to_accusative(stop.name), "í dag"]
        va.extend(reply)
        a = [bus_name] + reply

    elif stops:
        # The given bus doesn't stop at all at either of the two closest stops
        stop = stops[0]
        va.extend(["stoppar ekki á", to_dative(stop.name)])
        a = [bus_name, "stoppar ekki á", to_dative(stop.name)]

    else:
        # The bus stop name is not recognized
        # (the voice and written answers share the same list here)
        va = a = [stop_name.capitalize(), "er ekki biðstöð"]

    if stop is not None:
        # Store a location coordinate and a bus stop name in the context
        query.set_context({"location": stop.location, "bus_stop": stop.name})

    # Hack: Since we know that the query string contains no uppercase words,
    # adjust it accordingly; otherwise it may erroneously contain capitalized
    # words such as Vagn and Leið.
    bq = query.beautified_query
    for t in (
        ("Vagn ", "vagn "),
        ("Vagni ", "vagni "),
        ("Vagns ", "vagns "),
        ("Leið ", "leið "),
        ("Leiðar ", "leiðar "),
    ):
        bq = bq.replace(*t)
    query.set_beautified_query(bq)

    def assemble(x):
        """Join answer parts with spaces and add a final period,
        removing the space before periods and commas."""
        return (" ".join(x) + ".").replace(" .", ".").replace(" ,", ",")

    voice_answer = assemble(va)
    answer = assemble(a)
    response = dict(answer=answer)
    return response, answer, voice_answer
def Nl(node, params, result):
    """Noun phrase containing the name of a specific location.

    Stores the capitalized nominative form under "location" in result.
    """
    nominative = result._nominative
    result["location"] = cap_first(nominative)
def query_person(query: Query, session: Session, name: str) -> AnswerTuple:
    """Answer a query for a person by name.

    A personal pronoun given as the name is resolved from the query
    context where possible. Voice queries get a single title as the
    answer; other queries get a list of titles and articles.

    Returns:
        A ``(response, answer, voice_answer)`` tuple.
    """
    response: Dict[str, Any] = dict(answers=[], sources=[])

    if name in {"hann", "hún", "hán", "það"}:
        # Using a personal pronoun: check whether we can infer
        # the name from the query context, i.e. from a recent query result
        ctx = query.fetch_context() if name != "það" else None
        if not (ctx and "person_name" in ctx):
            # No - give up, with a reply that matches the pronoun
            replies = {
                "hann": "Ég veit ekki við hvern þú átt.",
                "hún": "Ég veit ekki við hverja þú átt.",
            }
            answer = voice_answer = replies.get(
                name, "Ég veit ekki við hvert þú átt."
            )
            return response, answer, voice_answer
        # Yes, success
        name = cast(str, ctx["person_name"])

    if not query.is_voice:
        # Not voice: return all titles along with a list of
        # articles where this person name appears
        titles = _query_person_titles(session, name)
        articles = _query_article_list(session, name)
        response = dict(answers=titles, sources=articles)
        if titles and "answer" in titles[0]:
            # 'Már Guðmundsson er seðlabankastjóri.'
            answer = titles[0]["answer"]
            # Set the context for a subsequent query
            query.set_context({"person_name": name})
        else:
            answer = "Nafnið '" + name + "' finnst ekki."
        return response, answer, ""

    # Handle voice query
    if " " not in name:
        # If using voice, do not attempt to answer single-name
        # queries ('Hver er Guðmundur?') since the answers are almost
        # always nonsensical
        query.set_error("E_PERSON_NOT_FOUND")
        return dict(answer=""), "", ""

    # A name with at least two components
    title, source = query_person_title(session, name)
    if not title:
        # Rather than accept this as a voice query
        # for a person that is not found, return an
        # error and thereby give other query handlers
        # a chance to parse this
        query.set_error("E_PERSON_NOT_FOUND")
        return dict(answer=""), "", ""

    answer = cap_first(title)
    # An all-uppercase word is probably an abbreviation, such as
    # 'FME' or 'BSÍ': spell it out as 'F M E' for the voice answer
    spoken = [
        " ".join(word) if len(word) > 1 and word.isupper() else word
        for word in title.split()
    ]
    voice_answer = name + " er " + " ".join(spoken) + "."
    # Set the context for a subsequent query
    query.set_context({"person_name": name})
    # Set source, if known
    if source is not None:
        query.set_source(source)
    response = dict(answer=answer)
    return response, answer, voice_answer