Example #1
0
File: main.py Project: busla/Reynir
def top_persons(limit=_TOP_PERSONS_LENGTH):
    """ Collect the persons most recently mentioned in visible news articles.

        At most 2 * limit person records are examined, newest article first,
        and collection stops once limit distinct names have been gathered.
        Returns a list of dicts with the keys name, title, gender, url and
        uuid, sorted by name with the key function yielded by changedlocale(). """
    best_by_name = {}
    bindb = BIN_Db.get_db()

    with SessionContext(commit=True) as session:

        query = (
            session.query(Person.name, Person.title, Person.article_url, Article.id)
            .join(Article)
            .join(Root)
            .filter(Root.visible)
            .order_by(desc(Article.timestamp))
        )

        # The slice caps the scan at 2 * limit records
        for row in query[0 : limit * 2]:
            current = best_by_name.get(row.name)
            if current is not None and len(row.title) <= len(current[0]):
                # This name is already stored with a title at least as long
                continue
            best_by_name[row.name] = (
                correct_spaces(row.title),
                row.article_url,
                row.id,
                bindb.lookup_name_gender(row.name),
            )
            if len(best_by_name) >= limit:
                # Enough distinct names have been collected
                break

    with changedlocale() as strxfrm:
        # Flatten the dictionary into a list of dicts and sort it by name
        persons = [
            {
                "name": name,
                "title": title,
                "gender": gender,
                "url": url,
                "uuid": article_id,
            }
            for name, (title, url, article_id, gender) in best_by_name.items()
        ]
        persons.sort(key=lambda person: strxfrm(person["name"]))
        return persons
Example #2
0
def sentence(state, result):
    """ Dispatch a fully processed sentence to the first matching handler.

        If result carries no "qtype", the query is flagged as not understood.
        Otherwise the first entry of _Q2FN_MAP whose key is present in result
        gets to handle the query; any exception raised on the way is logged
        and stored as an error on the query object. """
    query = state["query"]
    if "qtype" not in result:
        query.set_error("E_QUERY_NOT_UNDERSTOOD")
        return

    # A query type was matched: find the handler that goes with it
    try:
        with changedlocale(category="LC_TIME"):
            for key, handler in _Q2FN_MAP:
                if key not in result:
                    continue
                # Let the handler fill in the answer on the query object
                handler(query, result)
                # Lowercase the query string so that e.g. 'Dagur' is not
                # shown with a capital D
                query.lowercase_beautified_query()
                query.set_qtype(_DATE_QTYPE)
                break

    except Exception as exc:
        message = "Exception {0} while processing date query '{1}'".format(
            exc, query.query
        )
        logging.warning(message)
        query.set_error("E_EXCEPTION: {0}".format(exc))
Example #3
0
def iceformat_float(fp_num: float,
                    decimal_places: int = 2,
                    strip_zeros: bool = True) -> str:
    """ Format a number as a grouped decimal string under the changed
        LC_NUMERIC locale.

        The value is rendered with decimal_places decimals, every space in
        the formatted text is replaced by a period, and the result is passed
        through strip_trailing_zeros() unless strip_zeros is false. """
    pattern = "%.{0}f".format(decimal_places)
    with changedlocale(category="LC_NUMERIC"):
        grouped = locale.format_string(pattern, float(fp_num), grouping=True)
        formatted = grouped.replace(" ", ".")
        if strip_zeros:
            return strip_trailing_zeros(formatted)
        return formatted
Example #4
0
def recent_persons(limit=_RECENT_PERSONS_LENGTH):
    """ Collect names and titles of persons found in the newest visible articles.

        Up to 2 * limit person records are scanned, newest article first, and
        scanning stops once limit distinct names have been collected. Returns
        a list of dicts with the keys name, title, gender, url and uuid,
        sorted by name with the key function yielded by changedlocale(). """

    def prefer_new(new_title, old_title):
        """ Decide whether new_title should replace the stored old_title """
        new_len, old_len = len(new_title), len(old_title)
        if old_len >= _MAX_TITLE_LENGTH:
            # The stored title is too long: any shorter one is an improvement
            return new_len < old_len
        # The stored title fits: take a longer one only if it still fits
        return new_len < _MAX_TITLE_LENGTH and new_len > old_len

    chosen = {}

    with SessionContext(read_only=True) as session:

        query = (
            session.query(Person.name, Person.title, Person.article_url, Article.id)
            .join(Article)
            .join(Root)
            .filter(Root.visible)
            .order_by(desc(Article.timestamp))
        )
        # Look at no more than 2 * N records
        rows = query[0 : limit * 2]

        with BIN_Db.get_db() as bindb:
            for row in rows:
                if row.name in chosen and not prefer_new(
                    row.title, chosen[row.name][0]
                ):
                    # The title we already hold for this name is good enough
                    continue
                chosen[row.name] = (
                    correct_spaces(row.title),
                    row.article_url,
                    row.id,
                    bindb.lookup_name_gender(row.name),
                )
                if len(chosen) >= limit:
                    # As many names as were asked for: stop scanning
                    break

    with changedlocale() as strxfrm:
        # Turn the dictionary into a list of dicts sorted by name
        persons = [
            {
                "name": name,
                "title": title,
                "gender": gender,
                "url": url,
                "uuid": article_id,
            }
            for name, (title, url, article_id, gender) in chosen.items()
        ]
        persons.sort(key=lambda person: strxfrm(person["name"]))
        return persons
Example #5
0
def recent_persons(limit=_RECENT_PERSONS_LENGTH):
    """ Return the persons, with titles, seen in the most recent visible articles.

        Scans at most 2 * limit person records in descending article timestamp
        order and keeps one title per name, stopping when limit names have been
        picked. The result is a list of dicts (name, title, gender, url, uuid)
        sorted by name using the key function yielded by changedlocale(). """
    picked = {}

    def replaces(candidate, incumbent):
        """ True if the candidate title is a better choice than the incumbent """
        n_new = len(candidate)
        n_old = len(incumbent)
        if n_old < _MAX_TITLE_LENGTH:
            # The incumbent fits: only a longer title that also fits wins
            return n_old < n_new < _MAX_TITLE_LENGTH
        # The incumbent is too long: a shorter title wins
        return n_new < n_old

    with SessionContext(read_only=True) as session:

        # Go through up to 2 * N records
        candidates = (
            session.query(Person.name, Person.title, Person.article_url, Article.id)
            .join(Article)
            .join(Root)
            .filter(Root.visible)
            .order_by(desc(Article.timestamp))
        )[0 : limit * 2]

        with BIN_Db.get_db() as bindb:
            for person in candidates:
                previous = picked.get(person.name)
                # Store the name if it is new, or if this title beats the stored one
                if previous is None or replaces(person.title, previous[0]):
                    picked[person.name] = (
                        correct_spaces(person.title),
                        person.article_url,
                        person.id,
                        bindb.lookup_name_gender(person.name),
                    )
                    if len(picked) >= limit:
                        # The requested number of names has been reached
                        break

    with changedlocale() as strxfrm:

        def as_dict(name, info):
            """ Unpack a stored tuple into the dict shape handed to callers """
            title, url, article_id, gender = info
            return dict(name=name, title=title, gender=gender, url=url, uuid=article_id)

        return sorted(
            (as_dict(name, info) for name, info in picked.items()),
            key=lambda entry: strxfrm(entry["name"]),
        )
Example #6
0
def make_response_list(rd):
    """ Turn the result dictionary rd into a ranked, trimmed response list.

        rd maps an answer string to a dict of articles. It is modified in
        place: an answer that is a case-insensitive substring of another
        answer is merged into that answer and removed. Returns a list of
        (answer, articles) tuples, with each article list sorted on ts in
        descending order and cropped to _MAX_URLS, and the list as a whole
        cropped to _MAXLEN_ANSWER entries. """

    def newest_first(articles):
        """ Order the articles of one answer by descending timestamp """
        return sorted(articles.values(), key=lambda art: art.ts, reverse=True)

    # First pass: fold later answers into earlier ones that contain them.
    # Slots of folded answers are set to None so that they are skipped later.
    keys = list(rd.keys())
    for i in range(len(keys) - 1):
        outer = keys[i]
        if outer is None:
            continue
        outer_lc = outer.lower()
        for j in range(i + 1, len(keys)):
            inner = keys[j]
            if inner is None or inner.lower() not in outer_lc:
                continue
            rd[outer].update(rd[inner])
            del rd[inner]
            keys[j] = None

    # Second pass: fold earlier answers into the first later one containing them
    keys = list(rd.keys())
    for i, earlier in enumerate(keys[:-1]):
        needle = earlier.lower()
        for later in keys[i + 1 :]:
            if needle in later.lower():
                rd[later].update(rd[earlier])
                del rd[earlier]
                break

    with changedlocale() as strxfrm:
        # Most articles first; ties are broken by the collation key of the answer
        rl = [(text, newest_first(articles)) for text, articles in rd.items()]
        rl.sort(key=lambda pair: (-len(pair[1]), strxfrm(pair[0])))

    # If the entry at the cutoff position still has more than one URL,
    # drop every entry that has only a single source URL
    if len(rl) > _CUTOFF_AFTER and len(rl[_CUTOFF_AFTER][1]) > 1:
        rl = [pair for pair in rl if len(pair[1]) > 1]

    # Crop each article list down to _MAX_URLS
    rl = [
        (pair[0], pair[1][0:_MAX_URLS]) if len(pair[1]) > _MAX_URLS else pair
        for pair in rl
    ]
    return rl[0:_MAXLEN_ANSWER]
Example #7
0
def main():
    """ Print the verbs read from resources/sagnir.txt as a sorted list.

        Returns 2 if the configuration cannot be read, otherwise 0. """
    try:
        Settings.read("config/Reynir.conf")
    except ConfigError as err:
        print("Configuration error: {0}".format(err), file=sys.stderr)
        return 2

    verbs = read_verbs("resources/sagnir.txt")
    with changedlocale() as strxfrm:
        # Order the verbs by the collation key of their nom attribute
        verbs_sorted = list(verbs.values())
        verbs_sorted.sort(key=lambda verb: strxfrm(verb.nom))

    print("#\n# Verb list generated by verbs.py from resources/sagnir.txt")
    # Only the first 19 characters of the UTC timestamp are shown
    print("#", str(datetime.utcnow())[0:19], "\n#\n")
    display(verbs_sorted)
    total = len(verbs)
    print("\n# Total: {0} distinct verbs\n".format(total))

    # Check which verbs are missing or different in Verbs.conf
    #count = check_missing(verbs_sorted)
    #print("\n# Total: {0} missing verb forms\n".format(count))

    return 0
Example #8
0
def main():
    """ Generate the verb list from resources/sagnir.txt and print it.

        The exit status is 2 when reading the configuration fails and 0
        when the list has been printed. """
    try:
        Settings.read("config/Greynir.conf")
    except ConfigError as config_error:
        message = "Configuration error: {0}".format(config_error)
        print(message, file=sys.stderr)
        return 2

    verbs = read_verbs("resources/sagnir.txt")

    with changedlocale() as strxfrm:

        def collation_key(verb):
            """ Sort key: the transformed nom attribute of the verb """
            return strxfrm(verb.nom)

        verbs_sorted = sorted(verbs.values(), key=collation_key)

    # Header comment lines, including a UTC timestamp cut to 19 characters
    print("#\n# Verb list generated by verbs.py from resources/sagnir.txt")
    print("#", str(datetime.utcnow())[0:19], "\n#\n")
    display(verbs_sorted)
    print("\n# Total: {0} distinct verbs\n".format(len(verbs)))

    # Check which verbs are missing or different in Verbs.conf
    #count = check_missing(verbs_sorted)
    #print("\n# Total: {0} missing verb forms\n".format(count))

    return 0
Example #9
0
def sentence(state, result):
    """ Compose the answer to a date query once the sentence is processed.

        If result carries no "qtype", the query is flagged as not understood.
        Otherwise the answer, voice answer and response are put together
        depending on which of "now", "until"/"since" or "when" is present in
        result. Any exception raised on the way is logged and stored as an
        error on the query object. """
    query = state["query"]
    if "qtype" not in result:
        query.set_error("E_QUERY_NOT_UNDERSTOOD")
        return

    # A query type was matched
    try:
        with changedlocale(category="LC_TIME"):
            # TODO: Restore correct timezone handling
            # tz = timezone4loc(q.location, fallback="IS")
            now = datetime.utcnow()  # datetime.now(timezone(tz))
            key = None

            if "now" in result:
                # The question is about today's date
                today_text = now.strftime("%A %-d. %B %Y")
                answer = today_text.capitalize()
                voice = "Í dag er {0}".format(today_text)
                # In the voice answer, swap the numeric ordinal for the
                # spelled-out one so that the grammar comes out right
                spoken_day = " " + _DAY_INDEX_NOM[now.day] + " "
                voice = re.sub(r" \d+\. ", spoken_day, voice)
                response = {"answer": answer}
                key = "CurrentDate"
            elif ("until" in result
                  or "since" in result) and "target" in result:
                # The question is about the time span to or from a given date
                # target.replace(tzinfo=timezone(tz))
                response, answer, voice = howlong_desc_answ(result.target)
                if "until" in result:
                    key = "FutureDate"
                else:
                    key = "SinceDate"
            elif "when" in result and "target" in result:
                # TODO: Fix this so it includes weekday, e.g.
                # "Sunnudaginn 1. október"
                # The verb defaults to 'er'; the result may supply another
                # form (plural 'eru' for 'páskar')
                verb = result.is_verb if "is_verb" in result else "er"
                when_text = (result.desc + " " + verb + " " +
                             result.target.strftime("%-d. %B"))
                answer = when_text[0].upper() + when_text[1:].lower()
                # The voice answer gets the spelled-out ordinal in the
                # accusative case instead of the numeric one
                spoken_day = _DAY_INDEX_ACC[result.target.day] + " "
                voice = re.sub(r"\d+\. ", spoken_day, answer)
                response = {"answer": answer}
            else:
                # Shouldn't be here
                raise Exception("Unable to handle date query")

            query.set_key(key)
            query.set_answer(response, answer, voice)
            # Lowercase the query string so that e.g. 'Dagur' is not
            # shown with a capital D
            query.lowercase_beautified_query()
            query.set_qtype(_DATE_QTYPE)

    except Exception as exc:
        logging.warning("Exception while processing date query: {0}".format(exc))
        query.set_error("E_EXCEPTION: {0}".format(exc))
Example #10
0
def chart_stats(session=None, num_days: int = 7) -> Dict[str, Any]:
    """ Return scraping and parsing stats for charts.

        Covers num_days consecutive days ending with the current UTC day,
        oldest day first. session is passed on to the period queries as
        their enclosing session.

        Returns a dict with the keys "scraped", "parsed" and "queries",
        each holding the chart "labels", the "datasets" and the per-day
        average "avg". If num_days is zero or negative, no days are
        processed and the averages are 0 instead of the division by
        num_days raising ZeroDivisionError. """
    today = datetime.utcnow().replace(hour=0,
                                      minute=0,
                                      second=0,
                                      microsecond=0)
    # Days before this cutoff get a label without the weekday name
    weekday_cutoff = today - timedelta(days=6)
    one_day = timedelta(days=1)

    labels: List[str] = []
    sources: Dict[str, List[int]] = {}
    parsed_data = []
    query_data = []

    # Get article count for each source for each day, and query count for each day
    # We change locale to get localized date weekday/month names
    with changedlocale(category="LC_TIME"):
        for days_back in reversed(range(num_days)):
            start = today - timedelta(days=days_back)
            end = start + one_day

            # Generate label
            dfmtstr = "%-d. %b" if start < weekday_cutoff else "%a %-d. %b"
            labels.append(start.strftime(dfmtstr))

            sent = 0
            parsed = 0

            # Get article count per source for day
            # Also collect parsing stats for parse % chart
            # NOTE(review): a source that is absent from a day's rows gets no
            # entry for that day, so its series could end up shorter than
            # labels - confirm that ChartsQuery.period returns every source
            q = ChartsQuery.period(start, end, enclosing_session=session)
            for (name, cnt, s, p) in q:
                sources.setdefault(name, []).append(cnt)
                sent += s
                parsed += p

            # Guard against days with nothing sent
            percent = round((parsed / sent) * 100, 2) if sent else 0
            parsed_data.append(percent)

            # Get query count for day: first column of the first row
            q = QueriesQuery.period(start, end, enclosing_session=session)
            query_data.append(q[0][0])

    # Create datasets for bar chart, ordered by source name
    datasets = []
    article_count = 0
    for k, v in sorted(sources.items()):
        color = _SOURCE_ROOT_COLORS.get(k, "#000")
        datasets.append({"label": k, "backgroundColor": color, "data": v})
        article_count += sum(v)

    def per_day(total):
        """ Average total over the period; 0 when the period has no days """
        return total / num_days if num_days > 0 else 0

    # Calculate averages
    scrape_avg = per_day(article_count)
    parse_avg = per_day(sum(parsed_data))
    query_avg = per_day(sum(query_data))

    return {
        "scraped": {
            "labels": labels,
            "datasets": datasets,
            "avg": scrape_avg
        },
        "parsed": {
            "labels": labels,
            "datasets": [{
                "data": parsed_data
            }],
            "avg": parse_avg,
        },
        "queries": {
            "labels": labels,
            "datasets": [{
                "data": query_data
            }],
            "avg": query_avg,
        },
    }
Example #11
0
def format_icelandic_float(fp_num):
    """ Format a number as a grouped two-decimal string under the changed
        LC_NUMERIC locale, replacing spaces with periods and passing the
        result through strip_trailing_zeros(). """
    with changedlocale(category="LC_NUMERIC"):
        grouped = locale.format_string("%.2f", fp_num, grouping=True)
        formatted = grouped.replace(" ", ".")
        return strip_trailing_zeros(formatted)
Example #12
0
def chart_stats(session=None, num_days=7):
    """ Return scraping and parsing stats for charts.

        Covers num_days consecutive days ending with the current UTC day,
        oldest day first. session is passed on to the period queries as
        their enclosing session.

        Returns a dict with the keys "scraped", "parsed" and "queries",
        each holding the chart "labels", the "datasets" and the per-day
        average "avg". If num_days is zero or negative, no days are
        processed and the averages are 0 instead of the division by
        num_days raising ZeroDivisionError. """

    # TODO: This should be put in a column in the roots table
    colors = {
        "Kjarninn": "#f17030",
        "RÚV": "#dcdcdc",
        "Vísir": "#3d6ab9",
        "Morgunblaðið": "#020b75",
        "Eyjan": "#ca151c",
        "Kvennablaðið": "#900000",
        "Stundin": "#ee4420",
        "Hringbraut": "#44607a",
        "Fréttablaðið": "#002a61",
        "Hagstofa Íslands": "#818285",
        "DV": "#ed1c24",
    }

    today = datetime.utcnow().replace(hour=0,
                                      minute=0,
                                      second=0,
                                      microsecond=0)
    # Days before this cutoff are labelled with a date, later ones
    # with the weekday name
    weekday_cutoff = today - timedelta(days=6)
    one_day = timedelta(days=1)

    labels = []
    sources = {}
    parsed_data = []
    query_data = []

    # Get article count for each source for each day, and query count for each day
    # We change locale to get localized date weekday/month names
    with changedlocale(category="LC_TIME"):
        for days_back in reversed(range(num_days)):
            start = today - timedelta(days=days_back)
            end = start + one_day

            # Generate label
            if start < weekday_cutoff:
                labels.append(start.strftime("%-d. %b"))
            else:
                labels.append(start.strftime("%A").capitalize())

            sent = 0
            parsed = 0

            # Get article count per source for day
            # Also collect parsing stats for parse % chart
            # NOTE(review): a source that is absent from a day's rows gets no
            # entry for that day, so its series could end up shorter than
            # labels - confirm that ChartsQuery.period returns every source
            q = ChartsQuery.period(start, end, enclosing_session=session)
            for (name, cnt, s, p) in q:
                sources.setdefault(name, []).append(cnt)
                sent += s
                parsed += p

            # Guard against days with nothing sent
            percent = round((parsed / sent) * 100, 2) if sent else 0
            parsed_data.append(percent)

            # Get query count for day: first column of the first row
            q = QueriesQuery.period(start, end, enclosing_session=session)
            query_data.append(q[0][0])

    # Create datasets for bar chart, ordered by source name
    datasets = []
    article_count = 0
    for k, v in sorted(sources.items()):
        color = colors.get(k, "#000")
        datasets.append({"label": k, "backgroundColor": color, "data": v})
        article_count += sum(v)

    def per_day(total):
        """ Average total over the period; 0 when the period has no days """
        return total / num_days if num_days > 0 else 0

    # Calculate averages
    scrape_avg = per_day(article_count)
    parse_avg = per_day(sum(parsed_data))
    query_avg = per_day(sum(query_data))

    return {
        "scraped": {
            "labels": labels,
            "datasets": datasets,
            "avg": scrape_avg
        },
        "parsed": {
            "labels": labels,
            "datasets": [{
                "data": parsed_data
            }],
            "avg": parse_avg,
        },
        "queries": {
            "labels": labels,
            "datasets": [{
                "data": query_data
            }],
            "avg": query_avg,
        },
    }
Example #13
0
def chart_stats(session=None, num_days=7):
    """ Assemble per-day scraping and parsing figures for the charts.

        Covers num_days consecutive days ending with the current UTC day,
        oldest day first. session is passed on to ChartsQuery.period() as
        the enclosing session. Returns a dict with the keys "scraped" and
        "parsed", each holding chart "labels" and "datasets". """

    # TODO: This should be put in a column in the roots table
    source_colors = {
        "Kjarninn": "#f17030",
        "RÚV": "#dcdcdc",
        "Vísir": "#3d6ab9",
        "Morgunblaðið": "#020b75",
        "Eyjan": "#ca151c",
        "Kvennablaðið": "#900000",
    }

    midnight = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    # Days before this cutoff are labelled with a date, later ones
    # with the weekday name
    weekday_cutoff = midnight - timedelta(days=6)

    labels = []
    counts_by_source = {}
    parse_percentages = []

    # The locale is changed while the labels are formatted
    with changedlocale(category="LC_TIME"):
        for days_back in reversed(range(num_days)):
            start = midnight - timedelta(days=days_back)
            end = midnight - timedelta(days=days_back - 1)

            label_format = "%-d. %b" if start < weekday_cutoff else "%A"
            labels.append(start.strftime(label_format))

            # Per-source article counts for the day, plus the parsing totals
            sent = parsed = 0
            rows = ChartsQuery.period(start, end, enclosing_session=session)
            for name, cnt, s, p in rows:
                counts_by_source.setdefault(name, []).append(cnt)
                sent += s
                parsed += p

            if sent:
                parse_percentages.append(round((parsed / sent) * 100, 2))
            else:
                parse_percentages.append(0)

    # One bar chart dataset per source, ordered by source name
    datasets = [
        {
            "label": name,
            "backgroundColor": source_colors.get(name, "#000"),
            "data": counts,
        }
        for name, counts in sorted(counts_by_source.items())
    ]

    scraped = {"labels": labels, "datasets": datasets}
    parsed_chart = {"labels": labels, "datasets": [{"data": parse_percentages}]}
    return {"scraped": scraped, "parsed": parsed_chart}
Example #14
0
def top_news(topic=None, offset=0, limit=_TOP_NEWS_LENGTH):
    """ Return a list of articles (with a particular topic) in
        chronologically reversed order.

        topic is an optional topic identifier to filter by; offset and limit
        are applied to the query after ordering by descending timestamp.
        Only articles that have a tree, a timestamp that is not later than
        the current UTC time, a non-empty heading and at least one sentence,
        and that belong to a visible root, are included.

        Returns a list of ArticleDisplay objects for the web template. """
    toplist = []

    with SessionContext(commit=True) as session:

        # These are SQLAlchemy column expressions: '!= None' and '== True'
        # build SQL predicates and must not be rewritten as 'is not None'
        # or as a plain truth test
        q = (
            session.query(Article)
            .join(Root)
            .filter(Article.tree != None)  # noqa: E711
            .filter(Article.timestamp != None)  # noqa: E711
            .filter(Article.timestamp <= datetime.utcnow())
            .filter(Article.heading > "")
            .filter(Article.num_sentences > 0)
            .filter(Root.visible == True)  # noqa: E712
        )

        if topic is not None:
            # Filter by topic identifier
            q = q.join(ArticleTopic).join(Topic).filter(
                Topic.identifier == topic)

        q = q.order_by(desc(Article.timestamp)).offset(offset).limit(limit)

        class ArticleDisplay:
            """ Utility class to carry information about an article to the web template """
            def __init__(
                self,
                heading,
                timestamp,
                url,
                uuid,
                num_sentences,
                num_parsed,
                icon,
                localized_date,
                source,
            ):
                self.heading = heading
                self.timestamp = timestamp
                self.url = url
                self.uuid = uuid
                self.num_sentences = num_sentences
                self.num_parsed = num_parsed
                self.icon = icon
                self.localized_date = localized_date
                self.source = source

            @property
            def width(self):
                """ The ratio of parsed sentences to the total number of sentences,
                    expressed as a percentage string """
                if self.num_sentences == 0:
                    return "0%"
                return "{0}%".format(
                    (100 * self.num_parsed) // self.num_sentences)

            @property
            def time(self):
                """ Characters 11 to 15 of the ISO formatted timestamp,
                    i.e. the HH:MM part """
                return self.timestamp.isoformat()[11:16]

            @property
            def date(self):
                """ The localized date, with the year appended unless the
                    timestamp is from the current year """
                if datetime.today().year == self.timestamp.year:
                    return self.localized_date
                return self.fulldate

            @property
            def fulldate(self):
                """ The localized date followed by the year """
                return self.localized_date + self.timestamp.strftime(" %Y")

        # The locale is changed while the date strings are formatted
        with changedlocale(category="LC_TIME"):
            for a in q:
                # Instantiate article objects from results
                source = a.root.domain
                toplist.append(
                    ArticleDisplay(
                        heading=a.heading,
                        timestamp=a.timestamp,
                        url=a.url,
                        uuid=a.id,
                        num_sentences=a.num_sentences,
                        num_parsed=a.num_parsed,
                        # The icon file is named after the source domain
                        icon=source + ".png",
                        localized_date=a.timestamp.strftime("%-d. %b"),
                        source=source,
                    )
                )

    return toplist
Example #15
0
def test_query_api(client):
    """ Make various query API calls and validate response. """

    c = client

    google_key = has_google_api_key()

    # First, make sure nonsensical queries are not answered
    qstr = {"q": "blergh smergh vlurgh"}
    r = c.get("/query.api?" + urlencode(qstr))
    assert r.content_type.startswith(API_CONTENT_TYPE)
    assert r.is_json
    json = r.get_json()
    assert "valid" in json
    assert json["valid"] == True
    assert "error" in json
    assert "answer" not in json

    # Person and entity title queries are tested using a dummy database
    # populated with data from SQL file in tests/files/

    # Builtin module: title
    json = qmcall(c, {"q": "hver er viðar þorsteinsson", "voice": True}, "Person")
    assert json["voice"].startswith("Viðar Þorsteinsson er ")
    assert json["voice"].endswith(".")

    # Builtin module: title
    json = qmcall(c, {"q": "hver er björn þorsteinsson", "voice": True}, "Person")
    assert json["voice"].startswith("Björn Þorsteinsson er ")
    assert json["voice"].endswith(".")

    # Builtin module: person
    json = qmcall(c, {"q": "hver er forsætisráðherra", "voice": True}, "Title")
    assert json["voice"].startswith("Forsætisráðherra er ")
    assert json["voice"].endswith(".")

    # Builtin module: person w. title w. uppercase name
    # json = qmcall(c, {"q": "hver er forstjóri sjóvá", "voice": True}, "Title")
    # assert json["voice"].startswith("Forstjóri") and "Jón Jónsson" in json["voice"]

    # Builtin module: entities
    json = qmcall(c, {"q": "hvað er Nox Medical"}, "Entity")
    assert "nýsköpunarfyrirtæki" in json["answer"].lower()
    assert json["key"] == "Nox Medical"

    # Arithmetic module
    ARITHM_QUERIES = {
        "hvað er fimm sinnum tólf": "60",
        "hvað er 12 sinnum 12?": "144",
        "hvað er nítján plús 3": "22",
        "hvað er nítján plús þrír": "22",
        "hvað er nítján + 3": "22",
        "hvað er 19 + 3": "22",
        "hvað er 19 + þrír": "22",
        "hvað er hundrað mínus sautján": "83",
        "hvað er hundrað - sautján": "83",
        "hvað er 100 - sautján": "83",
        "hvað er 100 - 17": "83",
        "hvað er 17 deilt með fjórum": "4,25",
        "hvað er 17 / 4": "4,25",
        "hvað er 18 deilt með þrem": "6",
        "hvað er 18 / þrem": "6",
        "hvað er 18 / 3": "6",
        "hver er kvaðratrótin af 256": "16",
        "hvað er 12 í þriðja veldi": "1728",
        "hvað eru tveir í tíunda veldi": "1024",
        "hvað eru 17 prósent af 20": "3,4",
        "hvað er 7000 deilt með 812": "8,62",
        "hvað er þrisvar sinnum sjö": "21",
        "hvað er fjórðungur af 28": "7",
        "hvað er einn tuttugasti af 192": "9,6",
        "reiknaðu 7 sinnum 7": "49",
        "reiknaðu 7 x 7": "49",
        "reiknaðu sjö x 7": "49",
        "reiknaðu nítján x sjö": "133",
        "geturðu reiknað kvaðratrótina af 9": "3",
        "hvað er 8900 með vaski": "11.036",
        "hvað eru 7500 krónur með virðisaukaskatti": "9.300",
        "hvað er 9300 án vask": "7.500",
        "hvað er pí deilt með pí": "1",
        "hvað er pí / pí": "1",
        "hvað er pí í öðru veldi": "9,87",
        "hvað er tíu deilt með pí": "3,18",
    }

    for q, a in ARITHM_QUERIES.items():
        json = qmcall(c, {"q": q}, "Arithmetic")
        assert json["answer"] == a

    json = qmcall(
        c, {"q": "hvað er pí", "client_id": DUMMY_CLIENT_ID, "private": False}, "PI"
    )
    assert "π" in json["answer"]
    assert "3,14159" in json["answer"]

    json = qmcall(
        c, {"q": "hvað er það sinnum tveir", "client_id": DUMMY_CLIENT_ID}, "Arithmetic"
    )
    assert json["answer"].startswith("6,")

    # Bus module
    json = qmcall(
        c, {"q": "hvaða stoppistöð er næst mér", "voice": True}, "NearestStop"
    )
    assert json["answer"] == "Fiskislóð"
    assert json["voice"] == "Næsta stoppistöð er Fiskislóð; þangað eru 310 metrar."

    json = qmcall(
        c,
        {"q": "hvenær er von á vagni númer 17", "voice": True, "test": False},
        "ArrivalTime",
    )
    assert json["answer"] == "Staðsetning óþekkt"  # No location info available

    # Counting module
    json = qmcall(c, {"q": "teldu frá einum upp í tíu"}, "Counting")
    assert json["answer"] == "1…10"

    json = qmcall(c, {"q": "teldu hratt niður frá 4", "voice": True}, "Counting")
    assert json["answer"] == "3…0"
    assert "<break time=" in json["voice"]

    json = qmcall(c, {"q": "nennirðu að telja niður frá 24", "voice": True}, "Counting")
    assert json["answer"] == "23…0"

    json = qmcall(c, {"q": "teldu upp að 5000", "voice": True}, "Counting")
    assert len(json["voice"]) < 100

    # Currency module
    json = qmcall(c, {"q": "hvert er gengi dönsku krónunnar?"}, "Currency")
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    json = qmcall(c, {"q": "hvað kostar evran"}, "Currency")
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    json = qmcall(c, {"q": "hvað kostar bandaríkjadalur mikið í krónum"}, "Currency")
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    json = qmcall(
        c, {"q": "Hvert er gengi krónunnar gagnvart dollara í dag?"}, "Currency"
    )
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    json = qmcall(c, {"q": "hvert er gengi krónunnar á móti dollara í dag"}, "Currency")
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    json = qmcall(c, {"q": "hvað eru tíu þúsund krónur margir dalir"}, "Currency")
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    json = qmcall(c, {"q": "hvað eru 79 dollarar margar evrur?"}, "Currency")
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    # Date module
    SPECIAL_DAYS = (
        "jólin",
        "gamlársdagur",
        "nýársdagur",
        "hvítasunna",
        "páskar",
        "þjóðhátíðardagurinn",
        "baráttudagur verkalýðsins",
        "öskudagur",
        "skírdagur",
        "sumardagurinn fyrsti",
        "verslunarmannahelgi",
        "þorláksmessa",
        "föstudagurinn langi",
        "menningarnótt",
        "sjómannadagurinn",
        "dagur íslenskrar tungu",
        "annar í jólum",
        "feðradagur",
        "mæðradagurinn",
    )
    for d in SPECIAL_DAYS:
        qstr = "hvenær er " + d
        json = qmcall(c, {"q": qstr}, "Date")

    json = qmcall(c, {"q": "hver er dagsetningin?"}, "Date")
    assert json["answer"].endswith(datetime.now().strftime("%Y"))

    json = qmcall(c, {"q": "hvaða dagur er í dag?"}, "Date")
    assert json["answer"].endswith(datetime.now().strftime("%Y"))

    json = qmcall(c, {"q": "Hvað eru margir dagar til jóla?", "voice": True}, "Date")
    assert re.search(r"^\d+", json["answer"])
    assert "dag" in json["voice"]

    json = qmcall(c, {"q": "Hvað eru margir dagar í 12. maí?"}, "Date")
    assert "dag" in json["answer"] or "á morgun" in json["answer"]

    # Tests to make sure this kind of query isn't caught by the distance module
    json = qmcall(c, {"q": "Hvað er langt í jólin?"}, "Date")
    json = qmcall(c, {"q": "Hvað er langt í páska?"}, "Date")

    now = datetime.utcnow()

    with changedlocale(category="LC_TIME"):
        # Today
        dstr = now.date().strftime("%-d. %B")
        json = qmcall(c, {"q": "Hvað eru margir dagar í " + dstr})
        assert "í dag" in json["answer"]
        # Tomorrow
        dstr = (now.date() + timedelta(days=1)).strftime("%-d. %B")
        json = qmcall(c, {"q": "Hvað eru margir dagar í " + dstr})
        assert "á morgun" in json["answer"]

    json = qmcall(c, {"q": "hvaða ár er núna?"}, "Date")
    assert str(now.year) in json["answer"]

    json = qmcall(c, {"q": "er hlaupár?"}, "Date")
    assert str(now.year) in json["answer"]

    json = qmcall(c, {"q": "er 2020 hlaupár?"}, "Date")
    assert "var hlaupár" in json["answer"]

    json = qmcall(c, {"q": "var árið 1999 hlaupár?"}, "Date")
    assert "ekki hlaupár" in json["answer"]

    json = qmcall(c, {"q": "hvað eru margir dagar í desember"}, "Date")
    assert json["answer"].startswith("31")
    assert "dag" in json["answer"]

    json = qmcall(c, {"q": "hvað eru margir dagar í febrúar 2024"}, "Date")
    assert json["answer"].startswith("29")
    assert "dag" in json["answer"]

    json = qmcall(c, {"q": "Hvað er langt fram að verslunarmannahelgi"}, "Date")
    assert re.search(r"^\d+", json["answer"])

    # json = qmcall(c, {"q": "hvað er langt liðið frá uppstigningardegi"}, "Date")
    # assert re.search(r"^\d+", json["answer"])

    json = qmcall(c, {"q": "hvenær eru jólin"}, "Date")
    assert re.search(r"25", json["answer"]) is not None

    # Dictionary module
    json = qmcall(
        c, {"q": "hvernig skilgreinir orðabókin orðið kettlingur"}, "Dictionary"
    )
    assert "kettlingur" in json["answer"].lower()

    json = qmcall(c, {"q": "flettu upp orðinu skíthæll í orðabók"}, "Dictionary")
    assert "skíthæll" in json["answer"].lower()
    assert json["source"] == "Íslensk nútímamálsorðabók"

    # Distance module
    # NB: No Google API key on test server
    if google_key:
        json = qmcall(
            c, {"q": "hvað er ég langt frá perlunni", "voice": True}, "Distance"
        )
        assert json["answer"].startswith("3,5 km")
        assert json["voice"].startswith("Perlan er ")
        assert json["source"] == "Google Maps"

        json = qmcall(c, {"q": "hvað er langt í melabúðina", "voice": True}, "Distance")
        assert json["answer"].startswith("1,") and "km" in json["answer"]
        assert json["voice"].startswith("Melabúðin er ")

        json = qmcall(
            c, {"q": "hvað er ég lengi að ganga í kringluna", "voice": True}, "Distance"
        )
        assert json["key"] == "Kringlan"
        assert "klukkustund" in json["answer"] and " km" in json["answer"]
        assert json["voice"].startswith("Að ganga")

        json = qmcall(
            c, {"q": "hvað tekur langan tíma að keyra til akureyrar"}, "Distance"
        )
        assert json["key"] == "Akureyri"
        assert "klukkustundir" in json["answer"] and " km" in json["answer"]
        assert json["answer"].endswith("(389 km).")

    # Flights module
    departure_pattern = r"^Flug \w*? til .*? flýgur frá \w*? \d+\. \w*? klukkan \d\d\:\d\d að staðartíma.$"
    arrival_pattern = r"^Flug \w*? frá .*? lendir [í|á] \w*? \d+\. \w*? klukkan \d\d\:\d\d að staðartíma.$"
    no_matching_flight_pattern = r"Ekkert flug fannst (frá .*? )?(til .*? )?næstu \d+ daga."

    json = qmcall(
        c, {"q": "hvenær fer næsta flug til jfk frá keflavík", "voice": True}, "Flights"
    )
    assert re.search(departure_pattern, json["answer"])
    json = qmcall(
        c, {"q": "hvenær flýgur næsta flug til new york frá keflavík", "voice": True}, "Flights"
    )
    assert re.search(departure_pattern, json["answer"])
    json = qmcall(
        c,
        {"q": "hvenær flýgur næsta flug af stað frá keflavík", "voice": True},
        "Flights",
    )
    assert re.search(departure_pattern, json["answer"])
    json = qmcall(
        c,
        {"q": "hver er brottfarartími næsta flugs frá keflavík", "voice": True},
        "Flights",
    )
    assert re.search(departure_pattern, json["answer"])
    json = qmcall(
        c,
        {"q": "hver er brottfarartíminn fyrir næsta flug frá keflavík", "voice": True},
        "Flights",
    )
    assert re.search(departure_pattern, json["answer"])

    json = qmcall(
        c, {"q": "hvenær lendir næsta flug í keflavík", "voice": True}, "Flights"
    )
    assert re.search(arrival_pattern, json["answer"])
    json = qmcall(
        c, {"q": "hvenær kemur næsta vél á akureyri", "voice": True}, "Flights"
    )
    assert re.search(arrival_pattern, json["answer"]) or re.search(
        no_matching_flight_pattern, json["answer"]
    )  # In case no flights to Akureyri
    json = qmcall(
        c, {"q": "hvenær mætir næsta vél á vopnafjörð", "voice": True}, "Flights"
    )
    assert re.search(arrival_pattern, json["answer"]) or re.search(
        no_matching_flight_pattern, json["answer"]
    )  # In case no flights to Vopnafjörður
    json = qmcall(
        c, {"q": "hvenær mætir næsta vél til vopnafjarðar", "voice": True}, "Flights"
    )
    assert re.search(arrival_pattern, json["answer"]) or re.search(
        no_matching_flight_pattern, json["answer"]
    )  # In case no flights to Vopnafjörður
    json = qmcall(
        c,
        {
            "q": "hver er lendingartími næstu vélar frá reykjavík til vopnafjarðar",
            "voice": True,
        },
        "Flights",
    )
    assert re.search(arrival_pattern, json["answer"]) or re.search(
        no_matching_flight_pattern, json["answer"]
    )
    json = qmcall(
        c,
        {
            "q": "hver er lendingartíminn fyrir næsta flug til reykjavíkur frá akureyri",
            "voice": True,
        },
        "Flights",
    )
    assert re.search(arrival_pattern, json["answer"]) or re.search(
        no_matching_flight_pattern, json["answer"]
    )

    json = qmcall(
        c,
        {"q": "hvenær fer næsta flug til blabla frá ekkitil", "voice": True},
        "Flights",
    )
    assert re.search(no_matching_flight_pattern, json["answer"])
    json = qmcall(
        c,
        {"q": "hvenær fer næsta flug frá ekkitil til blablab", "voice": True},
        "Flights",
    )
    assert re.search(no_matching_flight_pattern, json["answer"])

    # Geography module
    json = qmcall(c, {"q": "hver er höfuðborg spánar", "voice": True}, "Geography")
    assert json["answer"] == "Madríd"
    assert "Spánar" in json["voice"]  # not 'Spáns', which was a bug

    json = qmcall(c, {"q": "Hver er höfuðborg taiwan"}, "Geography")
    assert json["answer"] == "Taípei"

    json = qmcall(c, {"q": "hver er höfuðborg norður-makedóníu"}, "Geography")
    assert json["answer"] == "Skopje"

    json = qmcall(c, {"q": "hver er höfuðborg norður kóreu"}, "Geography")
    assert json["answer"] == "Pjongjang"

    # json = qmcall(
    #     c, {"q": "hver er höfuðborg sameinuðu arabísku furstadæmanna"}, "Geography"
    # )
    # assert json["answer"] == "Abú Dabí"

    json = qmcall(c, {"q": "hvað er höfuðborgin í bretlandi"}, "Geography")
    assert json["answer"] == "Lundúnir"

    json = qmcall(c, {"q": "í hvaða landi er jóhannesarborg"}, "Geography")
    assert json["answer"].endswith("Suður-Afríku")

    json = qmcall(c, {"q": "í hvaða landi er kalifornía"}, "Geography")
    assert "Bandaríkjunum" in json["answer"] and json["key"] == "Kalifornía"

    json = qmcall(c, {"q": "í hvaða heimsálfu er míkrónesía"}, "Geography")
    assert json["answer"].startswith("Eyjaálfu")

    json = qmcall(c, {"q": "hvar í heiminum er máritanía"}, "Geography")
    assert "Afríku" in json["answer"]

    json = qmcall(c, {"q": "hvar er Kaupmannahöfn"}, "Geography")
    assert "Danmörku" in json["answer"]

    json = qmcall(c, {"q": "hvar er borgin tókýó"}, "Geography")
    assert "Japan" in json["answer"]

    # News module
    json = qmcall(c, {"q": "Hvað er í fréttum", "voice": True}, "News")
    assert len(json["answer"]) > 80  # This is always going to be a long answer
    assert json["voice"].startswith("Í fréttum rúv er þetta helst")

    json = qmcall(c, {"q": "Hvað er helst í fréttum", "voice": True}, "News")
    assert len(json["answer"]) > 80  # This is always going to be a long answer
    assert json["voice"].startswith("Í fréttum rúv er þetta helst")

    # Opinion module
    json = qmcall(c, {"q": "hvaða skoðun hefurðu á þriðja orkupakkanum"}, "Opinion")
    assert json["answer"].startswith("Ég hef enga sérstaka skoðun")
    assert json["key"] == "þriðji orkupakkinn"

    json = qmcall(
        c, {"q": "hvað finnst þér eiginlega um Katrínu Jakobsdóttur"}, "Opinion"
    )
    assert json["answer"].startswith("Ég hef enga sérstaka skoðun")
    assert json["key"] == "Katrín Jakobsdóttir"

    json = qmcall(c, {"q": "hver er skoðun þín á blurghsmurgdurg"}, "Opinion")
    assert json["answer"].startswith("Ég hef enga sérstaka skoðun")
    assert json["key"] == "blurghsmurgdurg"

    # Petrol module
    json = qmcall(c, {"q": "Hvar er næsta bensínstöð", "voice": True}, "Petrol")
    assert "Ánanaust" in json["answer"]
    assert "source" in json and json["source"].startswith("Gasvaktin")

    json = qmcall(
        c, {"q": "Hvar fæ ég ódýrt bensín í nágrenninu", "voice": True}, "Petrol"
    )
    assert "source" in json and json["source"].startswith("Gasvaktin")

    json = qmcall(c, {"q": "Hvar fæ ég ódýrasta bensínið"}, "Petrol")
    assert "source" in json and json["source"].startswith("Gasvaktin")

    json = qmcall(c, {"q": "hvar er bensínið ódýrast"}, "Petrol")
    assert "source" in json and json["source"].startswith("Gasvaktin")

    # Places module
    # NB: No Google API key on test server
    if google_key:
        json = qmcall(c, {"q": "Hvað er opið lengi í Melabúðinni"}, "Places")
        json = qmcall(c, {"q": "Er lokað á Forréttabarnum?"}, "Places")
        json = qmcall(c, {"q": "Hvenær opnar sundhöllin?"}, "Places")

    # Random module
    json = qmcall(c, {"q": "Veldu tölu milli sautján og 30"}, "Random")
    assert int(json["answer"]) >= 17 and int(json["answer"]) <= 30

    json = qmcall(c, {"q": "veldu fyrir mig tölu milli 30 og þrjátíu"}, "Random")
    assert int(json["answer"]) == 30

    json = qmcall(c, {"q": "kastaðu teningi"}, "Random")
    assert int(json["answer"]) >= 1 and int(json["answer"]) <= 6

    json = qmcall(c, {"q": "kastaðu átta hliða teningi"}, "Random")
    assert int(json["answer"]) >= 1 and int(json["answer"]) <= 8

    json = qmcall(c, {"q": "fiskur eða skjaldarmerki"}, "Random")
    a = json["answer"].lower()
    assert "fiskur" in a or "skjaldarmerki" in a

    json = qmcall(c, {"q": "kastaðu peningi"}, "Random")
    a = json["answer"].lower()
    assert "fiskur" in a or "skjaldarmerki" in a

    # Repeat module
    # NB: Disabled for now.
    # json = qmcall(c, {"q": "segðu setninguna simmi er bjálfi"}, "Parrot")
    # assert json["answer"] == "Simmi er bjálfi"
    # assert json["q"] == "Segðu setninguna „Simmi er bjálfi.“"

    json = qmcall(c, {"q": "segðu eitthvað skemmtilegt"})
    assert json["qtype"] != "Parrot"

    # Schedules module
    json = qmcall(c, {"q": "hvað er í sjónvarpinu núna", "voice": True}, "Schedule")
    assert json["key"] == "TelevisionSchedule"
    json = qmcall(c, {"q": "Hvaða þáttur er eiginlega á rúv núna"}, "Schedule")
    assert json["key"] == "TelevisionSchedule"
    json = qmcall(c, {"q": "hvað er í sjónvarpinu í kvöld?"}, "Schedule")
    assert json["key"] == "TelevisionEvening"
    json = qmcall(c, {"q": "hver er sjónvarpsdagskráin í kvöld?"}, "Schedule")
    assert json["key"] == "TelevisionEvening"
    # json = qmcall(c, {"q": "hvað er í útvarpinu núna?"}, "Schedule")
    # assert json["qkey"] == "RadioSchedule"
    # json = qmcall(c, {"q": "hvað er eiginlega í gangi á rás eitt?"}, "Schedule")
    # assert json["qkey"] == "RadioSchedule"
    # json = qmcall(c, {"q": "hvað er á dagskrá á rás tvö?"}, "Schedule")
    # assert json["qkey"] == "RadioSchedule"

    # Special module
    json = qmcall(client, {"q": "Hver er sætastur?", "voice": True}, "Special")
    assert json["answer"] == "Tumi Þorsteinsson."
    assert json["voice"] == "Tumi Þorsteinsson er langsætastur."

    json = qmcall(client, {"q": "Hver er tilgangur lífsins?"}, "Special")
    assert json["answer"].startswith("42")

    # Stats module
    json = qmcall(c, {"q": "hversu marga einstaklinga þekkirðu?"}, "Stats")
    json = qmcall(c, {"q": "Hversu mörgum spurningum hefur þú svarað?"}, "Stats")
    json = qmcall(c, {"q": "hvað ertu aðallega spurð um?"}, "Stats")
    json = qmcall(c, {"q": "hvaða fólk er mest í fréttum"}, "Stats")

    # Telephone module
    json = qmcall(c, {"q": "Hringdu í síma 6 9 9 2 4 2 2"}, "Telephone")
    assert "open_url" in json
    assert json["open_url"] == "tel:6992422"
    assert json["q"].endswith("6992422")

    json = qmcall(c, {"q": "hringdu fyrir mig í númerið 69 92 42 2"}, "Telephone")
    assert "open_url" in json
    assert json["open_url"] == "tel:6992422"
    assert json["q"].endswith("6992422")

    json = qmcall(c, {"q": "vinsamlegast hringdu í 921-7422"}, "Telephone")
    assert "open_url" in json
    assert json["open_url"] == "tel:9217422"
    assert json["q"].endswith("9217422")

    json = qmcall(c, {"q": "hringdu í 26"}, "Telephone")
    assert "ekki gilt símanúmer" in json["answer"]

    # Time module
    json = qmcall(c, {"q": "hvað er klukkan í Kaupmannahöfn?", "voice": True}, "Time")
    assert json["key"] == "Europe/Copenhagen"
    assert re.search(r"^\d\d:\d\d$", json["answer"])

    json = qmcall(c, {"q": "Hvað er klukkan núna", "voice": True}, "Time")
    assert json["key"] == "Atlantic/Reykjavik"
    assert re.search(r"^\d\d:\d\d$", json["answer"])
    assert json["voice"].startswith("Klukkan er")

    json = qmcall(c, {"q": "Hvað er klukkan í Japan?", "voice": True}, "Time")
    assert json["key"] == "Asia/Tokyo"
    assert re.search(r"^\d\d:\d\d$", json["answer"])
    assert json["voice"].lower().startswith("klukkan í japan er")

    # Unit module
    json = qmcall(c, {"q": "Hvað eru margir metrar í mílu?"}, "Unit")
    assert json["answer"] == "1.610 metrar"

    json = qmcall(c, {"q": "hvað eru margar sekúndur í tveimur dögum?"}, "Unit")
    assert json["answer"] == "173.000 sekúndur"

    json = qmcall(c, {"q": "hvað eru tíu steinar mörg kíló?"}, "Unit")
    assert json["answer"] == "63,5 kíló"

    json = qmcall(c, {"q": "hvað eru sjö vökvaúnsur margir lítrar"}, "Unit")
    assert json["answer"] == "0,21 lítrar"

    json = qmcall(c, {"q": "hvað eru 18 merkur mörg kíló"}, "Unit")
    assert json["answer"] == "4,5 kíló"

    json = qmcall(c, {"q": "hvað eru mörg korter í einum degi"}, "Unit")
    assert json["answer"].startswith("96")

    json = qmcall(c, {"q": "hvað eru margar mínútur í einu ári"}, "Unit")
    assert json["answer"].startswith("526.000 mínútur")

    # User info module
    json = qmcall(
        c,
        {"q": "ég heiti Gunna Jónsdóttir", "client_id": DUMMY_CLIENT_ID},
        "UserInfo",
    )
    assert json["answer"].startswith("Sæl og blessuð") and "Gunna" in json["answer"]

    json = qmcall(c, {"q": "hvað heiti ég", "client_id": DUMMY_CLIENT_ID})
    assert "Gunna Jónsdóttir" in json["answer"]

    json = qmcall(
        c, {"q": "Nafn mitt er Gunnar", "client_id": DUMMY_CLIENT_ID}, "UserInfo"
    )
    assert json["answer"].startswith("Sæll og blessaður") and "Gunnar" in json["answer"]

    json = qmcall(
        c, {"q": "veistu hvað ég heiti", "client_id": DUMMY_CLIENT_ID}, "UserInfo"
    )
    assert json["answer"].startswith("Þú heitir Gunnar")

    json = qmcall(c, {"q": "ég heiti Boutros Boutros-Ghali"}, "UserInfo")
    assert json["answer"].startswith("Gaman að kynnast") and "Boutros" in json["answer"]

    json = qmcall(
        c,
        {
            "q": "hvaða útgáfu er ég að keyra",
            "client_type": "ios",
            "client_version": "1.1.0",
            "voice": True,
        },
    )
    assert "iOS" in json["answer"] and "1.1.0" in json["answer"]
    assert "komma" in json["voice"]

    json = qmcall(c, {"q": "á hvaða tæki ertu að keyra?", "client_type": "ios"})
    assert "iOS" in json["answer"]

    # json = qmcall(
    #     c,
    #     {"q": "ég á heima á öldugötu 4 í reykjavík", "client_id": DUMMY_CLIENT_ID},
    #     "UserInfo",
    # )
    # assert json["answer"].startswith("Gaman að kynnast") and "Boutros" in json["answer"]

    # json = qmcall(c, {"q": "hvar á ég heima"}, "UserInfo")
    # assert json["answer"].startswith("Gaman að kynnast") and "Boutros" in json["answer"]

    # json = qmcall(c, {"q": "ég á heima á Fiskislóð 31"}, "UserInfo")
    # assert json["answer"].startswith("Gaman að kynnast") and "Boutros" in json["answer"]

    # json = qmcall(c, {"q": "hvar bý ég eiginlega"}, "UserInfo")
    # assert json["answer"].startswith("Gaman að kynnast") and "Boutros" in json["answer"]

    # User location module
    # NB: No Google API key on test server
    if google_key:
        json = qmcall(c, {"q": "Hvar er ég"}, "UserLocation")
        assert "Fiskislóð 31" in json["answer"]
        json = qmcall(
            c, {"q": "Hvar í heiminum er ég eiginlega staddur?"}, "UserLocation"
        )
        assert "Fiskislóð 31" in json["answer"]

    # Weather module
    json = qmcall(c, {"q": "hvernig er veðrið í Reykjavík?"}, "Weather")
    assert re.search(r"^\-?\d+ °C", json["answer"]) is not None

    json = qmcall(c, {"q": "Hversu hlýtt er úti?"}, "Weather")
    assert re.search(r"^\-?\d+ °C", json["answer"]) is not None

    json = qmcall(c, {"q": "hversu kalt er í dag?"}, "Weather")
    assert re.search(r"^\-?\d+ °C", json["answer"]) is not None

    json = qmcall(c, {"q": "hver er veðurspáin?"}, "Weather")

    json = qmcall(c, {"q": "hver er veðurspáin fyrir morgundaginn"}, "Weather")
    assert len(json["answer"]) > 20 and "." in json["answer"]

    # Wikipedia module
    json = qmcall(c, {"q": "Hvað segir wikipedia um Jón Leifs?"}, "Wikipedia")
    assert "Wikipedía" in json["q"]  # Make sure it's being beautified
    assert "tónskáld" in json["answer"]
    assert "source" in json and "wiki" in json["source"].lower()

    json = qmcall(c, {"q": "hvað segir vikipedija um jóhann sigurjónsson"}, "Wikipedia")
    assert "Jóhann" in json["answer"]

    json = qmcall(c, {"q": "fræddu mig um berlín"}, "Wikipedia")
    assert "Berlín" in json["answer"]

    json = qmcall(
        c,
        {
            "q": "katrín Jakobsdóttir í vikipediju",
            "client_id": DUMMY_CLIENT_ID,
            "private": False,
        },
        "Wikipedia",
    )
    assert "Katrín Jakobsdóttir" in json["answer"]

    json = qmcall(
        c,
        {"q": "hvað segir wikipedía um hana", "client_id": DUMMY_CLIENT_ID},
        "Wikipedia",
    )
    assert "Katrín Jakobsdóttir" in json["answer"]

    # Words module
    json = qmcall(
        c, {"q": "hvernig stafar maður orðið hestur", "voice": True}, "Spelling"
    )
    assert json["answer"] == "H E S T U R"
    assert json["voice"].startswith("Orðið „hestur“ ")

    json = qmcall(c, {"q": "hvernig beygist orðið maður", "voice": True}, "Declension")
    assert json["answer"].lower() == "maður, mann, manni, manns"
    assert json["voice"].startswith("Orðið „maður“")
    json = qmcall(c, {"q": "hvernig beygir maður nafnorðið splorglobb?", "voice": True})
    assert json["voice"].startswith("Nafnorðið „splorglobb“ fannst ekki")

    # Yule lads module
    json = qmcall(
        c,
        {"q": "hvenær kemur fyrsti jólasveinninn til byggða", "voice": True},
        "YuleLads",
    )

    # Delete any queries or query data logged as result of these tests
    with SessionContext(commit=True) as session:
        session.execute(
            Query.table().delete().where(Query.client_id == DUMMY_CLIENT_ID)
        )
        session.execute(
            QueryData.table().delete().where(QueryData.client_id == DUMMY_CLIENT_ID)
        )
Example #16
0
def _process_result(result: Result) -> Dict[str, str]:
    """
    Build the answer to a flight query from the parsed query result.

    Reads the mandatory "departure" entry of ``result`` along with the
    optional "from_loc", "to_loc", "day_count", "from_date", "to_date"
    and "flight_count" entries, looks up flight data (cached or freshly
    fetched) and narrows it down to the requested route.

    Returns the dict produced by ``_format_flight_answer`` when at least
    one flight remains after filtering; otherwise a dict whose "answer"
    and "voice" entries both hold the same "no flight found" sentence.
    """
    departing: bool = result["departure"]

    # One end of the route is always an Icelandic airport, since the
    # ISAVIA API only covers those (Keflavík unless told otherwise).
    # The other end may be an Icelandic or foreign airport/country and
    # falls back to the "*" wildcard, which matches any flight.
    api_key, other_key = (
        ("from_loc", "to_loc") if departing else ("to_loc", "from_loc")
    )
    api_airport: str = result.get(api_key, "keflavík").lower()
    airport: str = result.get(other_key, "*").lower()

    # Search window: from now until 5 days ahead unless the query says otherwise
    days: int = result.get("day_count", 5)
    from_date: datetime = result.get("from_date", datetime.now(timezone.utc))
    to_date: datetime = result.get(
        "to_date", datetime.now(timezone.utc) + timedelta(days=days)
    )

    def canonical(place: str) -> str:
        """Expand a known abbreviation, then prefer the nominative form."""
        expanded = _LOCATION_ABBREV_MAP.get(place, place)
        return NounPhrase(expanded).nominative or expanded

    airport = canonical(airport)
    api_airport = canonical(api_airport)

    # IATA code of the Icelandic airport; the name itself is used
    # when the airport is not present in the map
    iata_code: str = _AIRPORT_TO_IATA_MAP.get(api_airport, api_airport)

    # TODO: Currently the module only fetches one flight; modifications
    # to the grammar could allow fetching of more flights at once
    flight_count: int = result.get("flight_count", 1)

    # Use cached flight data when present, otherwise fetch from the API
    flights: FlightList = (
        _FLIGHT_CACHE[departing]
        if departing in _FLIGHT_CACHE
        else _fetch_flight_data(from_date, to_date, iata_code, departing)
    )
    flights = _filter_flight_data(flights, airport, api_airport, flight_count)

    if len(flights) > 0:
        # (Format month names in Icelandic)
        with changedlocale(category="LC_TIME"):
            return _format_flight_answer(flights)

    # No matching flight: describe the route that was searched for
    to_airp, from_airp = (
        (airport, api_airport) if departing else (api_airport, airport)
    )

    to_airp = icelandic_city_name(capitalize_placename(to_airp))
    from_airp = icelandic_city_name(capitalize_placename(from_airp))

    # "frá" takes the dative case, "til" takes the genitive
    from_airp = NounPhrase(from_airp).dative or from_airp
    to_airp = NounPhrase(to_airp).genitive or to_airp

    if from_airp == "*":
        message = f"Ekkert flug fannst til {to_airp} næstu {days} daga."
    elif to_airp == "*":
        message = f"Ekkert flug fannst frá {from_airp} næstu {days} daga."
    else:
        message = (
            f"Ekkert flug fannst "
            f"frá {from_airp} "
            f"til {to_airp} "
            f"næstu {days} daga."
        )

    return {"answer": message, "voice": message}
Example #17
0
def test_query_api(client):
    """ Make various query API calls and validate response. """

    c = client

    # Arithmetic module
    ARITHM_QUERIES = {
        "hvað er fimm sinnum tólf": "60",
        "hvað er 12 sinnum 12?": "144",
        "hvað er nítján plús 3": "22",
        "hvað er hundrað mínus sautján": "83",
        "hvað er 17 deilt með fjórum": "4,25",
        "hver er kvaðratrótin af 256": "16",
        "hvað er 12 í þriðja veldi": "1728",
        "hvað eru tveir í tíunda veldi": "1024",
        "hvað eru 17 prósent af 20": "3,4",
        "hvað er 7000 deilt með 812": "8,62",
        "hvað er þrisvar sinnum sjö": "21",
        "hvað er fjórðungur af 28": "7",
        "hvað er einn tuttugasti af 192": "9,6",
        "reiknaðu 7 sinnum 7": "49",
        "geturðu reiknað kvaðratrótina af 9": "3",
        "hvað er 8900 með vaski": "11.036",
        "hvað eru 7500 krónur með virðisaukaskatti": "9.300",
    }

    for q, a in ARITHM_QUERIES.items():
        json = qmcall(c, {"q": q, "voice": True})
        assert json["qtype"] == "Arithmetic"
        assert json["answer"] == a

    json = qmcall(c, {"q": "hvað er pí", "client_id": DUMMY_CLIENT_ID, "private": False})
    assert "π" in json["answer"]
    assert json["qtype"] == "PI"
    assert "3,14159" in json["answer"]

    json = qmcall(c, {"q": "hvað er það sinnum tveir", "client_id": DUMMY_CLIENT_ID, "private": False})
    assert json["qtype"] == "Arithmetic"
    assert json["answer"].startswith("6,")

    # Person and entity title queries are tested using a dummy database
    # populated with data from CSV files stored in tests/test_files/testdb_*.csv

    # Builtin module: title
    json = qmcall(c, {"q": "hver er viðar þorsteinsson", "voice": True})
    assert json["qtype"] == "Person"
    assert json["voice"].startswith("Viðar Þorsteinsson er ")
    assert json["voice"].endswith(".")

    # Builtin module: title
    json = qmcall(c, {"q": "hver er björn þorsteinsson", "voice": True})
    assert json["qtype"] == "Person"
    assert json["voice"].startswith("Björn Þorsteinsson er ")
    assert json["voice"].endswith(".")

    # Builtin module: person
    json = qmcall(c, {"q": "hver er forsætisráðherra", "voice": True})
    assert json["qtype"] == "Title"
    assert json["voice"].startswith("Forsætisráðherra er ")
    assert json["voice"].endswith(".")

    # Bus module
    json = qmcall(c, {"q": "hvaða stoppistöð er næst mér", "voice": True})
    assert json["qtype"] == "NearestStop"
    assert json["answer"] == "Fiskislóð"
    assert json["voice"] == "Næsta stoppistöð er Fiskislóð; þangað eru 310 metrar."

    json = qmcall(
        c, {"q": "hvenær er von á vagni númer 17", "voice": True, "test": False}
    )
    assert json["qtype"] == "ArrivalTime"
    assert json["answer"] == "Staðsetning óþekkt"  # No location info available

    # Counting module
    json = qmcall(c, {"q": "teldu frá einum upp í tíu"})
    assert json["qtype"] == "Counting"
    assert json["answer"] == "1…10"

    json = qmcall(c, {"q": "teldu hratt niður frá 4", "voice": True})
    assert json["qtype"] == "Counting"
    assert json["answer"] == "3…0"
    assert "<break time=" in json["voice"]

    json = qmcall(c, {"q": "teldu upp að 5000", "voice": True})
    assert json["qtype"] == "Counting"
    assert len(json["voice"]) < 100

    # Currency module
    json = qmcall(c, {"q": "Hvert er gengi dönsku krónunnar?"})
    assert json["qtype"] == "Currency"
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    json = qmcall(c, {"q": "hvað kostar evran"})
    assert json["qtype"] == "Currency"
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    json = qmcall(c, {"q": "Hvert er gengi krónunnar gagnvart dollara í dag?"})
    assert json["qtype"] == "Currency"
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    json = qmcall(c, {"q": "hvað eru tíu þúsund krónur margir dollarar"})
    assert json["qtype"] == "Currency"
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    json = qmcall(c, {"q": "hvað eru 79 dollarar margir evrur?"})
    assert json["qtype"] == "Currency"
    assert re.search(r"^\d+(,\d+)?$", json["answer"]) is not None

    # Date module
    SPECIAL_DAYS = (
        "jólin",
        "gamlársdagur",
        "nýársdagur",
        "hvítasunna",
        "páskar",
        "þjóðhátíðardagurinn",
        "baráttudagur verkalýðsins",
        "öskudagur",
        "skírdagur",
        "sumardagurinn fyrsti",
        "verslunarmannahelgi",
        "þorláksmessa",
        "föstudagurinn langi",
        "menningarnótt",
        "sjómannadagurinn",
        "dagur íslenskrar tungu",
        "annar í jólum",
        "feðradagur",
        "mæðradagurinn",
    )
    for d in SPECIAL_DAYS:
        qstr = "hvenær er " + d
        json = qmcall(c, {"q": qstr})
        assert json["qtype"] == "Date"

    json = qmcall(c, {"q": "Hver er dagsetningin?"})
    assert json["qtype"] == "Date"
    assert json["answer"].endswith(datetime.now().strftime("%Y"))

    json = qmcall(c, {"q": "Hvað eru margir dagar til jóla?", "voice": True})
    assert json["qtype"] == "Date"
    assert re.search(r"^\d+", json["answer"])
    assert "dag" in json["voice"]

    json = qmcall(c, {"q": "Hvað eru margir dagar í 12. maí?"})
    assert json["qtype"] == "Date"
    assert "dag" in json["answer"] or "á morgun" in answer 

    now = datetime.utcnow()

    with changedlocale(category="LC_TIME"):
        # Today
        dstr = now.date().strftime("%-d. %B")
        json = qmcall(c, {"q": "Hvað eru margir dagar í " + dstr})
        assert "í dag" in json["answer"]
        # Tomorrow
        dstr = (now.date() + timedelta(days=1)).strftime("%-d. %B")
        json = qmcall(c, {"q": "Hvað eru margir dagar í " + dstr})
        assert "á morgun" in json["answer"]

    json = qmcall(c, {"q": "hvaða ár er núna?"})
    assert json["qtype"] == "Date"
    assert str(now.year) in json["answer"]

    json = qmcall(c, {"q": "er hlaupár?"})
    assert json["qtype"] == "Date"
    assert str(now.year) in json["answer"]

    json = qmcall(c, {"q": "er 2020 hlaupár?"})
    assert json["qtype"] == "Date"
    assert "er hlaupár" in json["answer"]

    json = qmcall(c, {"q": "var árið 1999 hlaupár?"})
    assert json["qtype"] == "Date"
    assert "ekki hlaupár" in json["answer"]

    json = qmcall(c, {"q": "hvað eru margir dagar í desember"})
    assert json["qtype"] == "Date"
    assert json["answer"].startswith("31")
    assert "dag" in json["answer"]

    json = qmcall(c, {"q": "hvað eru margir dagar í febrúar 2024"})
    assert json["qtype"] == "Date"
    assert json["answer"].startswith("29")
    assert "dag" in json["answer"]


    json = qmcall(c, {"q": "Hvað er langt fram að verslunarmannahelgi"})
    assert json["qtype"] == "Date"
    assert re.search(r"^\d+", json["answer"])

    # json = qmcall(c, {"q": "hvað er langt liðið frá uppstigningardegi"})
    # assert json["qtype"] == "Date"
    # assert re.search(r"^\d+", json["answer"])

    json = qmcall(c, {"q": "hvenær eru jólin"})
    assert json["qtype"] == "Date"
    assert re.search(r"25", json["answer"]) is not None

    # Distance module
    # NB: No Google API key on test server
    # json = qmcall(c, {"q": "Hvað er ég langt frá Perlunni", "voice": True})
    # assert json["qtype"] == "Distance"
    # assert json["answer"].startswith("3,5 km")
    # assert json["voice"].startswith("Perlan er ")
    # assert json["source"] == "Google Maps"

    # json = qmcall(c, {"q": "hvað er langt í melabúðina", "voice": True})
    # assert json["qtype"] == "Distance"
    # assert json["answer"].startswith("1,4 km")
    # assert json["voice"].startswith("Melabúðin er ")

    # Flights module
    # TODO: Implement me!

    # Geography module
    json = qmcall(c, {"q": "Hver er höfuðborg Spánar?"})
    assert json["qtype"] == "Geography"
    assert json["answer"] == "Madríd"

    json = qmcall(c, {"q": "hver er höfuðborg norður-makedóníu?"})
    assert json["qtype"] == "Geography"
    assert json["answer"] == "Skopje"

    json = qmcall(c, {"q": "Hvað er höfuðborgin í Bretlandi"})
    assert json["qtype"] == "Geography"
    assert json["answer"] == "Lundúnir"

    json = qmcall(c, {"q": "Í hvaða landi er Jóhannesarborg?"})
    assert json["qtype"] == "Geography"
    assert json["answer"].endswith("Suður-Afríku")

    json = qmcall(c, {"q": "Í hvaða heimsálfu er míkrónesía?"})
    assert json["qtype"] == "Geography"
    assert json["answer"].startswith("Eyjaálfu")

    json = qmcall(c, {"q": "Hvar er máritanía?"})
    assert json["qtype"] == "Geography"
    assert "Afríku" in json["answer"]

    json = qmcall(c, {"q": "Hvar er Kaupmannahöfn?"})
    assert json["qtype"] == "Geography"
    assert "Danmörku" in json["answer"]

    # Intro module
    json = qmcall(c, {"q": "ég heiti Gunna"})
    assert json["qtype"] == "Introduction"
    assert json["answer"].startswith("Sæl og blessuð")

    json = qmcall(c, {"q": "ég heiti Gunnar"})
    assert json["qtype"] == "Introduction"
    assert json["answer"].startswith("Sæll og blessaður")

    json = qmcall(c, {"q": "ég heiti Boutros Boutros-Ghali"})
    assert json["qtype"] == "Introduction"
    assert json["answer"].startswith("Gaman að kynnast") and "Boutros" in json["answer"]

    # Location module
    # NB: No Google API key on test server
    # json = qmcall(c, {"q": "Hvar er ég", "latitude": 64.15673429618045, "longitude": -21.9511777069624})
    # assert json["qtype"] == "Location"
    # assert json["answer"].startswith("Fiskislóð 31")

    # News module
    json = qmcall(c, {"q": "Hvað er í fréttum", "voice": True})
    assert json["qtype"] == "News"
    assert len(json["answer"]) > 80  # This is always going to be a long answer
    assert json["voice"].startswith("Í fréttum rúv er þetta helst")

    # Opinion module
    json = qmcall(c, {"q": "Hvað finnst þér um loftslagsmál?"})
    assert json["qtype"] == "Opinion"
    assert json["answer"].startswith("Ég hef enga sérstaka skoðun")

    json = qmcall(c, {"q": "hvaða skoðun hefurðu á þriðja orkupakkanum"})
    assert json["qtype"] == "Opinion"
    assert json["answer"].startswith("Ég hef enga sérstaka skoðun")

    # Petrol module
    json = qmcall(c, {"q": "Hvar er næsta bensínstöð", "voice": True})
    assert json["qtype"] == "Petrol"
    assert "Ánanaust" in json["answer"]
    assert "source" in json and json["source"].startswith("Gasvaktin")

    json = qmcall(c, {"q": "Hvar fæ ég ódýrt bensín í nágrenninu", "voice": True})
    assert json["qtype"] == "Petrol"
    assert "source" in json and json["source"].startswith("Gasvaktin")

    json = qmcall(c, {"q": "Hvar fæ ég ódýrasta bensínið"})
    assert json["qtype"] == "Petrol"
    assert "source" in json and json["source"].startswith("Gasvaktin")

    # Places module
    # TODO: Implement me!

    # Random module
    json = qmcall(c, {"q": "Veldu tölu milli sautján og 30"})
    assert json["qtype"] == "Random"
    assert int(json["answer"]) >= 17 and int(json["answer"]) <= 30

    json = qmcall(c, {"q": "kastaðu teningi"})
    assert json["qtype"] == "Random"
    assert int(json["answer"]) >= 1 and int(json["answer"]) <= 6

    json = qmcall(c, {"q": "kastaðu átta hliða teningi"})
    assert json["qtype"] == "Random"
    assert int(json["answer"]) >= 1 and int(json["answer"]) <= 8

    json = qmcall(c, {"q": "fiskur eða skjaldarmerki"})
    assert json["qtype"] == "Random"
    a = json["answer"].lower()
    assert "fiskur" in a or "skjaldarmerki" in a

    json = qmcall(c, {"q": "kastaðu peningi"})
    assert json["qtype"] == "Random"
    a = json["answer"].lower()
    assert "fiskur" in a or "skjaldarmerki" in a

    # Special module
    json = qmcall(client, {"q": "Hver er sætastur?", "voice": True})
    assert json["qtype"] == "Special"
    assert json["answer"] == "Tumi Þorsteinsson."
    assert json["voice"] == "Tumi Þorsteinsson er langsætastur."

    # Stats module
    json = qmcall(c, {"q": "hversu marga einstaklinga þekkirðu?"})
    assert json["qtype"] == "Stats"

    json = qmcall(c, {"q": "Hversu mörgum spurningum hefur þú svarað?"})
    assert json["qtype"] == "Stats"

    json = qmcall(c, {"q": "hvað ertu aðallega spurð um?"})
    assert json["qtype"] == "Stats"

    # Telephone module
    json = qmcall(c, {"q": "Hringdu í síma 6 9 9 2 4 2 2"})
    assert json["qtype"] == "Telephone"
    assert "open_url" in json
    assert json["open_url"] == "tel:6992422"
    assert json["q"].endswith("6992422")

    json = qmcall(c, {"q": "hringdu fyrir mig í númerið 69 92 42 2"})
    assert json["qtype"] == "Telephone"
    assert "open_url" in json
    assert json["open_url"] == "tel:6992422"
    assert json["q"].endswith("6992422")

    json = qmcall(c, {"q": "vinsamlegast hringdu í 921-7422"})
    assert json["qtype"] == "Telephone"
    assert "open_url" in json
    assert json["open_url"] == "tel:9217422"
    assert json["q"].endswith("9217422")

    # Time module
    json = qmcall(c, {"q": "hvað er klukkan í Kaupmannahöfn?", "voice": True})
    assert json["qtype"] == "Time"
    assert json["key"] == "Europe/Copenhagen"
    assert re.search(r"^\d\d:\d\d$", json["answer"])

    json = qmcall(c, {"q": "Hvað er klukkan núna", "voice": True})
    assert json["qtype"] == "Time"
    assert json["key"] == "Atlantic/Reykjavik"
    assert re.search(r"^\d\d:\d\d$", json["answer"])
    assert json["voice"].startswith("Klukkan er")

    json = qmcall(c, {"q": "Hvað er klukkan í Japan?", "voice": True})
    assert json["qtype"] == "Time"
    assert json["key"] == "Asia/Tokyo"
    assert re.search(r"^\d\d:\d\d$", json["answer"])
    assert json["voice"].lower().startswith("klukkan í japan er")

    # Schedules module
    json = qmcall(c, {"q": "hvað er í sjónvarpinu núna", "voice": True})
    assert json["qtype"] == "Schedule"

    json = qmcall(c, {"q": "Hvaða þáttur er eiginlega á rúv núna"})
    assert json["qtype"] == "Schedule"

    json = qmcall(c, {"q": "hvað er í sjónvarpinu í kvöld?"})
    assert json["qtype"] == "Schedule"

    json = qmcall(c, {"q": "hver er sjónvarpsdagskráin í kvöld?"})
    assert json["qtype"] == "Schedule"

    # json = qmcall(c, {"q": "hvað er eiginlega í gangi á rás eitt?"})
    # assert json["qtype"] == "Schedule"

    # json = qmcall(c, {"q": "hvað er á daskrá á rás 2?"})
    # assert json["qtype"] == "Schedule"

    # json = qmcall(c, {"q": "Hvað er í sjónvarpinu núna í kvöld?"})
    # assert json["qtype"] == "TelevisionEvening"

    # Unit module
    json = qmcall(c, {"q": "Hvað eru margir metrar í mílu?"})
    assert json["qtype"] == "Unit"
    assert json["answer"] == "1.610 metrar"

    json = qmcall(c, {"q": "hvað eru margar sekúndur í tveimur dögum?"})
    assert json["qtype"] == "Unit"
    assert json["answer"] == "173.000 sekúndur"

    json = qmcall(c, {"q": "hvað eru tíu steinar mörg kíló?"})
    assert json["qtype"] == "Unit"
    assert json["answer"] == "63,5 kíló"

    json = qmcall(c, {"q": "hvað eru sjö vökvaúnsur margir lítrar"})
    assert json["qtype"] == "Unit"
    assert json["answer"] == "0,21 lítrar"

    json = qmcall(c, {"q": "hvað eru 18 merkur mörg kíló"})
    assert json["qtype"] == "Unit"
    assert json["answer"] == "4,5 kíló"

    json = qmcall(c, {"q": "hvað eru mörg korter í einum degi"})
    assert json["qtype"] == "Unit"
    assert json["answer"].startswith("96")

    json = qmcall(c, {"q": "hvað eru margar mínútur í einu ári"})
    assert json["qtype"] == "Unit"
    assert json["answer"].startswith("526.000 mínútur")

    # Weather module
    json = qmcall(c, {"q": "hvernig er veðrið í Reykjavík?"})
    assert json["qtype"] == "Weather"
    assert re.search(r"^\-?\d+°", json["answer"]) is not None

    json = qmcall(c, {"q": "Hversu hlýtt er úti?"})
    assert json["qtype"] == "Weather"
    assert re.search(r"^\-?\d+°", json["answer"]) is not None

    json = qmcall(c, {"q": "hver er veðurspáin fyrir morgundaginn"})
    assert json["qtype"] == "Weather"
    assert len(json["answer"]) > 20 and "." in json["answer"]

    # Wikipedia module
    json = qmcall(c, {"q": "Hvað segir wikipedia um Jón Leifs?"})
    assert json["qtype"] == "Wikipedia"
    assert "Wikipedía" in json["q"]  # Make sure it's being beautified
    assert "tónskáld" in json["answer"]
    assert "source" in json and "wiki" in json["source"].lower()

    json = qmcall(c, {"q": "hvað segir vikipedija um jóhann sigurjónsson"})
    assert json["qtype"] == "Wikipedia"
    assert "Jóhann" in json["answer"]

    json = qmcall(c, {"q": "fræddu mig um berlín"})
    assert json["qtype"] == "Wikipedia"
    assert "Berlín" in json["answer"]

    json = qmcall(c, {"q": "katrín Jakobsdóttir í vikipediju", "client_id": DUMMY_CLIENT_ID, "private": False})
    assert json["qtype"] == "Wikipedia"
    assert "Katrín Jakobsdóttir" in json["answer"]

    json = qmcall(c, {"q": "hvað segir wikipedía um hana", "client_id": DUMMY_CLIENT_ID, "private": False})
    assert json["qtype"] == "Wikipedia"
    assert "Katrín Jakobsdóttir" in json["answer"]

    # Words module
    json = qmcall(c, {"q": "hvernig stafar maður orðið hestur", "voice": True})
    assert json["qtype"] == "Spelling"
    assert json["answer"] == "H E S T U R"
    assert json["voice"].startswith("Orðið 'hestur'")

    json = qmcall(c, {"q": "hvernig beygist orðið maður", "voice": True})
    assert json["qtype"] == "Declension"
    assert json["answer"].lower() == "maður, mann, manni, manns"
    assert json["voice"].startswith("Orðið 'maður'")
Example #18
0
def wordfreq():
    """ Build the JSON payload behind the word frequency chart.

        Reads the date_from and date_to query string arguments (both in
        %Y-%m-%d format) as well as the words argument, and responds via
        better_jsonify() with chart labels plus one dataset per accepted
        word. If a date fails to parse, or no words are given, the
        response only contains err=True. """
    resp: Dict[str, Any] = dict(err=True)

    # Both period boundaries must parse as dates; otherwise log and bail out
    try:
        iso_fmt = "%Y-%m-%d"
        date_from = datetime.strptime(request.args.get("date_from", ""), iso_fmt)
        date_to = datetime.strptime(request.args.get("date_to", ""), iso_fmt)
    except Exception as e:
        logging.warning("Failed to parse date arg: {0}".format(e))
        return better_jsonify(**resp)

    # The words argument holds one or more comma-separated word lemmas,
    # each optionally carrying a :cat category suffix
    warg = request.args.get("words")
    if not warg:
        return better_jsonify(**resp)

    def cat4token(t: Tok) -> Tuple[str, str]:
        """ Map a token to a (word, category) pair """
        assert t.kind in (TOK.WORD, TOK.PERSON, TOK.ENTITY)
        # TODO: Use GreynirPackage lemma lookup function for this
        text = t.txt
        if t.kind == TOK.PERSON:
            # Persons are categorised by the gender of the first name,
            # falling back to "hk" when no gender is available
            return (text, "person_" + (t.person_names[0].gender or "hk"))
        if t.kind == TOK.ENTITY:
            return (text, "entity")
        if t.kind != TOK.WORD:
            # Only reachable when assertions are disabled
            return (text, "")

        # Prefer meanings whose lemma is identical to the word form;
        # if there are none, consider all meanings of the token
        same_form = [m for m in t.meanings if m.stofn == m.ordmynd]
        meanings = same_form or t.meanings
        if len(meanings):
            category, lemma = meanings[0].ordfl, meanings[0].stofn
        else:
            category, lemma = CAT_UNKNOWN, text

        if lemma.count("-") > text.count("-"):
            # Hack to fix combined words: the lemma has more hyphens than
            # the token text, so drop every hyphen sitting at a position
            # where the token text has some other character
            limit = len(text)
            lemma = "".join(
                ch
                for pos, ch in enumerate(lemma)
                if not (ch == "-" and pos < limit and text[pos] != "-")
            )
        return (lemma, category)

    # Turn the argument string into (word, category) pairs; items without
    # a usable category are tokenized so that one can be derived for them
    resolved = []
    for word, wcat in _str2words(warg):
        if wcat is not None and wcat != CAT_UNKNOWN:
            resolved.append((word, wcat))
            continue
        resolved.extend(
            cat4token(tok) for tok in tokenize(word) if tok.kind in _VALID_TOKENS
        )

    # Keep only words in an allowed category, capped at the maximum count
    words = [pair for pair in resolved if pair[1] in _VALID_WCATS][:_MAX_NUM_WORDS]

    now = datetime.utcnow()
    delta = date_to - date_from

    def week_label(first_day, last_day):
        """ Format a week as a compact label with no superfluous information """
        if first_day.month == last_day.month:
            # Same month: only mention the month once, after the last day
            fmt_first, fmt_last = "%-d.", "%-d. %b"
        else:
            fmt_first = fmt_last = "%-d. %b"
        if first_day.year != now.year and first_day.year != last_day.year:
            fmt_first += " %Y"
        if last_day.year != now.year:
            fmt_last += " %Y"
        return "{0}-{1}".format(
            first_day.strftime(fmt_first), last_day.strftime(fmt_last)
        )

    # Generate the chart labels under the LC_TIME locale set by changedlocale
    with changedlocale(category="LC_TIME"):
        label_date_strings: List[Union[str, Tuple[str, str]]] = []
        if delta.days < _SHOW_WEEKS_CUTOFF:
            # Short period: one label per day
            timeunit = "day"
            days = [date_from + timedelta(days=offset) for offset in range(delta.days)]
            labels = [
                day.strftime("%-d. %b" if day.year == now.year else "%-d. %b %Y")
                for day in days
            ]
            label_date_strings = [day.strftime("%Y-%m-%d") for day in days]
        else:
            # Long period: one label per whole week, as (first, last) day pairs
            timeunit = "week"
            weeks = [
                (
                    date_from + timedelta(days=7 * n),
                    date_from + timedelta(days=7 * n + 6),
                )
                for n in range((delta.days + 1) // 7)
            ]
            labels = [week_label(start, end) for start, end in weeks]
            # Convert dates to strings for client-side
            label_date_strings = [
                (start.strftime("%Y-%m-%d"), end.strftime("%Y-%m-%d"))
                for start, end in weeks
            ]

    # Assemble one chart dataset per word, each with its own line color
    colors = list(_LINE_COLORS)
    datasets: List[Dict[str, Any]] = []
    with SessionContext(commit=False) as session:
        for wd, cat in words:
            # Look up the frequency of the word over the requested period
            rows = WordFrequencyQuery.frequency(
                wd,
                cat,
                date_from,
                date_to,
                timeunit=timeunit,
                enclosing_session=session,
            )
            label = "{0} ({1})".format(wd, CAT_DESC.get(cat))
            color = colors.pop(0)
            datasets.append(
                {
                    "label": label,
                    "fill": False,
                    "lineTension": 0,
                    "borderColor": color,
                    "backgroundColor": color,
                    # The second item of each result row is used as the data point
                    "data": [row[1] for row in rows],
                    "word": "{0}:{1}".format(wd, cat),
                }
            )

    # Fill in the successful response
    resp.update(
        err=False,
        data=dict(labels=labels, labelDates=label_date_strings, datasets=datasets),
        words=_words2str(words),
    )
    return better_jsonify(**resp)
Example #19
0
    def read(self, fname, verbose = False, write_binary = True):
        """ Read grammar from a text file. Set verbose = True to get diagnostic messages
            about unused nonterminals and nonterminals that are unreachable from the root.
            Set write_binary = False to avoid writing a fresh binary file if the
            grammar text file is newer than the existing binary file. """

        # Clear previous file info, if any
        self._file_time = self._file_name = None
        # Shortcuts
        terminals = self._terminals
        nonterminals = self._nonterminals
        grammar = self._nt_dict
        # The number of the current line in the grammar file
        line = 0
        # Reset the sequence of production indices
        Production.reset()

        # Dictionary of variants, keyed by variant name
        # where the values are lists of variant options (strings)
        variants = OrderedDict()
        current_line = ""

        def parse_line(s):

            s = s.strip()
            if not s:
                # Blank line: ignore
                return

            def _parse_rhs(nt_id, vts, s, priority):
                """ Parse a right-hand side sequence, eventually with relative priority
                    within the nonterminal """

                def _add_rhs(nt_id, rhs, priority = 0):
                    """ Add a fully expanded right-hand-side production to a nonterminal rule """
                    nt = nonterminals[nt_id]
                    if nt not in grammar:
                        # First production of this nonterminal
                        grammar[nt] = [ ] if rhs is None else [ (priority, rhs) ]
                        return
                    if rhs is None:
                        return
                    if rhs.is_empty:
                        # Adding epsilon production: avoid multiple ones
                        if any(p.is_empty for _, p in grammar[nt]):
                            return
                    # Append to the list of productions of this nonterminal
                    grammar[nt].append((priority, rhs))

                s = s.strip()
                if not s:
                    raise GrammarError("Invalid syntax for production", fname, line)

                tokens = s.split()

                # rhs is a list of tuples, one for each token, as follows:
                # (id, repeat, variants)
                rhs = []

                # vfree is a set of 'free variants', i.e. variants that
                # occur in the right hand side of the production but not in
                # the nonterminal (those are in vts)
                vfree = set()

                for r in tokens:

                    if r == "0":
                        # Empty (epsilon) production
                        if len(tokens) != 1:
                            raise GrammarError("Empty (epsilon) rule must be of the form NT -> 0", fname, line)
                        rhs.append((None, None, None))
                        break

                    # Check for repeat/conditionality
                    repeat = None
                    if r[-1] in '*+?':
                        # Optional repeat/conditionality specifier
                        # Asterisk: Can be repeated 0 or more times
                        # Plus: Can be repeated 1 or more times
                        # Question mark: optionally present once
                        repeat = r[-1]
                        r = r[0:-1]

                    # Check for variant specs
                    v = r.split('/')
                    r = v[0]
                    v = v[1:]
                    if not v:
                        v = None
                    else:
                        for vspec in v:
                            # if vspec not in vts:
                            if vspec not in variants:
                                raise GrammarError("Unknown variant '{0}'".format(vspec), fname, line)
                            if vspec not in vts:
                                # Free variant: add to set
                                vfree.add(vspec)

                    if r[0] in "\"'":
                        # Literal terminal symbol
                        if len(r) < 3 or r[0] not in r[2:]:
                            raise GrammarError("Invalid literal terminal {0}".format(r), fname, line)
                    else:
                        # Identifier of nonterminal or terminal
                        if not r.isidentifier():
                            raise GrammarError("Invalid identifier '{0}'".format(r), fname, line)
                    rhs.append((r, repeat, v))

                assert len(rhs) == len(tokens)

                # Generate productions for all variants

                def variant_values(vlist):
                    """ Returns a list of names with all applicable variant options appended """
                    if not vlist:
                        yield [ "" ]
                        return
                    if len(vlist) == 1:
                        for vopt in variants[vlist[0]]:
                            yield [ vopt ]
                        return
                    for v in variant_values(vlist[1:]):
                        for vopt in variants[vlist[0]]:
                            yield [ vopt ] + v

                # Make a list of all variants that occur in the
                # nonterminal or on the right hand side
                vall = vts + list(vfree)

                for vval in variant_values(vall):
                    # Generate a production for every variant combination

                    # Calculate the nonterminal suffix for this variant
                    # combination
                    nt_suffix = "_".join(vval[vall.index(vx)] for vx in vts) if vts else ""
                    if nt_suffix:
                        nt_suffix = "_" + nt_suffix

                    result = Production(fname, line, priority = priority)
                    for r, repeat, v in rhs:
                        # Calculate the token suffix, if any
                        # This may be different from the nonterminal suffix as
                        # the token may have fewer variants than the nonterminal,
                        # and/or free ones that don't appear in the nonterminal.
                        if r is None:
                            # Epsilon
                            n = None
                        else:
                            suffix = "_".join(vval[vall.index(vx)] for vx in v) if v else ""
                            if suffix:
                                suffix = "_" + suffix
                            sym = r + suffix
                            if r[0] in "'\"":
                                # Literal terminal
                                if sym not in terminals:
                                    terminals[sym] = self._make_literal_terminal(sym)
                                n = terminals[sym]
                            elif r[0].isupper():
                                # Identifier of nonterminal
                                if sym not in nonterminals:
                                    nonterminals[sym] = self._make_nonterminal(sym, fname, line)
                                n = nonterminals[sym]
                                n.add_ref() # Note that the nonterminal has been referenced
                            else:
                                # Identifier of terminal
                                if sym not in terminals:
                                    terminals[sym] = self._make_terminal(sym)
                                n = terminals[sym]

                        # If the production item can be repeated,
                        # create a new production and substitute.
                        # A -> B C* D becomes:
                        # A -> B C_new_* D
                        # C_new_* -> C_new_* C | 0
                        # A -> B C+ D becomes:
                        # A -> B C_new_+ D
                        # C_new_+ -> C_new_+ C | C
                        # A -> B C? D becomes:
                        # A -> B C_new_? D
                        # C_new_? -> C | 0

                        if repeat is not None:
                            if n is None:
                                raise GrammarError("Epsilon (0) cannot be repeated with * or +", fname, line)
                            new_nt_id = sym + repeat
                            # Make the new nonterminal and production if not already there
                            if new_nt_id not in nonterminals:
                                new_nt = nonterminals[new_nt_id] = self._make_nonterminal(new_nt_id, fname, line)
                                new_nt.add_ref()
                                # Note that the Earley algorithm is more efficient on left recursion
                                # than middle or right recursion. Therefore it is better to generate
                                # Cx -> Cx C than Cx -> C Cx.
                                # First production: Cx C
                                new_p = Production(fname, line)
                                if repeat != '?':
                                    new_p.append(new_nt) # C* / C+
                                new_p.append(n) # C
                                _add_rhs(new_nt_id, new_p) # Default priority 0
                                # Second production: epsilon(*, ?) or C(+)
                                new_p = Production(fname, line)
                                if repeat == '+':
                                    new_p.append(n)
                                _add_rhs(new_nt_id, new_p) # Default priority 0
                            # Substitute the Cx in the original production
                            n = nonterminals[new_nt_id]

                        if n is not None:
                            result.append(n)

                    assert len(result) == len(rhs) or (len(rhs) == 1 and rhs[0] == (None, None, None))

                    nt_id_full = nt_id + nt_suffix

                    if len(result) == 1 and result[0] == nonterminals[nt_id_full]:
                        # Nonterminal derives itself
                        raise GrammarError("Nonterminal {0} deriving itself".format(nt_id_full), fname, line)
                    _add_rhs(nt_id_full, result, priority)

            def variant_names(nt, vts):
                """ Returns a list of names with all applicable variant options appended """
                result = [ nt ]
                for v in vts:
                    newresult = []
                    for vopt in variants[v]:
                        for r in result:
                            newresult.append(r + "_" + vopt)
                    result = newresult
                return result

            def apply_to_nonterminals(s, func):
                """ Parse a nonterminal/var list from string s, then apply func(nt, p) to
                    all nonterminals, where p is the parameter of the pragma """
                ix = s.find(')')
                if ix < 0:
                    raise GrammarError("Expected right parenthesis in pragma", fname, line)
                param = s[0 : ix].strip()
                s = s[ix + 1:]
                nts = s.split()
                for nt_name in nts:
                    ntv = nt_name.split('/')
                    #if not ntv[0].isidentifier():
                    #    raise GrammarError("Invalid nonterminal name '{0}'".format(ntv[0]), fname, line)
                    for vname in ntv[1:]:
                        if vname not in variants:
                            raise GrammarError("Unknown variant '{0}' for nonterminal '{1}'".format(vname, ntv[0]), fname, line)
                    var_names = variant_names(ntv[0], ntv[1:])
                    for vname in var_names:
                        if vname not in nonterminals:
                            raise GrammarError("Unknown nonterminal '{0}'".format(vname), fname, line)
                        try:
                            func(nonterminals[vname], param)
                        except:
                            raise GrammarError("Invalid pragma argument '{0}'".format(param), fname, line)

            if s.startswith('/'):
                # Definition of variant
                # A variant is specified as /varname = opt1 opt2 opt3...
                v = s.split('=', maxsplit = 1)
                if len(v) != 2:
                    raise GrammarError("Invalid variant syntax", fname, line)
                vname = v[0].strip()[1:]
                if "_" in vname or not vname.isidentifier():
                    # Variant names must be valid identifiers without underscores
                    raise GrammarError("Invalid variant name '{0}'".format(vname), fname, line)
                v = v[1].split()
                for vopt in v:
                    if "_"  in vopt or not vopt.isidentifier():
                        # Variant options must be valid identifiers without underscores
                        raise GrammarError("Invalid option '{0}' in variant '{1}'".format(vopt, vname), fname, line)
                variants[vname] = v
            elif s.startswith('$'):
                # Pragma
                s = s.strip()
                PRAGMA_SCORE = "$score("
                PRAGMA_ROOT = "$root("
                PRAGMA_TAG = "$tag("
                if s.startswith(PRAGMA_SCORE):
                    # Pragma $score(int) Nonterminal/var1/var2 ...
                    s = s[len(PRAGMA_SCORE):]

                    def set_score(nt, score):
                        self._nt_scores[nt] = int(score)

                    apply_to_nonterminals(s, set_score)

                elif s.startswith(PRAGMA_TAG):
                    # Pragma $tag(tagstring) Nonterminal/var1/var2 ...
                    s = s[len(PRAGMA_TAG):]
                    apply_to_nonterminals(s, lambda nt, tag : nt.add_tag(tag))

                elif s.startswith(PRAGMA_ROOT):
                    # Pragma $root(Nonterminal)
                    # Identify a nonterminal as a secondary parse root
                    if s[-1] != ')':
                        raise GrammarError("Expected right parenthesis in $root() pragma", fname, line)
                    root_nt = s[len(PRAGMA_ROOT):-1].strip()
                    if not root_nt.isidentifier():
                        raise GrammarError("Invalid nonterminal name '{0}'".format(root_nt), fname, line)
                    if root_nt not in nonterminals:
                        raise GrammarError("Unknown nonterminal '{0}'".format(root_nt))
                    # Add an implicit reference to the root
                    nonterminals[root_nt].add_ref()
                    self._secondary_roots.append(nonterminals[root_nt])
                else:
                    raise GrammarError("Unknown pragma '{0}'".format(s), fname, line)
            else:
                # New nonterminal
                if "→" in s:
                    # Fancy schmancy arrow sign: use it
                    rule = s.split("→", maxsplit=1)
                else:
                    rule = s.split("->", maxsplit=1)
                if len(rule) != 2:
                    raise GrammarError("Invalid syntax", fname, line)

                # Split nonterminal spec into name and variant(s),
                # i.e. NtName/var1/var2...
                ntv = rule[0].strip().split('/')
                current_NT = nt = ntv[0]
                current_variants = ntv[1:]
                if not nt.isidentifier():
                    raise GrammarError("Invalid nonterminal name '{0}'".format(nt), fname, line)
                for vname in current_variants:
                    if vname not in variants:
                        raise GrammarError("Unknown variant '{0}' for nonterminal '{1}'".format(vname, nt), fname, line)
                var_names = variant_names(nt, current_variants)

                # Add all previously unknown nonterminal variants
                for nt_var in var_names:
                    if nt_var in nonterminals:
                        cnt = nonterminals[nt_var]
                    else:
                        cnt = self._make_nonterminal(nt_var, fname, line)
                        nonterminals[nt_var] = cnt
                        if self._root is None:
                            # Remember first nonterminal as the root
                            self._root = cnt
                            self._root.add_ref() # Implicitly referenced
                    if cnt not in grammar:
                        grammar[cnt] = [ ]

                sep = '|' # Default production separator
                if '>' in rule[1]:
                    # Looks like a priority specification between productions
                    if '|' in rule[1]:
                        raise GrammarError("Cannot mix '|' and '>' between productions", fname, line)
                    sep = '>'
                for priority, prod in enumerate(rule[1].split(sep)):
                    # Add the productions on the right hand side, delimited by '|' or '>'
                    _parse_rhs(current_NT, current_variants, prod, priority if sep == '>' else 0)

        # Main parse loop

        try:
            with open(fname, "r", encoding="utf-8") as inp:
                # Read grammar file line-by-line

                for s in inp:

                    line += 1
                    # Ignore comments
                    ix = s.find('#')
                    if ix >= 0:
                        s = s[0:ix]

                    s = s.rstrip()
                    if not s:
                        continue

                    # If line starts with a blank, assume it's a continuation
                    if s[0].isspace():
                        current_line += s
                        continue

                    # New item starting: parse the previous one and start a new
                    parse_line(current_line)
                    current_line = s

                # Parse the final chunk
                parse_line(current_line)

        except (IOError, OSError):
            raise GrammarError("Unable to open or read grammar file", fname, 0)

        # Check all nonterminals to verify that they have productions and are referenced
        for nt in nonterminals.values():
            if verbose and not nt.has_ref:
                # Emit a warning message if verbose=True
                print ("Nonterminal {0} is never referenced in a production".format(nt))
                # raise GrammarError("Nonterminal {0} is never referenced in a production".format(nt), nt.fname(), nt.line())
            if nt not in grammar:
                raise GrammarError("Nonterminal {0} is referenced but not defined".format(nt), nt.fname, nt.line)
        for nt, plist in grammar.items():
            if len(plist) == 0:
                raise GrammarError("Nonterminal {0} has no productions".format(nt), nt.fname, nt.line)
            else:
                for _, p in plist:
                    if len(p) == 1 and p[0] == nt:
                        raise GrammarError("Nonterminal {0} produces itself".format(nt), p.fname, p.line)

        # Check that all nonterminals derive terminal strings
        agenda = [ nt for nt in nonterminals.values() ]
        der_t = set()
        while agenda:
            reduced = False
            for nt in agenda:
                for _, p in grammar[nt]:
                    if all(True if isinstance(s, Terminal) else s in der_t for s in p):
                        der_t.add(nt)
                        break
                if nt in der_t:
                    reduced = True
            if not reduced:
                break
            agenda = [ nt for nt in nonterminals.values() if nt not in der_t ]
        if agenda:
            raise GrammarError("Nonterminals {0} do not derive terminal strings"
                .format(", ".join([str(nt) for nt in agenda])), fname, 0)

        # Short-circuit nonterminals that point directly and uniquely to other nonterminals.
        # Because this creates a gap between the original grammar
        # and the resulting trees, we only do this for nonterminals with variants
        # that do not have a $score pragma
        shortcuts = { } # Dictionary of shortcuts
        for nt, plist in grammar.items():
            if not "_" in nt.name:
                # 'Pure' nonterminal with no variants: don't shortcut
                continue
            if self.nt_score(nt) != 0 or nt.has_tags:
                # Nonterminal has a score adjustment or a tag: don't shortcut
                continue
            if len(plist) == 1 and len(plist[0][1]) == 1 and isinstance(plist[0][1][0], Nonterminal):
                # This nonterminal has only one production, with only one nonterminal item
                target = plist[0][1][0]
                assert target != nt
                while target in shortcuts:
                    # Find ultimate destination of shortcut
                    assert target != shortcuts[target]
                    target = shortcuts[target]
                shortcuts[nt] = target

        # Go through all productions and replace the shortcuts with their targets
        for nt, plist in grammar.items():
            for _, p in plist:
                for ix, s in enumerate(p):
                    if isinstance(s, Nonterminal) and s in shortcuts:
                        # Replace the nonterminal in the production
                        target = shortcuts[s]
                        #if verbose:
                        #    # Print informational message in verbose mode
                        #    print("Production of {2}: Replaced {0} with {1}"
                        #        .format(s, target, nt))
                        p[ix] = target

        # Now, after applying shortcuts, check that all nonterminals are reachable from the root
        unreachable = { nt for nt in nonterminals.values() }

        def _remove(nt):
            """ Recursively remove all nonterminals that are reachable from nt """
            unreachable.remove(nt)
            for _, p in grammar[nt]:
                for s in p:
                    if isinstance(s, Nonterminal) and s in unreachable:
                        _remove(s)

        # Remove the main root and any secondary roots
        _remove(self._root)
        for r in self._secondary_roots:
            _remove(r)

        if unreachable:
            if verbose:
                # Emit a warning message if verbose=True
                print("The following nonterminals are unreachable from the root\nand will be removed from the grammar:")
                with changedlocale() as strxfrm:
                    for nt in sorted([ str(nt) for nt in unreachable ], key = strxfrm):
                        print("* {0}".format(str(nt)))
            # Simplify the grammar dictionary by removing unreachable nonterminals
            for nt in unreachable:
                del grammar[nt]
                del nonterminals[nt.name]

        # Reassign indices for nonterminals to avoid gaps in the number sequence
        # Nonterminals are indexed downwards from -1
        self._nonterminals_by_ix = { -1 - ix : nonterminals[key] for ix, key in enumerate(nonterminals.keys()) }
        for key, nt in self._nonterminals_by_ix.items():
            nt.set_index(key)

        # Reassign indices for terminals
        # Terminals are indexed upwards from 1
        self._terminals_by_ix = { ix + 1 : terminals[key] for ix, key in enumerate(terminals.keys()) }
        for key, t in self._terminals_by_ix.items():
            t.set_index(key)

        # Make a dictionary of productions by integer index >= 0
        for plist in grammar.values():
            for _, p in plist:
                self._productions_by_ix[p.index] = p

        # Grammar successfully read: note the file name and timestamp
        self._file_name = fname
        self._file_time = datetime.fromtimestamp(os.path.getmtime(fname))

        if write_binary:
            # Check whether to write a fresh binary file
            fname += ".bin"  # By default Reynir.grammar.bin
            try:
                binary_file_time = datetime.fromtimestamp(os.path.getmtime(fname))
            except os.error:
                binary_file_time = None
            # if Settings.DEBUG or binary_file_time is None or binary_file_time < self._file_time:
            if binary_file_time is None or binary_file_time < self._file_time:
                # No binary file or older than text file: write a fresh one
                self._write_binary(fname)
Example #20
0
def fetch_articles(
    topic=None,
    offset=0,
    limit=_DEFAULT_NUM_ARTICLES,
    start=None,
    location=None,
    country=None,
    root=None,
    author=None,
    enclosing_session=None,
):
    """ Fetch one page of articles for display, newest first.

        Only articles with a non-null tree and timestamp, a timestamp that
        is not in the future, a non-empty heading, at least one sentence
        and a visible root are selected. The optional arguments narrow the
        selection further:

        topic     -- topic identifier
        offset    -- number of leading result rows to skip
        limit     -- maximum number of rows to return
        start     -- only articles with a timestamp later than this
        location  -- location name
        country   -- location country code
        root      -- domain of the source, e.g. "kjarninn.is"
        author    -- author name
        enclosing_session -- session handed on to SessionContext

        Returns a list of ArticleDisplay objects. """

    class ArticleDisplay:
        """ Plain carrier of the article data needed by the web template """

        def __init__(
            self,
            heading,
            timestamp,
            url,
            uuid,
            num_sentences,
            num_parsed,
            icon,
            localized_date,
            source,
        ):
            self.heading = heading
            self.timestamp = timestamp
            self.url = url
            self.uuid = uuid
            self.num_sentences = num_sentences
            self.num_parsed = num_parsed
            self.icon = icon
            self.localized_date = localized_date
            self.source = source

        @property
        def width(self):
            """ Share of sentences that were parsed, as a whole-number
                percentage string such as "87%" """
            total = self.num_sentences
            if total == 0:
                # Avoid dividing by zero for articles without sentences
                return "0%"
            percentage = (100 * self.num_parsed) // total
            return "{0}%".format(percentage)

        @property
        def time(self):
            """ Characters 11-15 of the ISO timestamp, i.e. the HH:MM part """
            iso = self.timestamp.isoformat()
            return iso[11:16]

        @property
        def date(self):
            """ The localized date; the year is appended unless the
                timestamp falls within the current year """
            same_year = datetime.today().year == self.timestamp.year
            return self.localized_date if same_year else self.fulldate

        @property
        def fulldate(self):
            """ The localized date followed by the four-digit year """
            year_suffix = self.timestamp.strftime(" %Y")
            return self.localized_date + year_suffix

    def _to_display(article):
        """ Wrap a single query result row in an ArticleDisplay """
        domain = article.root.domain
        return ArticleDisplay(
            heading=article.heading,
            timestamp=article.timestamp,
            url=article.url,
            uuid=article.id,
            num_sentences=article.num_sentences,
            num_parsed=article.num_parsed,
            icon=domain + ".png",
            localized_date=article.timestamp.strftime("%-d. %b"),
            source=domain,
        )

    articles = []

    with SessionContext(read_only=True, session=enclosing_session) as session:
        # Base selection, built up one criterion at a time
        query = session.query(Article)
        query = query.filter(Article.tree != None)
        query = query.filter(Article.timestamp != None)
        query = query.filter(Article.timestamp <= datetime.utcnow())
        query = query.filter(Article.heading > "")
        query = query.filter(Article.num_sentences > 0)
        query = query.join(Root).filter(Root.visible == True)

        # Restrict by date
        if start is not None:
            query = query.filter(Article.timestamp > start)

        # Restrict by location name and/or country code; both need the join
        if location or country:
            query = query.join(Location)
        if location:
            query = query.filter(Location.name == location)
        if country:
            query = query.filter(Location.country == country)

        # Restrict by source domain (e.g. "kjarninn.is")
        if root:
            query = query.filter(Root.domain == root)

        # Restrict by author name
        if author:
            query = query.filter(Article.author == author)

        # Restrict by topic identifier
        if topic:
            query = query.join(ArticleTopic).join(Topic)
            query = query.filter(Topic.identifier == topic)

        # Newest first, then cut out the requested page
        page = query.order_by(desc(Article.timestamp))
        page = page.offset(offset).limit(limit)

        # Dates are formatted while the LC_TIME locale is switched
        with changedlocale(category="LC_TIME"):
            for row in page:
                articles.append(_to_display(row))

    return articles
Example #21
0
def wordfreq():
    """ Return word frequency chart data for a given time period.

        Reads the following query string arguments from the request:

        date_from, date_to -- period boundaries, formatted as YYYY-MM-DD
        words -- one or more word lemmas separated by commas or
                 whitespace (at most 6 are used), each optionally
                 followed by a word category, e.g. "maður:kk"

        Returns the result of better_jsonify(). On success the payload has
        err=False, "data" (chart labels and one dataset per word) and
        "words" (the normalized word list). If a date cannot be parsed or
        no words are given, the payload is just err=True. """
    resp = dict(err=True)

    # Create datetime objects from query string args
    date_fmt = "%Y-%m-%d"
    try:
        date_from = datetime.strptime(request.args.get("date_from"), date_fmt)
        date_to = datetime.strptime(request.args.get("date_to"), date_fmt)
    except (TypeError, ValueError) as e:
        # TypeError: the argument is missing (None was passed to strptime);
        # ValueError: the string does not match the expected date format
        logging.warning("Failed to parse date arg: {0}".format(e))
        return better_jsonify(**resp)

    # Words parameter should be one or more word lemmas (w. optional category)
    warg = request.args.get("words")
    if not warg:
        return better_jsonify(**resp)

    # Split on comma or whitespace, limit to max 6 words
    warg = warg.strip().replace("  ", " ").replace(",", " ")
    words = [w.strip() for w in warg.split()][:6]
    # Word categories can be specified thus: "maður:kk"
    words = [tuple(w.split(":")) for w in words]

    with BIN_Db.get_db() as db:

        def cat4word(w):
            """ Look up the word category of w, defaulting to "hk" """
            _, meanings = db.lookup_word(w, auto_uppercase=True)
            if meanings:
                # Give precedence to lemmas, e.g. interpret "reima" as
                # verb rather than gen. pl. of fem. noun "reim"
                first_lemma = next(
                    (m for m in meanings if m.stofn == w), meanings[0]
                )
                return first_lemma.ordfl
            return "hk"

        # Get word category (ordfl) for each word, if needed
        valid_cats = ["kk", "kvk", "hk", "lo", "so"]
        for i, w in enumerate(words):
            if len(w) < 2 or w[1] not in valid_cats:
                words[i] = (w[0], cat4word(w[0]))

    colors = list(_LINE_COLORS)

    # Generate date labels, one per day in the period (inclusive)
    now = datetime.utcnow()
    delta = date_to - date_from
    days = [date_from + timedelta(days=i) for i in range(delta.days + 1)]
    with changedlocale(category="LC_TIME"):
        # The year is only shown for dates outside the current year
        labels = [
            day.strftime("%-d. %b")
            if day.year == now.year else day.strftime("%-d. %b %Y")
            for day in days
        ]

    # More human readable description of word categories
    CAT_DESC = {
        "kk": "kk. no.",
        "kvk": "kvk. no.",
        "hk": "hk. no.",
        "lo": "lo.",
        "so": "so.",
    }

    # Create datasets for front-end chart
    with SessionContext(commit=False) as session:
        data = dict(labels=labels, datasets=[])
        for w in words:
            # Look up frequency of word for the given period
            res = WordFrequencyQuery.fetch(w[0],
                                           w[1],
                                           date_from,
                                           date_to,
                                           enclosing_session=session)
            # Generate data and config for chart. A category found by the
            # word lookup is not restricted to the keys of CAT_DESC, so fall
            # back to the raw category code instead of displaying "None".
            label = "{0} ({1})".format(w[0], CAT_DESC.get(w[1], w[1]))
            ds = dict(label=label, fill=False, lineTension=0)
            ds["borderColor"] = ds["backgroundColor"] = colors.pop(0)
            ds["data"] = [r[1] for r in res]
            data["datasets"].append(ds)

    # Create response
    resp["err"] = False
    resp["data"] = data
    # Update word list client-side
    resp["words"] = ", ".join([":".join(w) for w in words])

    return better_jsonify(**resp)