def test_create_tagreport_1(self):
        """Check that the tag report is produced in under 30 seconds.

        One item is created and handed to ``EnrichItem.store_itemtags``
        together with a base tag list that has been extended by 200 randomly
        combined tags. ``GetTags.create_tagreport`` is then timed.
        """
        os.environ["STAGE"] = "dev"

        with Session() as session:

            # Item the generated tags are stored for.
            new_item = Item()
            new_item.content = "RKI bestätigt Covid-19 Sterblichkeitsrate von 0,01 Prozent in (...) - Corona Transition "
            new_item.language = "de"
            item = item_handler.create_item(new_item, session)

            tags = [
                'Corona-Maßnahmen', 'Senkung der Treibhausgasemissionen',
                'China', 'Film', 'Menschen', 'Kanzlerkandidatur', 'sars-cov-2',
                '#verunglimpft', 'g.co/privacytools', 'Gregor Gysi',
                'Bundestagswahl2021', '19', '-',
                'Statistik falsch interpretiert', 'Bild.de', 'JF',
                'MdB Hansjörg Müller (AFD)', '100 Tage', 'Gruene',
                '#notwendige Polizeimaßnahmen', 'Sonne'
            ]
            # Grow the list with combined tags; both halves are drawn from the
            # list as it currently stands, so earlier combinations can be
            # picked again.
            for _ in range(200):
                left = random.choice(tags)
                right = random.choice(tags)
                tags.append(left + "-" + right)

            # Hand the item together with all generated tags to the store step.
            EnrichItem.store_itemtags({"item": item.to_dict(), "Tags": tags}, "")

            # Only the report creation itself is timed.
            started = time.perf_counter()
            GetTags.create_tagreport("", "")
            elapsed = time.perf_counter() - started
            assert elapsed < 30
    def test_post_tags_for_item_2(self, monkeypatch):
        """Post tags to an item that has none and read them back.

        Steps:
          1. A freshly created item reports an empty ``Tags`` list.
          2. Posting ``RKI`` and ``Covid-19`` reports both as added and
             nothing as removed.
          3. Reading the item's tags again returns exactly the posted tags.

        Args:
            monkeypatch: pytest fixture used to set environment variables so
                they are restored when the test finishes.
        """
        monkeypatch.setenv("DBNAME", "Test")
        # Set through monkeypatch (not os.environ) so STAGE does not leak into
        # other tests.
        monkeypatch.setenv("STAGE", "dev")

        def decode_body(response):
            """Return the response body as a dict, parsing it if it is a JSON string."""
            body = response['body']
            return json.loads(body) if isinstance(body, str) else body

        session = get_db_session(True, None)

        # creating items
        item = Item()
        item.content = "https://corona-transition.org/rki-bestatigt-covid-19-sterblichkeitsrate-von-0-01-prozent-in" \
                       "-deutschland?fbclid=IwAR2vLIkW_3EejFaeC5_wC_410uKhN_WMpWDMAcI-dF9TTsZ43MwaHeSl4n8%22 "
        item.language = "de"
        item = item_handler.create_item(item, True, session)

        # A new item must not carry any tags yet.
        event = {"pathParameters": {"item_id": item.id}}
        context = ""
        response = GetTags.get_tags_for_item(event, context, True, session)
        assert decode_body(response)['Tags'] == []

        # Post two tags for the item.
        event = {
            "pathParameters": {
                "item_id": item.id
            },
            "body": json.dumps({"tags": ["RKI", "Covid-19"]})
        }
        response = GetTags.post_tags_for_item(event, context, True, session)
        result = decode_body(response)
        tags_added = result['added tags']
        tags_removed = result['removed tags']
        assert 'RKI' in tags_added
        assert 'Covid-19' in tags_added
        assert tags_removed == []

        # The posted tags are now returned for the item.
        response = GetTags.get_tags_for_item(event, context, True, session)
        assert decode_body(response)['Tags'] == ['RKI', 'Covid-19']
    def test_predict_tags_2(self):
        """Every taxonomy term on its own must be predicted as exactly its tag.

        Walks the German taxonomy returned by ``GetTags.download_taxonomy``,
        skipping the ``similarity-threshold`` and ``excluded-terms`` entries,
        and feeds each term to ``GetTags.predict_tags``.
        """
        os.environ["STAGE"] = "dev"
        language = "de"
        taxonomy = GetTags.download_taxonomy(language)
        # Top-level entries that are skipped rather than walked as tag groups.
        skipped_entries = ("similarity-threshold", "excluded-terms")

        for section in taxonomy:
            if section in skipped_entries:
                continue
            for expected_tag in taxonomy[section]:
                for term in taxonomy[section][expected_tag]:
                    request = {"Text": term, "LanguageCode": language}
                    predicted = GetTags.predict_tags(request, "")
                    assert predicted == [expected_tag]
    def test_store_itemtag(self, monkeypatch):
        """Store tags for an item and verify they can be looked up again.

        After ``EnrichItem.store_itemtags`` has run, the first tag must be
        retrievable by its content, an item-tag link must exist for it, and
        ``GetTags.get_tags_for_item`` must return the stored tags in order.

        Args:
            monkeypatch: pytest fixture used to set environment variables so
                they are restored when the test finishes.
        """
        monkeypatch.setenv("DBNAME", "Test")
        # Set through monkeypatch (not os.environ) so STAGE does not leak into
        # other tests.
        monkeypatch.setenv("STAGE", "dev")

        session = get_db_session(True, None)

        # creating items
        item = Item()
        item.content = "RKI bestätigt Covid-19 Sterblichkeitsrate von 0,01 Prozent in (...) - Corona Transition "
        item.language = "de"
        item = item_handler.create_item(item, True, session)
        list_tags = ['RKI', 'Covid', 'Corona Transition']

        # store tags
        event = {"item": item.to_dict(), "Tags": list_tags}
        context = ""
        EnrichItem.store_itemtags(event, context, True, session)

        # The first tag exists as a tag entry ...
        tag = tag_handler.get_tag_by_content(list_tags[0], True, session)
        assert tag.tag == list_tags[0]

        # ... and is linked to the item.
        itemtag = tag_handler.get_itemtag_by_tag_and_item_id(
            tag.id, item.id, True, session)
        assert itemtag.id is not None

        # Reading the item's tags returns everything that was stored.
        event = {"pathParameters": {"item_id": item.id}}
        ret = GetTags.get_tags_for_item(event, context, True, session)
        body = ret['body']
        # Deserialize if body is string
        payload = json.loads(body) if isinstance(body, str) else body
        assert payload['Tags'] == list_tags
 def test_predict_tags_1(self):
     """A randomly chosen fact-check claim must yield at least one predicted tag."""
     os.environ["STAGE"] = "dev"
     factchecks = UpdateFactChecks.read_df("factchecks_de.csv")
     # Single sample per run; raise the range to check more claims.
     for _ in range(1):
         sample = random.choice(factchecks)
         request = {"Text": sample['claim_text'], "LanguageCode": "de"}
         predicted = GetTags.predict_tags(request, "")
         assert predicted != []
    def test_predict_tags_5(self):
        """With an empty language code, the term "ffp2" must yield no tags."""
        os.environ["STAGE"] = "dev"

        request = {"Text": "ffp2", "LanguageCode": ""}
        predicted = GetTags.predict_tags(request, "")
        assert predicted == []
    def test_predict_tags_4(self):
        """Compare ``GetTags.predict_tags`` with tags found by exact term lookup.

        A random fact-check claim is split into lowercased words. Each word
        that equals a taxonomy term contributes that term's tag to ``tags``
        and is left out of ``new_text``; all other words are appended to
        ``new_text``. ``GetTags.predict_tags`` called on ``new_text`` must
        then return exactly ``tags``.
        """
        os.environ["STAGE"] = "qa"
        LanguageCode = "de"
        taxonomy_json = GetTags.download_taxonomy(LanguageCode)
        df_factchecks = UpdateFactChecks.read_df("factchecks_de.csv")

        for _ in range(1):
            claim_text = random.choice(df_factchecks)['claim_text']
            # Replace punctuation with spaces so split() below yields bare words.
            for stopword in [
                    "\"", ",", ".", "!", "?", "«", "»", "(", ")", "-"
            ]:
                claim_text = claim_text.replace(stopword, " ")
            new_text = ""  # words without an exact taxonomy match
            tags = []  # tags of matched words, in order of first appearance
            for substr in claim_text.split():
                substr = str.lower(substr)
                term_found = False
                for category in taxonomy_json:
                    # These two top-level entries are skipped, not scanned for terms.
                    if category == "similarity-threshold":
                        continue
                    if category == "excluded-terms":
                        continue
                    for tag in taxonomy_json[category]:
                        for term in taxonomy_json[category][tag]:
                            if substr == term:
                                term_found = True
                                if tag not in tags:
                                    tags.append(tag)
                                break
                        # NOTE(review): this leaves only the tag loop; the
                        # category loop keeps running. Once term_found is
                        # set, each later category is left after its first
                        # tag has been scanned - confirm this is intended.
                        if term_found:
                            break
                if not term_found:
                    new_text += substr + " "
            event = {"Text": new_text, "LanguageCode": LanguageCode}
            context = ""
            ret = GetTags.predict_tags(event, context)
            assert ret == tags
# Example #8
# 0
def test_get_closed_items():
    """Closed items must list their tags ordered by number of mentions.

    Three tag batches are posted for one closed item, so that 'B' is
    mentioned three times, 'C' twice, and 'A' and 'D' once each. The first
    entry returned by ``get_closed_items`` must list 'B', then 'C', then 'A'
    and 'D' in either order.
    """
    # pre-stuff
    os.environ["STAGE"] = "dev"

    with Session() as session:

        context = None

        # Closed item that receives the tags.
        closed_item = Item()
        closed_item.content = "Test content"
        closed_item.language = "de"
        closed_item.status = "closed"
        closed_item = item_handler.create_item(closed_item, session)

        # One event per tag batch, all addressed to the same item.
        tag_batches = (['C', 'B', 'D'], ['B', 'C'], ['A', 'B'])
        events = [
            {
                "pathParameters": {
                    "item_id": closed_item.id
                },
                "body": json.dumps({"tags": batch})
            }
            for batch in tag_batches
        ]

        # Post the batches in order.
        for event in events:
            GetTags.post_tags_for_item(event, context)

        # Check if tags are sorted by number of mentions
        response = get_closed_items(events[0], context)
        ranked_tags = json.loads(response['body'])[0]['tags']
        assert ranked_tags in [['B', 'C', 'A', 'D'], ['B', 'C', 'D', 'A']]
def test_post_tags_for_item():
    """Posting tags adds new tags and increases the counter of existing ones.

    An item is given three tags through ``EnrichItem.store_itemtags``. Posting
    'RKI' (already present) and 'Covid-19' (new) must report 'Covid-19' as
    added and 'RKI' as counter-increased. The item's tag list must keep its
    order with the new tag appended, and the ``ItemTag`` row counts must be
    2 for 'RKI' and 1 for every other tag.
    """
    # pre-stuff
    os.environ["STAGE"] = "dev"

    with Session() as session:

        # create item
        item = Item()
        item.content = (
            "https://corona-transition.org/rki-bestatigt-covid-19-sterblichkeitsrate-von-0-01-prozent-in"
            "-deutschland?fbclid=IwAR2vLIkW_3EejFaeC5_wC_410uKhN_WMpWDMAcI-dF9TTsZ43MwaHeSl4n8%22 "
        )
        item.language = "de"
        item = item_handler.create_item(item, session)

        # Store the initial tags for the item.
        context = ""
        store_event = {
            "item": {
                "id": item.id,
                "content": item.content,
                "language": item.language,
            },
            "Tags": ["RKI", "Covid", "Corona Transition"]
        }
        EnrichItem.store_itemtags(store_event, context)

        # The stored tags are returned for the item.
        lookup_event = {"pathParameters": {"item_id": item.id}}
        lookup = GetTags.get_tags_for_item(lookup_event, context)
        current_tags = json.loads(lookup['body'])['Tags']
        assert current_tags == ['RKI', 'Covid', 'Corona Transition']

        # Post one tag that already exists and one that is new.
        post_event = {
            "pathParameters": {
                "item_id": item.id
            },
            "body": json.dumps({"tags": ['RKI', 'Covid-19']})
        }
        posted = GetTags.post_tags_for_item(post_event, context)
        outcome = json.loads(posted['body'])
        newly_added = outcome['added new tags']
        counter_increased = outcome['increased tag counter']
        assert newly_added == ['Covid-19']
        assert len(counter_increased) == 1
        assert 'RKI' in counter_increased
        assert 'Covid-19' not in counter_increased

        # The tag order is kept and the new tag comes last.
        lookup = GetTags.get_tags_for_item(post_event, context)
        current_tags = json.loads(lookup['body'])['Tags']
        assert current_tags == ['RKI', 'Covid', 'Corona Transition', 'Covid-19']
        assert current_tags != ['Covid', 'Corona Transition', 'Covid-19', 'RKI']

        # Check counts: RKI posted twice, all other once
        expected_counts = (
            ('RKI', 2),
            ('Covid', 1),
            ('Corona Transition', 1),
            ('Covid-19', 1),
        )
        for tag_name, expected in expected_counts:
            rows = session.query(ItemTag).join(Tag).filter(Tag.tag == tag_name)
            assert rows.count() == expected