def test_create_tagreport_1(self):
    """Store a large set of tags on one item and require the tag report to build in under 30 seconds."""
    os.environ["STAGE"] = "dev"

    with Session() as session:
        # Seed a single German item that all tags get attached to.
        seed_item = Item()
        seed_item.content = "RKI bestätigt Covid-19 Sterblichkeitsrate von 0,01 Prozent in (...) - Corona Transition "
        seed_item.language = "de"
        seed_item = item_handler.create_item(seed_item, session)

        tag_pool = [
            'Corona-Maßnahmen',
            'Senkung der Treibhausgasemissionen',
            'China',
            'Film',
            'Menschen',
            'Kanzlerkandidatur',
            'sars-cov-2',
            '#verunglimpft',
            'g.co/privacytools',
            'Gregor Gysi',
            'Bundestagswahl2021',
            '19',
            '-',
            'Statistik falsch interpretiert',
            'Bild.de',
            'JF',
            'MdB Hansjörg Müller (AFD)',
            '100 Tage',
            'Gruene',
            '#notwendige Polizeimaßnahmen',
            'Sonne',
        ]
        # Add 200 combined tags. Both halves are drawn from the growing pool,
        # so tags generated in earlier rounds can be picked again.
        for _ in range(200):
            left_part = random.choice(tag_pool)
            right_part = random.choice(tag_pool)
            tag_pool.append(left_part + "-" + right_part)

        # Persist all tags for the item.
        EnrichItem.store_itemtags(
            {"item": seed_item.to_dict(), "Tags": tag_pool}, "")

        # Time only the report generation itself.
        started_at = time.perf_counter()
        GetTags.create_tagreport("", "")
        duration = time.perf_counter() - started_at
        assert duration < 30
def test_post_tags_for_item_2(self, monkeypatch):
    """Post two tags to an item without tags and check they are reported as added and then returned.

    Steps: the item starts with an empty tag list, posting "RKI" and
    "Covid-19" reports both as added and nothing as removed, and a second
    read returns exactly those two tags in posting order.
    """
    # Both variables go through monkeypatch so they are restored after the test.
    monkeypatch.setenv("DBNAME", "Test")
    monkeypatch.setenv("STAGE", "dev")

    def _payload(response):
        """Return the response body, JSON-decoding it first when it arrives as a string."""
        body = response['body']
        return json.loads(body) if isinstance(body, str) else body

    session = get_db_session(True, None)

    # creating items
    item = Item()
    item.content = "https://corona-transition.org/rki-bestatigt-covid-19-sterblichkeitsrate-von-0-01-prozent-in" \
                   "-deutschland?fbclid=IwAR2vLIkW_3EejFaeC5_wC_410uKhN_WMpWDMAcI-dF9TTsZ43MwaHeSl4n8%22 "
    item.language = "de"
    item = item_handler.create_item(item, True, session)

    # A fresh item has no tags yet.
    event = {"pathParameters": {"item_id": item.id}}
    context = ""
    response = GetTags.get_tags_for_item(event, context, True, session)
    assert _payload(response)['Tags'] == []

    # Post two new tags.
    event = {
        "pathParameters": {
            "item_id": item.id
        },
        "body": json.dumps({"tags": ["RKI", "Covid-19"]})
    }
    response = GetTags.post_tags_for_item(event, context, True, session)
    result = _payload(response)
    tags_added = result['added tags']
    tags_removed = result['removed tags']
    assert 'RKI' in tags_added
    assert 'Covid-19' in tags_added
    assert tags_removed == []

    # The posted tags are now returned for the item.
    response = GetTags.get_tags_for_item(event, context, True, session)
    assert _payload(response)['Tags'] == ['RKI', 'Covid-19']
def test_predict_tags_2(self):
    """Each taxonomy term, sent as the whole input text, must predict exactly its own tag."""
    os.environ["STAGE"] = "dev"
    language = "de"
    taxonomy = GetTags.download_taxonomy(language)

    # These top-level keys are not walked as categories.
    skipped_keys = ("similarity-threshold", "excluded-terms")

    for category in taxonomy:
        if category in skipped_keys:
            continue
        terms_by_tag = taxonomy[category]
        for tag in terms_by_tag:
            for term in terms_by_tag[tag]:
                request = {"Text": term, "LanguageCode": language}
                predicted = GetTags.predict_tags(request, "")
                assert predicted == [tag]
def test_store_itemtag(self, monkeypatch):
    """Store three tags for an item and verify the tag row, the item-tag link and the read-back list."""
    # Both variables go through monkeypatch so they are restored after the test.
    monkeypatch.setenv("DBNAME", "Test")
    monkeypatch.setenv("STAGE", "dev")

    session = get_db_session(True, None)

    # creating items
    item = Item()
    item.content = "RKI bestätigt Covid-19 Sterblichkeitsrate von 0,01 Prozent in (...) - Corona Transition "
    item.language = "de"
    item = item_handler.create_item(item, True, session)

    list_tags = ['RKI', 'Covid', 'Corona Transition']

    # store tags
    event = {"item": item.to_dict(), "Tags": list_tags}
    context = ""
    EnrichItem.store_itemtags(event, context, True, session)

    # The first tag can be looked up by its content ...
    tag = tag_handler.get_tag_by_content(list_tags[0], True, session)
    assert tag.tag == list_tags[0]

    # ... and is linked to the item.
    itemtag = tag_handler.get_itemtag_by_tag_and_item_id(
        tag.id, item.id, True, session)
    assert itemtag.id is not None

    # Reading the item's tags returns the stored list in the same order.
    event = {"pathParameters": {"item_id": item.id}}
    ret = GetTags.get_tags_for_item(event, context, True, session)
    body = ret['body']
    # Deserialize if body is string
    payload = json.loads(body) if isinstance(body, str) else body
    assert payload['Tags'] == list_tags
def test_predict_tags_1(self):
    """A randomly chosen German fact-check claim must produce a non-empty tag prediction."""
    os.environ["STAGE"] = "dev"
    factchecks = UpdateFactChecks.read_df("factchecks_de.csv")

    # A single sample per run.
    for _ in range(1):
        sampled_claim = random.choice(factchecks)['claim_text']
        request = {"Text": sampled_claim, "LanguageCode": "de"}
        predicted = GetTags.predict_tags(request, "")
        assert predicted != []
def test_predict_tags_5(self):
    """predict_tags must return an empty list when the language code is empty."""
    os.environ["STAGE"] = "dev"
    request = {"Text": "ffp2", "LanguageCode": ""}
    predicted = GetTags.predict_tags(request, "")
    assert predicted == []
def test_predict_tags_4(self):
    """Compare predict_tags against tags collected by exact word/term matching.

    A random fact-check claim is split into lower-cased words. Every word
    equal to a taxonomy term contributes that term's tag to ``tags`` and is
    left out of ``new_text``; all other words are appended to ``new_text``.
    The prediction for ``new_text`` must equal ``tags``.
    """
    os.environ["STAGE"] = "qa"
    LanguageCode = "de"
    taxonomy_json = GetTags.download_taxonomy(LanguageCode)
    # NOTE(review): random.choice below needs an indexable sequence -
    # presumably read_df returns a list of rows; confirm.
    df_factchecks = UpdateFactChecks.read_df("factchecks_de.csv")
    # A single random claim per run.
    for _ in range(1):
        claim_text = random.choice(df_factchecks)['claim_text']
        # Replace punctuation with spaces so split() yields bare words.
        for stopword in [
                "\"", ",", ".", "!", "?", "«", "»", "(", ")", "-"
        ]:
            claim_text = claim_text.replace(stopword, " ")
        new_text = ""  # words that matched no taxonomy term
        tags = []  # tags of matched terms, without duplicates, in order of first match
        for substr in claim_text.split():
            substr = str.lower(substr)
            term_found = False
            for category in taxonomy_json:
                # These two top-level keys are skipped, not walked as categories.
                if category == "similarity-threshold":
                    continue
                if category == "excluded-terms":
                    continue
                for tag in taxonomy_json[category]:
                    for term in taxonomy_json[category][tag]:
                        if substr == term:
                            term_found = True
                            if tag not in tags:
                                tags.append(tag)
                            # First matching term of this tag is enough.
                            break
                    # Stop scanning further tags of this category once the word has matched.
                    if term_found:
                        break
            if not term_found:
                new_text += substr + " "
        # NOTE(review): matched words are removed from the text, yet the
        # prediction is expected to equal their tags - confirm this is the
        # intended expectation.
        event = {"Text": new_text, "LanguageCode": LanguageCode}
        context = ""
        ret = GetTags.predict_tags(event, context)
        assert ret == tags
def test_get_closed_items():
    """Tags of a closed item must come back ordered by how often they were posted.

    'B' is posted three times, 'C' twice, 'A' and 'D' once each, so the
    result has to start with 'B', 'C'; the order of 'A' and 'D' is left open.
    """
    # pre-stuff
    os.environ["STAGE"] = "dev"

    with Session() as session:
        context = None

        # Closed item that receives the tags.
        closed_item = Item()
        closed_item.content = "Test content"
        closed_item.language = "de"
        closed_item.status = "closed"
        closed_item = item_handler.create_item(closed_item, session)

        # One POST event per batch of tags, posted in this order.
        tag_batches = (['C', 'B', 'D'], ['B', 'C'], ['A', 'B'])
        tag_events = [
            {
                "pathParameters": {
                    "item_id": closed_item.id
                },
                "body": json.dumps({"tags": batch})
            }
            for batch in tag_batches
        ]
        for tag_event in tag_events:
            GetTags.post_tags_for_item(tag_event, context)

        # Check if tags are sorted by number of mentions
        response = get_closed_items(tag_events[0], context)
        ranked_tags = json.loads(response['body'])[0]['tags']
        assert ranked_tags in [['B', 'C', 'A', 'D'], ['B', 'C', 'D', 'A']]
def test_post_tags_for_item():
    """Post one existing and one new tag to an item that already has stored tags.

    The new tag must be reported as added, the existing one as having its
    counter increased, the tag list must keep its order with the new tag
    appended, and the ItemTag row counts must reflect the double post.
    """
    # pre-stuff
    os.environ["STAGE"] = "dev"

    with Session() as session:
        # Item under test.
        item = Item()
        item.content = (
            "https://corona-transition.org/rki-bestatigt-covid-19-sterblichkeitsrate-von-0-01-prozent-in"
            "-deutschland?fbclid=IwAR2vLIkW_3EejFaeC5_wC_410uKhN_WMpWDMAcI-dF9TTsZ43MwaHeSl4n8%22 "
        )
        item.language = "de"
        item = item_handler.create_item(item, session)

        # Seed the item with three stored tags.
        context = ""
        store_event = {
            "item": {
                "id": item.id,
                "content": item.content,
                "language": item.language,
            },
            "Tags": ["RKI", "Covid", "Corona Transition"]
        }
        EnrichItem.store_itemtags(store_event, context)

        # The seeded tags are returned as stored.
        lookup_event = {"pathParameters": {"item_id": item.id}}
        response = GetTags.get_tags_for_item(lookup_event, context)
        seeded = json.loads(response['body'])['Tags']
        assert seeded == ['RKI', 'Covid', 'Corona Transition']

        # Post 1 already existing tag and 1 new tag.
        post_event = {
            "pathParameters": {
                "item_id": item.id
            },
            "body": json.dumps({"tags": ['RKI', 'Covid-19']})
        }
        response = GetTags.post_tags_for_item(post_event, context)
        outcome = json.loads(response['body'])
        newly_added = outcome['added new tags']
        counter_increased = outcome['increased tag counter']
        assert newly_added == ['Covid-19']
        assert len(counter_increased) == 1
        assert 'RKI' in counter_increased
        assert 'Covid-19' not in counter_increased

        # The new tag is appended; the existing order is kept.
        response = GetTags.get_tags_for_item(post_event, context)
        current_tags = json.loads(response['body'])['Tags']
        assert current_tags == ['RKI', 'Covid', 'Corona Transition', 'Covid-19']
        assert current_tags != ['Covid', 'Corona Transition', 'Covid-19', 'RKI']

        # Check counts: RKI posted twice, all other once
        expected_rows = (
            ('RKI', 2),
            ('Covid', 1),
            ('Corona Transition', 1),
            ('Covid-19', 1),
        )
        for tag_name, row_count in expected_rows:
            matching = session.query(ItemTag).join(Tag).filter(
                Tag.tag == tag_name)
            assert matching.count() == row_count