def test_reduce_entities():
    """Check that reducing the mapped entities yields the ``reduce_`` fixture.

    The shared ``input_entities`` fixture is cleaned, stamped with page
    number "9" and mapped before the reduce step under test is applied.
    """
    cleaned = preprocess.entity_cleaning(input_entities)
    paged = preprocess.replace_page_number(cleaned, "9")
    reduced = preprocess.reduce_entities(preprocess.map_entities(paged))

    assert reduced == reduce_
def test_get_entity_type():
    """Check that every reduced entity resolves to a known entity type.

    ``input_entities`` is run through cleaning, page-number replacement
    (page "9"), mapping and reduction. ``formatter.get_entity_type`` is then
    called for each reduced entity, and each result must be one of the type
    names listed below.
    """
    allowed_types = [
        "YEAR",
        "PERSON",
        "LOCATION",
        "ORGANIZATION",
        "COMMERCIAL_ITEM",
        "EVENT",
        "TITLE",
        "DATE",
        "QUANTITY",
        "OTHER",
        "ZIPCODE",
        "STATE",
        "ROUTE",
        "CITY",
    ]

    cleaned = preprocess.entity_cleaning(input_entities)
    paged = preprocess.replace_page_number(cleaned, "9")
    mapped = preprocess.map_entities(paged)
    reduced = preprocess.reduce_entities(mapped)

    # Resolve all types up front, then assert on each one individually so a
    # failure reports the specific offending value.
    resolved_types = [
        formatter.get_entity_type(item["entity_type"], item)
        for item in reduced.values()
    ]
    for resolved in resolved_types:
        assert resolved in allowed_types
def validate(in_req, out_exp):
    """Run ``in_req`` through the whole pipeline and assert the final result.

    Stages, in order: clean, replace page number (page "9"), map, reduce,
    group, translate. When the translated result has a "PERSON NAME" key,
    ``formatter.add_full_name_entities`` is applied as well.

    Args:
        in_req: Raw entities handed to ``preprocess.entity_cleaning``.
        out_exp: Value the final pipeline output must compare equal to.

    Raises:
        AssertionError: If the pipeline output differs from ``out_exp``.
    """
    paged = preprocess.replace_page_number(
        preprocess.entity_cleaning(in_req), "9"
    )
    reduced = preprocess.reduce_entities(preprocess.map_entities(paged))
    result = formatter.translate_entities(
        formatter.group_entities(reduced), "9"
    )

    if "PERSON NAME" in result.keys():
        result = formatter.add_full_name_entities(result)

    assert result == out_exp
def test_entity_aggregation():
    """Check each pipeline stage's output against its expected fixture.

    Starting from ``input_entities`` (page "9"), the result of the map,
    reduce, group and translate stages is compared, in that order, with
    ``aggregate_by_id``, ``reduce_``, ``group_by_type`` and
    ``output_aggregation`` respectively.
    """
    paged = preprocess.replace_page_number(
        preprocess.entity_cleaning(input_entities), "9"
    )

    by_id = preprocess.map_entities(paged)
    assert by_id == aggregate_by_id

    reduced = preprocess.reduce_entities(by_id)
    assert reduced == reduce_

    by_type = formatter.group_entities(reduced)
    assert by_type == group_by_type

    translated = formatter.translate_entities(by_type, "9")
    assert translated == output_aggregation
def format_entities(entities, page):
    """Run the entity pipeline for one page and wrap the result in a response.

    Stages, in order: clean, replace page number, map, reduce, group,
    translate. When the translated result has a "PERSON NAME" key,
    ``add_full_name_entities`` is applied as well. The final value is logged
    at INFO level before being returned.

    Args:
        entities: Raw entities handed to ``preprocess.entity_cleaning``.
        page: Page value passed to both ``preprocess.replace_page_number``
            and ``translate_entities``.

    Returns:
        A dict with ``"statusCode"`` set to 200 and ``"body"`` set to the
        translated entities.
    """
    cleaned = preprocess.entity_cleaning(entities)
    paged = preprocess.replace_page_number(cleaned, page)
    reduced = preprocess.reduce_entities(preprocess.map_entities(paged))
    by_type = group_entities(reduced)

    translated_entities = translate_entities(by_type, page)
    if "PERSON NAME" in translated_entities.keys():
        translated_entities = add_full_name_entities(translated_entities)

    logger.info(f"Translated Entities = {translated_entities}")
    response = {"statusCode": 200, "body": translated_entities}
    return response
def test_map_entities():
    """Check that mapping the prepared entities yields the ``mapped_out`` fixture.

    ``input_entities`` is cleaned and stamped with page number "9" before
    the map step under test is applied.
    """
    paged = preprocess.replace_page_number(
        preprocess.entity_cleaning(input_entities), "9"
    )
    actual = preprocess.map_entities(paged)

    assert mapped_out == actual