Ejemplo n.º 1
0
    def test_load_from_disk(self):
        # Writes JSON to `test.gdn` and checks what `GenderNounDataHandler.load_from_disk` returns for it: lists
        # stored under a `warning` key are expected to come back as sets, everything else is expected unchanged.
        path = "test.gdn"
        cases = [
            # (description, content written to the file, expected result of loading the file)
            ("dict with warning",
             {"wuwu": {"warning": ["warning1", "warning2"]}},
             {"wuwu": {"warning": {"warning1", "warning2"}}}),
            ("dict without warning",
             {"wuwu": {"fufu": "wawa"}},
             {"wuwu": {"fufu": "wawa"}}),
            ("mixture",
             {"wuwu": {"warning": ["warning1", "warning2"]}, "wawa": {"fufu": "wawa"}},
             {"wuwu": {"warning": {"warning1", "warning2"}}, "wawa": {"fufu": "wawa"}}),
        ]
        try:
            for description, inp, out in cases:
                with self.subTest(case=description):
                    # save a file to disk to test it:
                    with open(path, "w") as test_file:
                        test_file.write(json.dumps(inp))
                    # test if it loads as expected:
                    self.assertEqual(GenderNounDataHandler.load_from_disk(path), out)
        finally:
            # delete the file even when a case failed, so that a failing run leaves nothing behind:
            if os.path.exists(path):
                os.remove(path)
Ejemplo n.º 2
0
    def test_make_sure_all_referenced_words_exist(self):
        # Checks `make_sure_all_referenced_words_exist` on a graph without dangling links and on one with a dangling
        # link.
        add_missing_words = GenderNounDataHandler.make_sure_all_referenced_words_exist

        # a graph that references no missing word is expected to come back as-is:
        without_dangling_link = add_missing_words({"carpenter": {"gender": "neutral", "gender_map": {}}})
        self.assertEqual(without_dangling_link, {"carpenter": {"gender": "neutral", "gender_map": {}}})

        # a word that is referenced but missing is expected to be created, linked back to the word referencing it:
        with_dangling_link = add_missing_words(
            {"carpenter": {"gender": "neutral", "gender_map": {"female": "carpentress"}}})
        expected_graph = {
            "carpenter": {"gender": "neutral", "gender_map": {"female": "carpentress"}},
            "carpentress": {"gender": "female", "gender_map": {"neutral": "carpenter"}},
        }
        self.assertEqual(with_dangling_link, expected_graph)
Ejemplo n.º 3
0
    def test_create_extra_links_to_gender_ambiguous_words(self):
        # Checks `create_extra_links_to_gender_ambiguous_words` on a graph that needs no extra link and on one where
        # an extra link is expected to be added.
        add_extra_links = GenderNounDataHandler.create_extra_links_to_gender_ambiguous_words

        # fine input: expected to come back unchanged.
        fine_result = add_extra_links({
            "carpenter": {"gender": "neutral", "gender_map": {"female": "carpentress"}},
            "carpentress": {"gender": "female", "gender_map": {"neutral": "carpenter"}},
        })
        self.assertEqual(fine_result, {
            "carpenter": {"gender": "neutral", "gender_map": {"female": "carpentress"}},
            "carpentress": {"gender": "female", "gender_map": {"neutral": "carpenter"}},
        })

        # improvable input: "carpentress" links to the neutral word "carpenter" as its male form only, and is
        # expected to receive an additional "neutral" link to it.
        improved_result = add_extra_links({
            "carpenter": {"gender": "neutral", "gender_map": {"female": "carpentress"}},
            "carpentress": {"gender": "female", "gender_map": {"male": "carpenter"}},
        })
        self.assertEqual(improved_result, {
            "carpenter": {"gender": "neutral", "gender_map": {"female": "carpentress"}},
            "carpentress": {"gender": "female", "gender_map": {"neutral": "carpenter", "male": "carpenter"}},
        })
Ejemplo n.º 4
0
    def test_remove_words_that_are_not_nouns(self):
        # Checks which entries `remove_words_that_are_not_nouns` keeps and which ones it drops.
        filter_nouns = GenderNounDataHandler.remove_words_that_are_not_nouns

        # if all words are nouns, the dict is expected to stay as it is:
        only_nouns = filter_nouns({
            "carpenter": {"gender": "neutral", "gender_map": {}},
            "food": {"gender": "neutral", "gender_map": {}},
        })
        self.assertEqual(only_nouns, {
            "carpenter": {"gender": "neutral", "gender_map": {}},
            "food": {"gender": "neutral", "gender_map": {}},
        })

        # words that are not nouns are expected to be removed:
        noun_and_verb = filter_nouns({
            "carpenter": {"gender": "neutral", "gender_map": {}},
            "eat": {"gender": "neutral", "gender_map": {}},
        })
        self.assertEqual(noun_and_verb, {"carpenter": {"gender": "neutral", "gender_map": {}}})

        # ...unless their gender map links them to a noun, in which case they are expected to be kept:
        noun_and_linked_verb = filter_nouns({
            "carpenter": {"gender": "neutral", "gender_map": {}},
            "eat": {"gender": "neutral", "gender_map": {"male": "carpenter"}},
        })
        self.assertEqual(noun_and_linked_verb, {
            "carpenter": {"gender": "neutral", "gender_map": {}},
            "eat": {"gender": "neutral", "gender_map": {"male": "carpenter"}},
        })
Ejemplo n.º 5
0
    def test_save_to_disk(self):
        # Saves data with `GenderNounDataHandler.save_to_disk` to `test.gdn` and checks the JSON found in the file:
        # sets stored under a `warning` key are expected to be written as lists, everything else unchanged.
        path = "test.gdn"
        # for data without warnings, one and the same object serves as input and as expectation:
        plain_data = {"wuwu": {"fufu": "wawa"}}
        cases = [
            # (description, data handed to `save_to_disk`, JSON content expected in the file)
            ("dict with warning",
             {"wuwu": {"warning": {"warning1", "warning2"}}},
             {"wuwu": {"warning": ["warning1", "warning2"]}}),
            ("dict without warning",
             plain_data,
             plain_data),
            ("mixture",
             {"wuwu": {"warning": {"warning1", "warning2"}}, "wawa": {"fufu": "wawa"}},
             {"wuwu": {"warning": ["warning1", "warning2"]}, "wawa": {"fufu": "wawa"}}),
        ]
        try:
            for description, data_loaded, data_in_file in cases:
                with self.subTest(case=description):
                    # save a file to disk to test it:
                    GenderNounDataHandler.save_to_disk(data_loaded, path)
                    with open(path, "r") as test_file:
                        self.assertEqual(json.loads(test_file.read()), data_in_file)
        finally:
            # delete the file even when a case failed, so that a failing run leaves nothing behind:
            if os.path.exists(path):
                os.remove(path)
Ejemplo n.º 6
0
    def test_create_noun_data(self):
        # test if noun data is correctly generated when the `gendered-nouns.gdn`-file is missing and has to be rebuilt
        # when the module is (re-)imported.
        # the outcome should be identical to what `gn.GENDER_DICT` contained before the file was deleted, and also to
        # the result of `GenderNounDataHandler.create_full_graph_from_web()`.
        # NOTE(review): this deletes `src/gendered-nouns.gdn` relative to the current working directory and does not
        # restore it itself - presumably the reload below recreates the file; confirm.

        # create copy and reload module:
        old_gender_dict = copy.deepcopy(gn.GENDER_DICT)
        pipeline_output = GenderNounDataHandler.create_full_graph_from_web()
        os.remove("src/gendered-nouns.gdn")
        # reloading the module without the file is expected to emit a `GenderedNounsBuildFromWebWarning`:
        with self.assertWarns(ws.GenderedNounsBuildFromWebWarning):
            importlib.reload(sys.modules["src.gender_nouns"])

        # check for equality (the data before the reload, the pipeline output and the data after the reload must all
        # be the same):
        self.assertEqual(old_gender_dict, pipeline_output)
        self.assertEqual(old_gender_dict, gn.GENDER_DICT)
        self.assertEqual(pipeline_output, gn.GENDER_DICT)
Ejemplo n.º 7
0
    def test_make_all_links_two_sided(self):
        # Feeds small word graphs with one-sided or missing links into `make_all_links_two_sided` and compares the
        # returned graph with the expected one. In the comments below, A = "carpenter", B = "carpentress" and
        # C = "carpenter_man".
        two_sided = GenderNounDataHandler.make_all_links_two_sided

        def word(gender, **gender_map):
            # builds the entry of one word; the keyword arguments become its gender map, in the order given.
            return {"gender": gender, "gender_map": gender_map}

        # keep a dict where all links are two-sided (A <-> B C):
        already_two_sided = two_sided({"carpenter": word("neutral", female="carpentress"),
                                       "carpentress": word("female", neutral="carpenter"),
                                       "carpenter_man": word("male")})
        self.assertEqual(already_two_sided, {"carpenter": word("neutral", female="carpentress"),
                                             "carpentress": word("female", neutral="carpenter"),
                                             "carpenter_man": word("male")})

        # change {A->B C} to {A<->B C}:
        one_link_completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                                        "carpentress": word("female"),
                                        "carpenter_man": word("male")})
        self.assertEqual(one_link_completed, {"carpenter": word("neutral", female="carpentress"),
                                              "carpentress": word("female", neutral="carpenter"),
                                              "carpenter_man": word("male")})

        # the result for the following tests:
        triangle_result = {
            "carpenter": word("neutral", female="carpentress", male="carpenter_man"),
            "carpentress": word("female", male="carpenter_man", neutral="carpenter"),
            "carpenter_man": word("male", female="carpentress", neutral="carpenter"),
        }

        # Triangle tests for V-shaped triangles (A and B are somewhat linked and B and C, but not C and A):

        # change {A->B<-C} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                               "carpentress": word("female"),
                               "carpenter_man": word("male", female="carpentress")})
        self.assertEqual(completed, triangle_result)

        # change {A<-B->C} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral"),
                               "carpentress": word("female", male="carpenter_man", neutral="carpenter"),
                               "carpenter_man": word("male")})
        self.assertEqual(completed, triangle_result)

        # change {A<->B<-C} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                               "carpentress": word("female", neutral="carpenter"),
                               "carpenter_man": word("male", female="carpentress")})
        self.assertEqual(completed, triangle_result)

        # change {A<->B->C} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                               "carpentress": word("female", male="carpenter_man", neutral="carpenter"),
                               "carpenter_man": word("male")})
        self.assertEqual(completed, triangle_result)

        # change {A<->B<->C} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                               "carpentress": word("female", male="carpenter_man", neutral="carpenter"),
                               "carpenter_man": word("male", female="carpentress")})
        self.assertEqual(completed, triangle_result)

        # change {A->B->C} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                               "carpentress": word("female", male="carpenter_man"),
                               "carpenter_man": word("male")})
        self.assertEqual(completed, triangle_result)

        # Now come the same tests again, but this time for triangles with three sides rather than V-shaped triangles:

        # change {A->B<-C->A} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                               "carpentress": word("female"),
                               "carpenter_man": word("male", female="carpentress", neutral="carpenter")})
        self.assertEqual(completed, triangle_result)

        # change {A<->B<-C->A} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                               "carpentress": word("female", neutral="carpenter"),
                               "carpenter_man": word("male", female="carpentress", neutral="carpenter")})
        self.assertEqual(completed, triangle_result)

        # change {A<->B->C->A} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                               "carpentress": word("female", male="carpenter_man", neutral="carpenter"),
                               "carpenter_man": word("male", neutral="carpenter")})
        self.assertEqual(completed, triangle_result)

        # change {A<->B<->C->A} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                               "carpentress": word("female", male="carpenter_man", neutral="carpenter"),
                               "carpenter_man": word("male", female="carpentress", neutral="carpenter")})
        self.assertEqual(completed, triangle_result)

        # change {A->B->C->A} to {A<->B<->C<->A}:
        completed = two_sided({"carpenter": word("neutral", female="carpentress"),
                               "carpentress": word("female", male="carpenter_man"),
                               "carpenter_man": word("male", neutral="carpenter")})
        self.assertEqual(completed, triangle_result)

        # indirect gender information (given by links): "carpenter_man" is tagged as neutral here, but is linked to as
        # the male form of "carpentress".
        indirectly_gendered = two_sided({"carpenter": word("neutral", female="carpentress"),
                                         "carpentress": word("female", male="carpenter_man", neutral="carpenter"),
                                         "carpenter_man": word("neutral")})
        self.assertEqual(indirectly_gendered, {
            "carpenter": word("neutral", female="carpentress", male="carpenter_man"),
            "carpentress": word("female", male="carpenter_man", neutral="carpenter"),
            "carpenter_man": word("neutral", male="carpenter_man", female="carpentress"),
        })

        # direct gender information (given by `gender`-attrib) takes precedence over indirect gender information (given
        # by links): "carpenter" carries an unusual link here (it names itself as its female form), which is not
        # supposed to affect the newly built links, since explicit gender information takes precedence over it.
        directly_gendered = two_sided({"carpenter": word("neutral", female="carpenter"),
                                       "carpentress": word("female", male="carpenter_man", neutral="carpenter"),
                                       "carpenter_man": word("male", neutral="carpenter")})
        self.assertEqual(directly_gendered, {
            "carpenter": word("neutral", female="carpenter", male="carpenter_man"),
            "carpentress": word("female", male="carpenter_man", neutral="carpenter"),
            "carpenter_man": word("male", neutral="carpenter", female="carpentress"),
        })

        # having to make decisions between two words based on `choose_better_word` (the result is passed through
        # `self.rmv_all_warn` before it is compared):
        decided = self.rmv_all_warn(two_sided({"bachelor": word("male"),
                                               "bachelor_girl": word("female", male="bachelor"),
                                               "bachelorette": word("female", male="bachelor")}))
        self.assertEqual(decided, {"bachelor": word("male", female="bachelorette"),
                                   "bachelor_girl": word("female", male="bachelor"),
                                   "bachelorette": word("female", male="bachelor")})
Ejemplo n.º 8
0
    def test_choose_better_word(self):
        # `choose_better_word` is merely a helper function to determine which of two words is more "precise", but we
        # will test it anyway.
        # every row reads: (first word, second word, word that is expected to be chosen)
        expectations = [
            # all pairs of ranking:
            ("long_word_w_underscores", "police_woman", "police_woman"),
            ("police_woman", "police_person", "police_person"),
            ("police_man", "police_person", "police_person"),
            ("police_person", "police_mother", "police_mother"),
            ("police_person", "police_father", "police_father"),
            ("police_parent", "police_mother", "police_parent"),
            ("police_parent", "police_father", "police_parent"),
            # words with a high quality distance have to compare correctly as well:
            ("police_person", "police_parent", "police_parent"),
            ("police_mother", "wuwuwu", "wuwuwu"),
            # words of which none fits into the hard-coded categories are decided alphabetically:
            ("wuwuwu", "aiaiaiaiaiaiai", "aiaiaiaiaiaiai"),
            ("police_matron", "poioioice_matron", "poioioice_matron"),
        ]
        for first_word, second_word, better_word in expectations:
            self.assertEqual(GenderNounDataHandler.choose_better_word(first_word, second_word), better_word)
Ejemplo n.º 9
0
    def test_save_and_load(self):
        # Round trip: whatever is saved with `save_to_disk` is expected to be returned unchanged by `load_from_disk`.
        path = "test.gdn"
        cases = [
            # (description, data that is saved and expected back)
            ("dict with warning", {"wuwu": {"warning": {"warning1", "warning2"}}}),
            ("dict without warning", {"wuwu": {"fufu": "wawa"}}),
            ("words with underscores", {"fu_fu": {"wawa": "wuwu"}}),
            ("mixture", {"wuwu": {"warning": {"warning1", "warning2"}}, "wa_wa": {"fufu": "wawa"}}),
        ]
        try:
            for description, data in cases:
                with self.subTest(case=description):
                    # save a file to disk to test it:
                    GenderNounDataHandler.save_to_disk(data, path)
                    self.assertEqual(data, GenderNounDataHandler.load_from_disk(path))
        finally:
            # delete the file even when a case failed, so that a failing run leaves nothing behind:
            if os.path.exists(path):
                os.remove(path)
Ejemplo n.º 10
0
    def test_load_from_web(self):
        # we will not test the completeness of the dataset that this is based on, since we know it is incomplete, but
        # rather, whether the returned data appears to really be derived from the original, and have the right format.
        # NOTE: this test needs network access; it downloads the original dataset to compare against.

        # make sure the type checks we wrote for TypedDict-typing actually work:

        # blueprint (with a timeout, so an unresponsive server fails the test instead of blocking the whole run, and
        # with a status check, so an HTTP error is reported as such rather than as a JSON decoding problem):
        response = requests.get(
            "https://raw.githubusercontent.com/phseiff/gendered_words/master/gendered_words.json",
            timeout=60)
        response.raise_for_status()
        json_original: gn.OriginalGenderNounData = json.loads(response.text)
        # type check:
        self.assertTrue(check_type.is_instance(json_original, gn.OriginalGenderNounData))

        # the real thing (warnings raised while loading are recorded rather than shown; they are not inspected here):
        with warnings.catch_warnings(record=True):
            json_generated = GenderNounDataHandler.load_from_web()
        # type check:
        self.assertTrue(check_type.is_instance(json_generated, gn.GeneratedGenderNounData))

        # make sure that all other values are there:
        last_index = len(json_original) - 1
        for i, word_data in enumerate(json_original):
            word = word_data["word"]
            if i < last_index and json_original[i + 1]["word"] == word:
                # continue the loop if the word is part of the json data more than once, since only the last occurrence
                # is counted:
                continue
            # make sure that words without a senseno are removed - unless we already had the same word in the data, in
            # which case they are not removed -> this test might lead to false negatives in the future:
            if "wordnet_senseno" not in word_data:
                if not (i > 0 and json_original[i - 1]["word"] == word
                        and "wordnet_senseno" in json_original[i - 1]):
                    self.assertNotIn(word, json_generated)
                continue
            # make sure that words tagged as "other" are re-gendered as neutral:
            if word_data["gender"] == "o":
                self.assertEqual(json_generated[word]["gender"], "neutral")
            else:
                # and other words keep their gender (the original stores only its first letter):
                self.assertEqual(json_generated[word]["gender"][0], word_data["gender"])
            # if word is in the new json data, check further:
            # NOTE(review): the lookups above already raise a KeyError if the word is missing, so this condition is
            # always true when it is reached.
            if word in json_generated:
                # check that artificially added nouns that didn't come from wordnet aren't present anymore:
                self.assertIn("wordnet_senseno", word_data)
                # make sure we have an equally sized gender mapping:
                original_gender_map = word_data.get("gender_map", {})
                generated_gender_map = json_generated[word]["gender_map"]
                self.assertEqual(len(generated_gender_map), len(original_gender_map))
                # make sure that both gender mappings are identical:
                for gender, mapped_word in generated_gender_map.items():
                    # make sure each linked gender is in the original json as well as the new (the original uses the
                    # first letter of the gender as key):
                    self.assertIn(gender[0], original_gender_map)
                    # make sure their values are identical (whitespace in the original becomes an underscore):
                    self.assertEqual(mapped_word,
                                     original_gender_map[gender[0]][0]["word"].replace(" ", "_"))

        # make some simple exemplary tests to show this for some examples:

        # hermaphrodite got re-gendered as neutral:
        self.assertIn({"word": "hermaphrodite", "wordnet_senseno": "hermaphrodite.n.01", "gender": "o"},
                      json_original)
        self.assertEqual(json_generated["hermaphrodite"], {"gender": "neutral", "gender_map": {}})

        # heroine (as a word with a full gender map) was handled correctly:
        self.assertIn({"word": "heroine", "wordnet_senseno": "heroine.n.02", "gender": "f",
                       "gender_map": {"m": [{"parts_of_speech": "*", "word": "hero"}]}}, json_original)
        self.assertEqual(json_generated["heroine"], {"gender": "female", "gender_map": {"male": "hero"}})

        # reenactor (as a word with no gender map) is handled correctly (i.e. empty rather than no
        # gender map):
        self.assertIn({"word": "reenactor", "wordnet_senseno": "reenactor.n.01", "gender": "n"}, json_original)
        self.assertEqual(json_generated["reenactor"], {"gender": "neutral", "gender_map": {}})

        # women (as a word with no wordnet equivalent) is handled correctly (i.e. removed):
        self.assertIn({"word": "women", "gender": "f", "gender_map": {"m": [{"parts_of_speech": "*", "word": "men"}]}},
                      json_original)
        self.assertNotIn("women", json_generated)

        # great_grandson has its female version great_granddaughter listed with underscores rather than whitespace:
        self.assertIn({"word": "great_grandson", "wordnet_senseno": "great_grandson.n.01", "gender": "m",
                       "gender_map": {"f": [{"parts_of_speech": "*", "word": "great granddaughter"}]}},
                      json_original)
        self.assertEqual(json_generated["great_grandson"],
                         {"gender": "male", "gender_map": {"female": "great_granddaughter"}})