Exemple #1
0
 def parse_file(self, file_path):
     """Read the file at ``file_path`` and build a ``WordList`` from it.

     The first line of the file is handed to ``CharacterSetDescription``;
     the complete list of lines is handed to ``self.parse_word_list``.

     :param file_path: location of the word list file to read.
     :return: a ``WordList`` built from the character set description, the
         parsed words and the file hash information.
     """
     # Validation runs first; the hash information is gathered before the
     # file content itself is read.
     self.validate_file_exists(file_path)
     hash_info = self.get_file_hash_info(file_path)
     content = self.read_file(file_path)
     description = CharacterSetDescription(content[0])
     words = self.parse_word_list(content)
     return WordList(description, words, hash_info)
Exemple #2
0
 def create_word_list(self, file_path, base_character_sets):
     """Build a ``WordList`` from the raw words stored at ``file_path``.

     The first line of the file is used as the character set description.
     The words are mapped through ``CharacterSetUtils().map_words`` and the
     keys of that mapping become the content of the resulting list.

     :param file_path: location of the source file.
     :param base_character_sets: not referenced anywhere in this method.
     :return: a ``WordList`` holding the description, the created list of
         words and the hash computed for that list.
     """
     self.validate_file_exists(file_path)
     content = self.read_file(file_path)
     raw_words = self.get_words(content)
     description = CharacterSetDescription(content[0])
     mapping = CharacterSetUtils().map_words(description, raw_words)
     print("Words mapping with duplicates: ", mapping)
     unique_words = self.create_list(mapping.keys())
     # NOTE(review): the hash is always computed with the literal "[english]"
     # rather than the first line of the file - confirm this is intended.
     return WordList(
         description,
         unique_words,
         FileHash().compute_hash("[english]", unique_words),
     )
 def test_correct_is_first_4_letter_unique(self):
     """Words that differ within their first four characters are accepted."""
     words = ["worad", "worbd", "worcd", "wordd"]
     candidate = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, words, {})
     validator = WordListValidator(CHARACTER_SET_ENGLISH, candidate)
     self.assertTrue(validator.is_first_4_characters_unique())
 def test_incorrect_is_list_of_words_sorted(self):
     """A list given in descending order is reported as not sorted."""
     descending = ["wordc", "wordb", "worda"]
     candidate = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, descending, {})
     validator = WordListValidator(CHARACTER_SET_ENGLISH, candidate)
     self.assertFalse(validator.is_list_of_words_sorted())
    def test_incorrect_is_number_of_words_valid(self):
        """Lists of 2047 and of 2049 words are both rejected."""
        for length in (2047, 2049):
            words = self.create_word_list_of_length(length)
            candidate = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, words, {})
            validator = WordListValidator(CHARACTER_SET_ENGLISH, candidate)
            self.assertFalse(validator.is_number_of_words_valid())
 def test_correct_is_word_length_valid(self):
     """Words of four to eight characters pass the length check."""
     words = ["word", "worda", "wordab", "wordabc", "wordabcd"]
     candidate = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, words, {})
     validator = WordListValidator(CHARACTER_SET_ENGLISH, candidate)
     self.assertTrue(validator.is_word_length_valid())
 def test_correct_is_file_name_syntax(self):
     """Every entry of ``correct_scenarios_file_name`` is accepted."""
     for file_hash, name in self.correct_scenarios_file_name:
         with self.subTest():
             hash_info = {file_hash: name}
             candidate = WordList(CHARACTER_SET_DESCRIPTION_EMPTY, [], hash_info)
             validator = WordListValidator({}, candidate)
             self.assertTrue(validator.is_file_name_valid())
class test_WordListValidator(unittest.TestCase):
    """Unit tests for the individual checks exposed by ``WordListValidator``.

    Each ``*scenarios*`` class attribute is a table of inputs consumed by the
    test method defined right after it.
    """

    # [expected_file_hash, file_name] pairs whose file name must be rejected.
    incorrect_scenarios_file_name = [
        ["12345678", "just_language_name_without_hash"],
        ["12345678", "12345678_hash_on_wrong_position"],
        ["12345678", "12345678_hash_on_wrong_position-additional_description"],
        ["12345678", "language_name-1234567-hash_too_short"],
        ["12345678", "language_name-123456788-hash_too_long"],
        ["12345678", "language_name-12345678g-wrong_char_in_hash"],
        ["12345678", "language_name-12345678-additional_description-redundant_part"],
        ["12345678", str.ljust("language_name-12345678-file_name_too_long", WordListValidator.FILE_NAME_MAX_LENGTH + 1, 'x')]
    ]

    def test_incorrect_is_file_name_valid(self):
        """Every malformed file name is rejected by ``is_file_name_valid``."""
        for expected_file_hash, file_name in self.incorrect_scenarios_file_name:
            # Naming the sub-test lets a failure point at the offending row.
            with self.subTest(file_name=file_name):
                word_list = WordList(CHARACTER_SET_DESCRIPTION_EMPTY, [], {expected_file_hash: file_name})
                self.assertFalse(WordListValidator({}, word_list).is_file_name_valid())

    # [expected_file_hash, file_name] pairs whose file name must be accepted.
    correct_scenarios_file_name = [
        ["12345678", "english-12345678"],
        ["abcdef12", "english-abcdef12"],
        ["12345678", "english-12345678-additional_description"],
        ["12345678", str.ljust("language_name-12345678-", WordListValidator.FILE_NAME_MAX_LENGTH, 'x')],
        ["12345678", str.ljust("language_name-12345678-additional_description", WordListValidator.FILE_NAME_MAX_LENGTH, 'x')],
    ]

    def test_correct_is_file_name_syntax(self):
        """Every well-formed file name is accepted by ``is_file_name_valid``."""
        for expected_file_hash, file_name in self.correct_scenarios_file_name:
            with self.subTest(file_name=file_name):
                word_list = WordList(CHARACTER_SET_DESCRIPTION_EMPTY, [], {expected_file_hash: file_name})
                self.assertTrue(WordListValidator({}, word_list).is_file_name_valid())

    # [character_set, word_list] pairs that must pass the character set check.
    # NOTE(review): these word lists receive CHARACTER_SET_* as their first
    # argument while every other scenario passes CHARACTER_SET_DESCRIPTION_* -
    # confirm this is intended.
    correct_scenarios_character_set = [
        [CHARACTER_SET_ENGLISH,
         WordList(CHARACTER_SET_ENGLISH, ["wordone", "wordtwo", "wordthree"], {})],

        [CHARACTER_SET_POLISH,
         WordList(CHARACTER_SET_POLISH, ["zażółć", "gęślą", "jaźń"], {})],
    ]

    def test_correct_is_character_set_valid(self):
        """Word lists from ``correct_scenarios_character_set`` are accepted."""
        for index, (character_set, word_list) in enumerate(self.correct_scenarios_character_set):
            with self.subTest(scenario=index):
                self.assertTrue(WordListValidator(character_set, word_list).is_character_set_valid())

    not_allowed_character = 'ą'
    # [character_set, word_list] pairs that must fail the character set check.
    incorrect_scenarios_character_set = [
        [CHARACTER_SET_ENGLISH,
         WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["word" + not_allowed_character], {})]
    ]

    def test_incorrect_is_character_set_valid(self):
        """A word containing ``not_allowed_character`` is rejected."""
        for index, (character_set, word_list) in enumerate(self.incorrect_scenarios_character_set):
            with self.subTest(scenario=index):
                self.assertFalse(WordListValidator(character_set, word_list).is_character_set_valid())

    def test_correct_is_word_length_valid(self):
        """Words of four to eight characters pass the length check."""
        word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["word", "worda", "wordab", "wordabc", "wordabcd"], {})
        self.assertTrue(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_word_length_valid())

    # [character_set, word_list] pairs holding a three and a nine character
    # word respectively; both must fail the length check.
    incorrect_scenarios_word_length = [
        [CHARACTER_SET_ENGLISH,
         WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["wor"], {})],

        [CHARACTER_SET_ENGLISH,
         WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["wordabcde"], {})]
    ]

    def test_incorrect_is_word_length_invalid(self):
        """Words of three or of nine characters fail the length check."""
        for index, (character_set, word_list) in enumerate(self.incorrect_scenarios_word_length):
            with self.subTest(scenario=index):
                self.assertFalse(WordListValidator(character_set, word_list).is_word_length_valid())

    def test_correct_is_number_of_words_valid(self):
        """A list of exactly 2048 words is accepted."""
        word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, self.create_word_list_of_length(2048), {})
        self.assertTrue(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_number_of_words_valid())

    def test_incorrect_is_number_of_words_valid(self):
        """Lists of 2047 and of 2049 words are both rejected."""
        word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, self.create_word_list_of_length(2047), {})
        self.assertFalse(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_number_of_words_valid())

        word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, self.create_word_list_of_length(2049), {})
        self.assertFalse(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_number_of_words_valid())

    def test_correct_is_list_of_words_sorted(self):
        """A list given in ascending order is reported as sorted."""
        word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["worda", "wordb", "wordc"], {})
        self.assertTrue(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_list_of_words_sorted())

    def test_incorrect_is_list_of_words_sorted(self):
        """A list given in descending order is reported as not sorted."""
        word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["wordc", "wordb", "worda"], {})
        self.assertFalse(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_list_of_words_sorted())

    def test_correct_is_first_4_letter_unique(self):
        """Words that differ within their first four characters are accepted."""
        word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["worad", "worbd", "worcd", "wordd"], {})
        self.assertTrue(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_first_4_characters_unique())

    # This method used to share its name with the positive test above. The
    # later definition replaced the earlier one in the class namespace, so the
    # positive case never ran.
    def test_incorrect_is_first_4_letter_unique(self):
        """Two identical words are rejected by the uniqueness check."""
        word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["wordd", "wordd"], {})
        self.assertFalse(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_first_4_characters_unique())

    # [character_sets, word_list, expected_neo4j_graph] triples; the expected
    # graph is the exact list of statements the validator must return.
    scenarios_levenshtein = [
        [CHARACTER_SET_ENGLISH,
         WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["aaaa", "aabb", "aaaabb", "bbaaaa"], {}),
         []],

        [CHARACTER_SET_ENGLISH,
         WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["aaaa", "aaab"], {}),
         ['CREATE (aaaa:word {value: "aaaa"})',
          'CREATE (aaab:word {value: "aaab"})',
          'CREATE (aaaa)-[:D]->(aaab)']],

        [CHARACTER_SET_ENGLISH,
         WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["aaaa", "aaab", "caaa"], {}),
         ['CREATE (aaaa:word {value: "aaaa"})',
          'CREATE (aaab:word {value: "aaab"})',
          'CREATE (caaa:word {value: "caaa"})',
          'CREATE (aaaa)-[:D]->(aaab)',
          'CREATE (aaaa)-[:D]->(caaa)']],

        [CHARACTER_SET_ENGLISH,
         WordList(
            CharacterSetDescription("[english+ą:a]"),
             ["aaaa", "aaąą", "ąąaa", "bbbb"],
             {}
         ),
         ['CREATE (aaaa:word {value: "aaaa"})',
          'CREATE (aaąą:word {value: "aaąą"})',
          'CREATE (ąąaa:word {value: "ąąaa"})',
          'CREATE (aaaa)-[:D]->(aaąą)',
          'CREATE (aaaa)-[:D]->(ąąaa)',
          'CREATE (aaąą)-[:D]->(ąąaa)']],
    ]

    def test_get_neo4j_graph_with_levenshtein_distances(self):
        """The generated graph statements match the expected list exactly."""
        for index, (character_sets, word_list, expected_neo4j_graph) in enumerate(self.scenarios_levenshtein):
            with self.subTest(scenario=index):
                neo4j_graph = WordListValidator(character_sets, word_list).get_neo4j_graph_with_levenshtein_distances()
                self.assertListEqual(neo4j_graph, expected_neo4j_graph)

    def create_word_list_of_length(self, no_of_words):
        """Return a list holding the string ``"word"`` ``no_of_words`` times."""
        return ["word"] * no_of_words
Exemple #9
0
 def test_parse_file(self):
     """Parsing ``SAMPLE_WORD_LIST`` yields the expected ``WordList``."""
     sample_path = self.SAMPLE_WORD_LIST
     expected = WordList(
         CHARACTER_SET_DESCRIPTION_POLISH,
         ["awokado", "banan", "tygrys"],
         {"3b784e25": sample_path},
     )
     parsed = WordListReader().parse_file(sample_path)
     self.assertEqual(expected, parsed)