def test_incorrect_is_list_of_words_sorted(self):
    word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["wordc", "wordb", "worda"], {})
    self.assertFalse(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_list_of_words_sorted())

def test_correct_is_first_4_characters_unique(self):
    word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["worad", "worbd", "worcd", "wordd"], {})
    self.assertTrue(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_first_4_characters_unique())

def test_incorrect_is_word_length_valid(self):
    for character_set, word_list in self.incorrect_scenarios_word_length:
        with self.subTest():
            self.assertFalse(WordListValidator(character_set, word_list).is_word_length_valid())

def test_incorrect_is_number_of_words_valid(self):
    # One word fewer than the expected 2048-word count.
    word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, self.create_word_list_of_length(2047), {})
    self.assertFalse(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_number_of_words_valid())
    # One word more than the expected 2048-word count.
    word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, self.create_word_list_of_length(2049), {})
    self.assertFalse(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_number_of_words_valid())

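# A minimal sketch of the create_word_list_of_length helper referenced above;
# the actual fixture in this test class may build its words differently.
def create_word_list_of_length(self, length):
    # Encode each index as a fixed-width base-26 word so the generated list is
    # unique and lexicographically sorted for any requested length.
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    words = []
    for index in range(length):
        word = ""
        for _ in range(4):
            word = alphabet[index % len(alphabet)] + word
            index //= len(alphabet)
        words.append(word)
    return words
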
def test_correct_is_word_length_valid(self):
    word_list = WordList(CHARACTER_SET_DESCRIPTION_ENGLISH, ["word", "worda", "wordab", "wordabc", "wordabcd"], {})
    self.assertTrue(WordListValidator(CHARACTER_SET_ENGLISH, word_list).is_word_length_valid())

def test_correct_is_character_set_valid(self):
    for character_set, word_list in self.correct_scenarios_character_set:
        with self.subTest():
            self.assertTrue(WordListValidator(character_set, word_list).is_character_set_valid())

def test_correct_is_file_name_valid(self):
    for expected_file_hash, file_name in self.correct_scenarios_file_name:
        with self.subTest():
            self.assertTrue(WordListValidator({}, WordList(CHARACTER_SET_DESCRIPTION_EMPTY, [], {expected_file_hash: file_name})).is_file_name_valid())

def test_get_neo4j_graph_with_levenshtein_distances(self):
    for character_sets, word_list, expected_neo4j_graph in self.scenarios_levenshtein:
        with self.subTest():
            neo4j_graph = WordListValidator(character_sets, word_list).get_neo4j_graph_with_levenshtein_distances()
            self.assertListEqual(neo4j_graph, expected_neo4j_graph)

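# For reference, a generic sketch of the Levenshtein (edit) distance that the
# validator is expected to report between word pairs; this is not the
# implementation under test.
def levenshtein_distance(first_word, second_word):
    # Single-row dynamic programming: distances[j] holds the edit distance
    # between the current prefix of first_word and the first j characters of
    # second_word.
    distances = list(range(len(second_word) + 1))
    for i, first_character in enumerate(first_word, start=1):
        previous_diagonal, distances[0] = distances[0], i
        for j, second_character in enumerate(second_word, start=1):
            cost = 0 if first_character == second_character else 1
            previous_diagonal, distances[j] = distances[j], min(
                distances[j] + 1,         # deletion
                distances[j - 1] + 1,     # insertion
                previous_diagonal + cost  # substitution
            )
    return distances[-1]
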
BASE_CHARACTER_SETS_PATH = "../wlip-0001/base-character-sets/"

parser = argparse.ArgumentParser(
    description="Validate word list",
    epilog="Example: python3 scripts/validate_word_list.py -f wlip-0003/english-a1d03317-obsolete"
)
parser.add_argument('-f', '--file', required=True)
args = parser.parse_args()
file_name = args.file

base_character_sets = CharacterSetsReader().parse(BASE_CHARACTER_SETS_PATH)

word_list_reader = WordListReader()
try:
    word_list = word_list_reader.parse_file(file_name)
except Exception:
    print("error: unable to parse word list file: '{}'".format(file_name))
    sys.exit(1)

is_valid_word_list = WordListValidator(base_character_sets, word_list).validate()
if not is_valid_word_list:
    print()
    print("error: word list in file: '{}' is invalid".format(file_name))
    sys.exit(1)

print("success: word list in file: '{}' is valid".format(file_name))
sys.exit(0)