def test_load(self):
    wordnet = WordNetModel()
    wordnet.load("slowosiec-graph-polish.bin")
    n_vertices = len(list(wordnet.g.vertices()))
    n_edges = len(list(wordnet.g.edges()))
    self.assertGreater(n_vertices, 0)
    self.assertGreater(n_edges, 0)
def test_create_graph(self):
    wordnet = WordNetModel()
    wordnet.create_graph()
    n_vertices = len(list(wordnet.g.vertices()))
    n_edges = len(list(wordnet.g.edges()))
    self.assertGreater(n_vertices, 0)
    self.assertGreater(n_edges, 0)
def main():
    test_dataset = data_loader.load_simlex_dataset()
    print(test_dataset)
    results = pd.DataFrame(columns=results_columns)
    for model_name in models.keys():
        model = WordNetModel()
        # wordnet.load("slowosiec-graph-2019-11-07-t17-45.bin")
        model.load(f"{model_name}.bin")
        # Split the dataset into word pairs the model can score and the
        # words it misses (kept for reporting).
        values_to_check, missed_words_array, missed_words, unique_missed_words = \
            get_values_to_check(model, test_dataset)
        for similarity_function in similarity_functions.keys():
            run_results, run_results_pd = test_model(
                model=model,
                values_to_check=values_to_check,
                similarity_function=similarity_function)
            run_results_pd.to_csv(
                f"{OUTPUT_PATH}/counted_results/{model_name}-{similarity_function}")
            # Score the run with every metric, against both the similarity
            # and the relatedness columns of run_results.
            for metric in metrics:
                model_result = {
                    "metric": metric,
                    "similarity_func": reverted_similarity_functions[similarity_function],
                    "similarity_cor": metrics[metric](run_results[0, :], run_results[2, :]),
                    "relatedness_cor": metrics[metric](run_results[1, :], run_results[2, :]),
                    "model_name": model_name,
                    "missed_words_array": missed_words_array,
                    "missed_words": missed_words,
                    "unique_missed_words": unique_missed_words,
                }
                model_results_df = pd.DataFrame(data=[model_result],
                                                columns=results_columns)
                results = pd.concat([results, model_results_df])
    results = results.reset_index(drop=True)
    print(results)
    results.to_csv(f"{OUTPUT_PATH}/wordnet_results.csv")
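# A minimal sketch of what the `metrics` mapping consumed by main() might
# contain -- an illustrative assumption, not this module's actual definition.
# SimLex-style evaluations typically report Pearson and Spearman correlation
# between model scores and human judgments, both available in scipy.stats.
from scipy.stats import pearsonr, spearmanr

_example_metrics = {
    "pearson": lambda a, b: pearsonr(a, b)[0],    # linear correlation
    "spearman": lambda a, b: spearmanr(a, b)[0],  # rank correlation
}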
def test_save_mixed_graph(self):
    """
    Used to create a binary version of the mixed-type graph.
    :return:
    """
    wordnet = WordNetModel()
    wordnet.create_graph(is_polish=False)
    wordnet.create_lexical_graph(is_polish=False)
    file_name = "slowosiec-graph-hiperonim-lexical-selected-connected-by-top-sort"
    wordnet.save(file_name)
    file_path = os.path.join(DATA_PATH, "models", file_name + ".bin")
    self.assertTrue(os.path.isfile(file_path))
def test_wu_palmer_synonyms(self):
    """
    The best options are:
    - filtering for Polish with all relation types (slowosiec-graph-polish)
    - not filtering for Polish with only the hypernym/hyponym relation type
      (slowosiec-graph-hiponim)
    :return:
    """
    wordnet = WordNetModel()
    wordnet.load("slowosiec-graph-hiperonim-connected-by-top-sort.bin")
    synonyms1 = wordnet.synonyms("krzesło", dist_type="WuPalmer")
    synonyms2 = wordnet.synonyms("piec", dist_type="WuPalmer")
    synonyms3 = wordnet.synonyms("król", dist_type="WuPalmer")
    print(synonyms1)
    print(synonyms2)
    print(synonyms3)
    for synonyms in (synonyms1, synonyms2, synonyms3):
        self.assertIsNotNone(synonyms)
        self.assertIsInstance(synonyms, list)
        self.assertIsInstance(synonyms[0], str)
def test__WuPalmer(self):
    wordnet = WordNetModel()
    wordnet.load("slowosiec-graph-hiperonim-connected-by-top-sort.bin")
    dist = wordnet._WuPalmer()
    # Smoke test: print the Wu-Palmer similarity of two adjectives.
    print(dist("łatwy", "męczący"))
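# For reference, a self-contained sketch of the Wu-Palmer measure that
# `_WuPalmer` presumably implements (the internals are an assumption here):
# sim(s1, s2) = 2 * depth(lcs) / (depth(s1) + depth(s2)), where lcs is the
# least common subsumer of the two synsets in the hypernym hierarchy and
# depth counts edges from the root. The result lies in [0, 1] because
# depth(lcs) <= min(depth(s1), depth(s2)).
def _wu_palmer_from_depths(depth_s1: int, depth_s2: int, depth_lcs: int) -> float:
    """Wu-Palmer similarity computed from precomputed node depths."""
    return 2.0 * depth_lcs / (depth_s1 + depth_s2)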
def test__count_root_avg_depth(self):
    wordnet = WordNetModel()
    wordnet.load("slowosiec-graph-hiperonim-connected-by-top-sort.bin")
    depth = wordnet._count_root_depth()
    print(depth)
    self.assertGreater(depth, 0)
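# A sketch of how an average root depth could be computed with graph-tool,
# which the `g.vertices()` / `g.edges()` calls above suggest the model uses;
# the actual `_count_root_depth` implementation is an assumption.
import numpy as np
from graph_tool.topology import shortest_distance

def _avg_root_depth(g, root):
    """Mean shortest-path distance from `root` over all reachable vertices."""
    dist = shortest_distance(g, source=root).a  # distances as a numpy array
    # Unreachable vertices get a sentinel far above any real path length,
    # and every real shortest path is shorter than the vertex count.
    reachable = dist[dist < g.num_vertices()]
    return float(reachable.mean())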