def test_load_pickle(self, tmp_path):
    """
    Tests that the corpus can properly load from a pickle file, while
    retaining all of the relevant information.

    A small corpus is first built with ``pickle_on_load`` pointing at a
    file inside ``tmp_path``; a second corpus is then constructed from that
    file and compared against the first.

    :param tmp_path: a temporary directory created by pytest that will be
        used to store a pickle file from the test
    """
    pickle_path = tmp_path / 'pickle.pgz'
    original_corpus = Corpus(
        common.TEST_CORPUS_PATH,
        csv_path=common.SMALL_TEST_CORPUS_CSV,
        name='test_corpus',
        pickle_on_load=pickle_path,
        ignore_warnings=True,
    )

    # first make sure the small corpus is correct
    assert len(original_corpus) == 10
    assert type(original_corpus.documents) is list
    assert original_corpus.name == 'test_corpus'

    # next load the pickle file to make sure data was copied correctly
    pickle_corpus = Corpus(pickle_path, name='test_corpus')
    assert len(pickle_corpus) == 10
    # Check the reloaded corpus here, not the original one again.
    assert type(pickle_corpus.documents) is list
    assert pickle_corpus.name == 'test_corpus'

    # Make sure the corpora are equal
    assert original_corpus == pickle_corpus
def test_plot_gender_breakdown_different_file_constructions(self):
    """
    Tests that ``plot_gender_breakdown`` writes its image both under the
    default file name and under a caller-supplied name, and that the two
    images have identical contents.

    The default name expected here is ``gender_breakdown_for_<name>.png``
    with spaces in the corpus name replaced by underscores; the custom name
    ``"testing file1"`` is expected to be saved as ``testing_file1.png``.
    """
    c = Corpus(
        common.TEST_CORPUS_PATH,
        csv_path=common.LARGE_TEST_CORPUS_CSV,
        name='test_corpus',
    )

    default_save_name = 'gender_breakdown_for_' + c.name.replace(' ', '_') + '.png'
    test_file_1_name = "testing_file1.png"
    default_save_path = OUTPUT_DIRECTORY_PATH / default_save_name
    test_file_save_path = OUTPUT_DIRECTORY_PATH / test_file_1_name

    # Only paths whose existence has been confirmed are recorded, so the
    # cleanup below never tries to delete a file that was not created.
    test_file_paths = []
    try:
        plot_gender_breakdown(c, OUTPUT_DIRECTORY_PATH)
        assert default_save_path.is_file()
        test_file_paths.append(default_save_path)

        plot_gender_breakdown(c, OUTPUT_DIRECTORY_PATH, "testing file1")
        assert test_file_save_path.is_file()
        test_file_paths.append(test_file_save_path)

        # Equality is transitive, so comparing every file against the first
        # one is enough. shallow=False forces a byte-by-byte comparison
        # rather than trusting matching os.stat() signatures.
        reference_file, *other_files = test_file_paths
        for other_file in other_files:
            assert filecmp.cmp(reference_file, other_file, shallow=False)
    finally:
        # Remove generated images even when an assertion above fails, so a
        # failed run does not leave stale files for the next one.
        for file_created in test_file_paths:
            file_created.unlink()
def test_load_without_csv(self):
    """
    Checks that a corpus built from the test corpus path alone, with no
    metadata csv and no name, loads all 99 documents into a list and has
    ``None`` as its name.
    """
    corpus_without_metadata = Corpus(common.TEST_CORPUS_PATH)

    document_count = len(corpus_without_metadata)
    documents_type = type(corpus_without_metadata.documents)

    assert document_count == 99
    assert documents_type is list
    assert corpus_without_metadata.name is None
def test_load_with_csv(self):
    """
    Checks that a corpus built with the large test metadata csv and an
    explicit name loads all 99 documents into a list and keeps that name.
    """
    expected_name = 'test_corpus'
    corpus_with_metadata = Corpus(
        common.TEST_CORPUS_PATH,
        csv_path=common.LARGE_TEST_CORPUS_CSV,
        name=expected_name,
    )

    document_count = len(corpus_with_metadata)
    documents_type = type(corpus_with_metadata.documents)

    assert document_count == 99
    assert documents_type is list
    assert corpus_with_metadata.name == expected_name
def test_create_all_visualizations_but_with_no_corpus_name(self):
    """
    Checks that ``create_corpus_summary_visualizations`` produces all four
    summary images for a corpus that was constructed without a name.

    The file names expected in the output directory all end in
    ``_for_corpus.png``.
    """
    unnamed_corpus = Corpus(
        common.TEST_CORPUS_PATH,
        csv_path=common.LARGE_TEST_CORPUS_CSV,
    )
    create_corpus_summary_visualizations(unnamed_corpus, OUTPUT_DIRECTORY_PATH)

    # Listed in the order the files are checked.
    expected_file_names = (
        'gender_breakdown_for_corpus.png',
        'date_of_pub_for_corpus.png',
        'country_of_pub_for_corpus.png',
        'percentage_acquired_metadata_for_corpus.png',
    )
    for expected_file_name in expected_file_names:
        assert Path.is_file(OUTPUT_DIRECTORY_PATH / expected_file_name)