예제 #1
0
    def test_load_pickle(self, tmp_path):
        """
        Tests that the corpus can properly load from a pickle file, while retaining
        all of the relevant information

        :param tmp_path: a temporary directory created by pytest that will be used to store
            a pickle file from the test
        """

        pickle_path = tmp_path / 'pickle.pgz'

        # The original corpus is built with pickle_on_load pointing at pickle_path;
        # the second Corpus below is then constructed from that same path.
        original_corpus = Corpus(common.TEST_CORPUS_PATH,
                                 csv_path=common.SMALL_TEST_CORPUS_CSV,
                                 name='test_corpus',
                                 pickle_on_load=pickle_path,
                                 ignore_warnings=True)

        # first make sure the small corpus is correct
        assert len(original_corpus) == 10
        assert isinstance(original_corpus.documents, list)
        assert original_corpus.name == 'test_corpus'

        # next load the pickle file to make sure data was copied correctly
        pickle_corpus = Corpus(pickle_path, name='test_corpus')
        assert len(pickle_corpus) == 10
        # Check the *loaded* corpus here, not the original one a second time.
        assert isinstance(pickle_corpus.documents, list)
        assert pickle_corpus.name == 'test_corpus'

        # Make sure the corpora are equal
        assert original_corpus == pickle_corpus
    def test_plot_gender_breakdown_different_file_constructions(self):
        """
        Tests that plot_gender_breakdown writes a file under the default name and
        under an explicitly supplied name, and that the two files compare equal.

        Both output files are removed afterwards, even when an assertion fails, so a
        failed run does not leave images behind in OUTPUT_DIRECTORY_PATH.
        """
        c = Corpus(
            common.TEST_CORPUS_PATH,
            csv_path=common.LARGE_TEST_CORPUS_CSV,
            name='test_corpus',
        )

        default_save_name = 'gender_breakdown_for_' + c.name.replace(
            ' ', '_') + '.png'
        test_file_1_name = "testing_file1.png"

        default_save_path = OUTPUT_DIRECTORY_PATH / default_save_name
        test_file_save_path = OUTPUT_DIRECTORY_PATH / test_file_1_name

        try:
            plot_gender_breakdown(c, OUTPUT_DIRECTORY_PATH)
            assert default_save_path.is_file()

            # The name is passed with a space; the test expects it on disk as
            # "testing_file1.png".
            plot_gender_breakdown(c, OUTPUT_DIRECTORY_PATH, "testing file1")
            assert test_file_save_path.is_file()

            # One comparison is enough: comparing a file with itself, or the same
            # pair in the opposite order, adds no information.
            assert filecmp.cmp(default_save_path, test_file_save_path)
        finally:
            # Remove whatever was produced, regardless of which assertion failed.
            for file_created in (default_save_path, test_file_save_path):
                if file_created.is_file():
                    file_created.unlink()
예제 #3
0
    def test_load_without_csv(self):
        """
        Checks a corpus built from the test corpus path alone, with no metadata
        csv and no name supplied.
        """

        bare_corpus = Corpus(common.TEST_CORPUS_PATH)

        document_count = len(bare_corpus)
        assert document_count == 99

        # The documents container must be exactly a list.
        assert type(bare_corpus.documents) is list

        # No name was passed in, so none is expected on the corpus.
        assert bare_corpus.name is None
예제 #4
0
    def test_load_with_csv(self):
        """
        Checks a corpus built from the test corpus path together with the large
        metadata csv and an explicit name.
        """

        expected_name = 'test_corpus'
        corpus_with_metadata = Corpus(common.TEST_CORPUS_PATH,
                                      csv_path=common.LARGE_TEST_CORPUS_CSV,
                                      name=expected_name)

        document_count = len(corpus_with_metadata)
        assert document_count == 99

        # The documents container must be exactly a list.
        assert type(corpus_with_metadata.documents) is list

        # The name given at construction is reported back unchanged.
        assert corpus_with_metadata.name == expected_name
    def test_create_all_visualizations_but_with_no_corpus_name(self):
        """
        Builds the summary visualizations for a corpus constructed without a name
        and checks that each expected '..._for_corpus.png' file is present in
        OUTPUT_DIRECTORY_PATH afterwards.
        """
        unnamed_corpus = Corpus(common.TEST_CORPUS_PATH,
                                csv_path=common.LARGE_TEST_CORPUS_CSV)

        # File names expected when the corpus has no name, in the order checked.
        expected_file_names = (
            'gender_breakdown_for_corpus.png',
            'date_of_pub_for_corpus.png',
            'country_of_pub_for_corpus.png',
            'percentage_acquired_metadata_for_corpus.png',
        )

        create_corpus_summary_visualizations(unnamed_corpus, OUTPUT_DIRECTORY_PATH)

        for file_name in expected_file_names:
            expected_path = OUTPUT_DIRECTORY_PATH / file_name
            assert expected_path.is_file()