Beispiel #1
0
 def test_download_from_child(self):
     """Download a child corpus and check that its directory is populated."""
     child = "testcorpus-zip-child"
     download(child)
     # the configured corpus path has to exist and hold at least one entry
     entries = list(config.get(child, util.__PATH__).iterdir())
     self.assertTrue(entries)
Beispiel #2
0
    def test_remove(self):
        """Exercise ``remove``: cancelled prompts, confirmed/silent removal, errors."""
        name = "testcorpus-zip"

        # fetch the corpus so that there is something to remove
        download(name)
        corpus_dir = config.get(name, util.__PATH__)
        self.assertTrue(list(corpus_dir.iterdir()))

        # an empty answer and an explicit 'no' must both cancel the removal
        for answer in ('', 'no'):
            with mock.patch('builtins.input', return_value=answer), \
                    mock.patch('sys.stdout', new=StringIO()) as captured:
                remove(name)
                printed = captured.getvalue()
                self.assertTrue(printed.startswith("Canceled. Corpus '"))
            # the directory is untouched
            self.assertTrue(list(corpus_dir.iterdir()))

        # answering 'yes' deletes the directory
        with mock.patch('builtins.input', return_value='yes'):
            remove(name)
        self.assertFalse(corpus_dir.exists())

        # download again, then remove without any prompt
        download(name)
        self.assertTrue(list(corpus_dir.iterdir()))
        remove(name, silent=True)
        self.assertFalse(corpus_dir.exists())

        # removing something that is already gone raises ...
        with self.assertRaises(FileNotFoundError):
            remove(name, silent=True)
        # ... unless the caller explicitly tolerates a missing directory
        remove(name, not_exists_ok=True, silent=True)

        # a path that points to a file is rejected
        with self.assertRaises(NotADirectoryError):
            remove(name, path='tests/FileCorpus/file_1')
Beispiel #3
0
    def test_set_and_add_corpus(self):
        """Cover ``add_corpus``, ``set``, ``set_key_value`` and ``set_default``."""
        # a fresh name can be added exactly once
        self.assertNotIn("XXX", config.config)
        add_corpus("XXX")
        self.assertIn("XXX", config.config)
        with self.assertRaises(CorpusExistsError):
            add_corpus("XXX")

        # a non-string corpus name warns and ends up under its string form
        with self.assertWarns(RuntimeWarning):
            self.assertNotIn(1, config.config)
            add_corpus(1)
            self.assertNotIn(1, config.config)
            self.assertIn("1", config.config)

        # None is stored as a real None, not as the string 'None'
        self.assertNotIn("YYY", config.config["XXX"])
        set("XXX", YYY=None)
        self.assertIn("YYY", config.config["XXX"])
        self.assertEqual(None, config.config["XXX"]["YYY"])
        self.assertNotEqual('None', config.config["XXX"]["YYY"])

        # non-string keys and values warn (None is the one accepted non-string value)
        with self.assertWarns(RuntimeWarning):
            set_key_value("XXX", key=1, value=None)
            self.assertEqual(None, get("XXX", 1))
        with self.assertWarns(RuntimeWarning):
            set_key_value("XXX", "1", 2)
            self.assertNotEqual(2, get("XXX", 1))
            self.assertEqual("2", get("XXX", 1))

        # writing into an unknown corpus is an error
        with self.assertRaises(CorpusNotFoundError):
            set_key_value("Does not exist", key="key", value="value")

        # overwrite a value inside a section
        set("XXX", YYY="ZZZ")
        self.assertEqual("ZZZ", config.config["XXX"]["YYY"])

        # values set in DEFAULT are visible through a corpus section
        set_default(AAA=None)
        self.assertEqual(None, get("XXX", "AAA"))
        set_default(AAA="BBB")
        self.assertEqual("BBB", get("XXX", "AAA"))

        # add a corpus together with initial key/value pairs
        add_corpus("new corpus", key1="val1", key2="val2")
        self.assertEqual("val1", get("new corpus", "key1"))
        self.assertEqual("val2", get("new corpus", "key2"))
        # adding it a second time is rejected
        with self.assertRaises(CorpusExistsError):
            add_corpus("new corpus")
Beispiel #4
0
    def test_download(self):
        """Download each test corpus and check the failure modes of ``download``."""

        class AccessCheck(Exception):
            """Marker exception proving the custom access callable was executed."""

        def access(*args, **kwargs):
            raise AccessCheck

        corpora = (
            "testcorpus-git-http",
            # "testcorpus-git-ssh",  # does not work in GitHub action
            "testcorpus-zip",
            "testcorpus-tar.gz",
        )
        for corpus in corpora:
            # a custom access callable is invoked (it raises, leaving the directory empty)
            with self.assertRaises(AccessCheck):
                download(corpus, access=access)
            # an unknown access method is reported as a failed download
            with self.assertRaises(DownloadFailedError):
                download(corpus, access="bad access method")
            # the regular download leaves a non-empty directory behind
            download(corpus)
            target = config.get(corpus, util.__PATH__)
            self.assertTrue(list(target.iterdir()))
            # a second download fails because the corpus already exists
            with self.assertRaises(DownloadFailedError):
                download(corpus)

        # a bad URL is reported as a failed download as well
        bad_url = "http://some-bad-url-that-skrews-up-the-tests.com/corpus.zip"
        with self.assertRaises(DownloadFailedError):
            download(None, access="zip", url=bad_url)
Beispiel #5
0
    def test_get_methods(self):
        """Check ``get``, ``summary`` and ``corpus_params`` on the test corpora config."""
        # re-initialise the config from the test corpora file
        reset_config('tests/test_corpora.ini')
        root = Path("~/corpora").expanduser()
        corpus_dir = root / "Test Corpus"
        sub_dir = corpus_dir / "Test Sub-Corpus"

        # values reported by get() for the top-level test corpus
        top_level_values = (
            (__INFO__, None),
            (__ROOT__, root),
            (__PATH__, corpus_dir),
            (__PARENT__, None),
            (__ACCESS__, None),
            (__URL__, None),
            (__LOADER__, "FileCorpus"),
        )
        for key, expected in top_level_values:
            self.assertEqual(expected, get("Test Corpus", key))

        # summary, first in its default form and then with raw=True
        default_summary = ("[Test Corpus]\n"
                           f"    {__INFO__}: None\n"
                           f"    {__ROOT__}: {root}\n"
                           f"    {__PATH__}: {root / 'Test Corpus'}\n"
                           f"    {__PARENT__}: None\n"
                           f"    {__ACCESS__}: None\n"
                           f"    {__URL__}: None\n"
                           f"    {__LOADER__}: FileCorpus")
        self.assertEqual(default_summary, summary("Test Corpus"))
        raw_summary = ("[Test Corpus]\n"
                       f"    {__INFO__}: None\n"
                       f"    {__ROOT__}: ~/corpora\n"
                       f"    {__PATH__}: None\n"
                       f"    {__PARENT__}: None\n"
                       f"    {__ACCESS__}: None\n"
                       f"    {__URL__}: None\n"
                       f"    {__LOADER__}: FileCorpus")
        self.assertEqual(raw_summary, summary("Test Corpus", raw=True))

        # corpus_params yields (key, value) pairs for the sub-corpus
        expected_params = [
            (__INFO__, "Some info"),
            (__ROOT__, corpus_dir),
            (__PATH__, sub_dir),
            (__PARENT__, "Test Corpus"),
            (__ACCESS__, None),
            (__URL__, None),
            (__LOADER__, "FileCorpus"),
        ]
        self.assertEqual(sorted(expected_params),
                         sorted(corpus_params("Test Sub-Corpus")))

        # asking for the parameters of an unknown corpus raises
        with self.assertRaises(CorpusNotFoundError):
            list(corpus_params("Unknown Corpus"))

        # default root
        self.assertEqual(root, get("Test Corpus", __ROOT__))
        # a relative root is returned as given, accompanied by a warning
        relative_root = Path('some/relative/path')
        self.assertFalse(relative_root.is_absolute())
        with self.assertWarns(RuntimeWarning):
            self.assertEqual(relative_root, get("Test Relative Root", __ROOT__))
        # the root of a sub- (sub-sub-) corpus is the path of its parent
        for expected, corpus in ((corpus_dir, "Test Sub-Corpus"),
                                 (sub_dir, "Test Sub-Sub-Corpus")):
            self.assertEqual(expected, get(corpus, __ROOT__))

        # expected path per corpus, checked in this order
        expected_paths = (
            # normal corpus
            (corpus_dir, "Test Corpus"),
            # relative path
            (root / "some/relative/path", "Test Relative Path"),
            # absolute paths
            (Path("/some/absolute/path"), "Test Absolute Path"),
            (Path("~/some/absolute/path").expanduser(), "Test Absolute Path Home"),
            # sub- and sub-sub-corpora
            (sub_dir, "Test Sub-Corpus"),
            (sub_dir / "Test Sub-Sub-Corpus", "Test Sub-Sub-Corpus"),
            # sub-corpora with relative and absolute paths
            (root / "some/relative/path", "Test Relative Sub-Path"),
            (Path("/some/absolute/path"), "Test Absolute Sub-Path"),
            (Path("~/some/absolute/path").expanduser(), "Test Absolute Sub-Path Home"),
        )
        for expected, corpus in expected_paths:
            self.assertEqual(expected, get(corpus, __PATH__))