Esempio n. 1
0
    def test_corpus_sents_words(self):
        """Verify Corpus.sents() and Corpus.words() on a small sample text.

        The sample holds four lines of single-letter tokens with one blank
        line after the first. sents() is expected to return one token list
        per non-empty line, and words() the same tokens as one flat list.
        """
        sample_text = 'a b c d\n\ne f g\nh i j\nk'
        expected_sents = [
            ['a', 'b', 'c', 'd'],
            ['e', 'f', 'g'],
            ['h', 'i', 'j'],
            ['k'],
        ]
        # Flattening the expected sentences yields the expected word list.
        expected_words = [tok for sent in expected_sents for tok in sent]

        corpus = Corpus(sample_text)

        self.assertEqual(corpus.sents(), expected_sents)
        self.assertEqual(corpus.words(), expected_words)
Esempio n. 2
0
    def test_corpus_sents_words(self):
        """Check the sentence and word views of a Corpus.

        Builds a Corpus from a short multi-line string and asserts that
        sents() returns a list of token lists and that words() returns all
        tokens in a single flat list, in input order.
        """
        corpus = Corpus('a b c d\n\ne f g\nh i j\nk')

        sentences = corpus.sents()
        self.assertEqual(
            sentences,
            [['a', 'b', 'c', 'd'], ['e', 'f', 'g'], ['h', 'i', 'j'], ['k']],
        )

        tokens = corpus.words()
        self.assertEqual(
            tokens,
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'],
        )
Esempio n. 3
0
    def test_corpus_docs_sents_words(self):
        """Test Corpus.docs, .paras, .sents, .words, .docs_of_words, .raw.

        A single nested fixture (documents -> sentences -> words) describes
        the expected structure; the flatter expected values are derived from
        it so that every assertion is checked against the same data.
        """
        source_text = 'a b c d\n\ne f g\nh i j\nk'
        corpus = Corpus(source_text)

        # Expected nesting: list of documents, each a list of sentences,
        # each a list of word tokens.
        nested = [
            [['a', 'b', 'c', 'd']],
            [['e', 'f', 'g'], ['h', 'i', 'j'], ['k']],
        ]
        flat_sents = [sent for doc in nested for sent in doc]
        flat_words = [word for sent in flat_sents for word in sent]
        words_per_doc = [
            [word for sent in doc for word in sent] for doc in nested
        ]

        # docs() and paras() are expected to return the same structure here.
        self.assertEqual(corpus.docs(), nested)
        self.assertEqual(corpus.paras(), nested)
        self.assertEqual(corpus.sents(), flat_sents)
        self.assertEqual(corpus.words(), flat_words)

        self.assertEqual(corpus.docs_of_words(), words_per_doc)
        # raw() is expected to return the input string unchanged.
        self.assertEqual(corpus.raw(), source_text)
Esempio n. 4
0
    def test_corpus_docs_sents_words(self):
        """Test Corpus.docs, .paras, .sents, .words, .docs_of_words, .raw.

        Each accessor is called on a Corpus built from the same short string
        and compared with a hand-written expected value.
        """
        text = 'a b c d\n\ne f g\nh i j\nk'

        first_doc = [['a', 'b', 'c', 'd']]
        second_doc = [['e', 'f', 'g'], ['h', 'i', 'j'], ['k']]
        want_sents = [
            ['a', 'b', 'c', 'd'],
            ['e', 'f', 'g'],
            ['h', 'i', 'j'],
            ['k'],
        ]
        want_words = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k']
        want_doc_words = [
            ['a', 'b', 'c', 'd'],
            ['e', 'f', 'g', 'h', 'i', 'j', 'k'],
        ]

        corp = Corpus(text)

        # Document-level and paragraph-level views are expected to match.
        self.assertEqual(corp.docs(), [first_doc, second_doc])
        self.assertEqual(corp.paras(), [first_doc, second_doc])
        self.assertEqual(corp.sents(), want_sents)
        self.assertEqual(corp.words(), want_words)

        self.assertEqual(corp.docs_of_words(), want_doc_words)
        # The raw text is expected to round-trip unchanged.
        self.assertEqual(corp.raw(), text)