Beispiel #1
0
 def summarize_(corpus: Corpus) -> PartialSummary:
     """
     Build the input/output summary for a Corpus.

     The result starts from whatever the summarizer registered for Table
     returns for this corpus. Its ``summary`` is passed through unchanged,
     and one extra HTML line is appended to its ``details``:

     * when ``corpus.has_tokens()`` is true, the total number of tokens
       (sum of the lengths of the entries in ``corpus.tokens``) and the
       number of types (``len(corpus.dictionary)``);
     * otherwise, a note that the corpus is not preprocessed.
     """
     base = summarize.dispatch(Table)(corpus)
     if corpus.has_tokens():
         n_tokens = sum(len(doc) for doc in corpus.tokens)
         n_types = len(corpus.dictionary)
         suffix = f"<br/><nobr>Tokens: {n_tokens}, Types: {n_types}</nobr>"
     else:
         suffix = "<br/><nobr>Corpus is not preprocessed</nobr>"
     return PartialSummary(base.summary, base.details + suffix)
Beispiel #2
0
    def test_corpus_not_preprocessed(self):
        """Summary details of a freshly loaded corpus end with the
        "Corpus is not preprocessed" note."""
        corpus = Corpus.from_file("book-excerpts")

        # Variables and metas together make up the reported variable count.
        variable_count = len(corpus.domain.variables) + len(corpus.domain.metas)
        expected_details = "".join((
            f"<nobr>{len(corpus)} instances, {variable_count} variables</nobr><br/>",
            "<nobr>Features: — (no missing values)</nobr><br/>",
            "<nobr>Target: categorical</nobr><br/>",
            "<nobr>Metas: string</nobr><br/>",
            "<nobr>Corpus is not preprocessed</nobr>",
        ))

        result = summarize.dispatch(Corpus)(corpus)

        self.assertEqual(140, result.summary)
        self.assertEqual(expected_details, result.details)
Beispiel #3
0
    def test_corpus_preprocessed(self):
        """Summary details of a tokenized corpus end with the token and
        type counts."""
        tokenized = RegexpTokenizer()(Corpus.from_file("book-excerpts"))

        # Variables and metas together make up the reported variable count.
        variable_count = (len(tokenized.domain.variables)
                          + len(tokenized.domain.metas))
        expected_details = "".join((
            "<nobr><b><u>book-excerpts</u></b>: ",
            f"{len(tokenized)} instances, {variable_count} variables</nobr><br/>",
            "<nobr>Features: — (no missing values)</nobr><br/>",
            "<nobr>Target: categorical</nobr><br/>",
            "<nobr>Metas: string</nobr><br/>",
            "<nobr>Tokens: 128020, Types: 11712</nobr>",
        ))

        result = summarize.dispatch(Corpus)(tokenized)

        self.assertEqual(140, result.summary)
        self.assertEqual(expected_details, result.details)