Exemple #1
0
    def test_generate_summary(self):
        """
        Test that a summary is generated (is not None) for every topic
        :return:
        """
        topics = {
            'PUP1A': [
                Document('TST_ENG_20190101.0001'),
                Document('TST_ENG_20190101.0002'),
                Document('TST20190201.0001'),
                Document('TST20190201.0002')
            ],
            'WAR2A': [
                Document('TST_ENG_20190301.0001'),
                Document('TST_ENG_20190301.0002'),
                Document('TST20190401.0001'),
                Document('TST20190401.0002')
            ]
        }
        # Word mapping and frequency vectors are set up before any generator
        # is used (presumably the generators read that state -- confirm).
        WordMap.create_mapping()
        vec = Vectors()
        vec.create_freq_vectors(topics)
        # The IDF array comes from a generator built over self.doc_list and is
        # reused for every per-topic generator below.
        idf = MeadSummaryGenerator(self.doc_list, MeadContentSelector(),
                                   self.args).get_idf_array()

        for topic_id, documents in topics.items():
            summarizer = MeadSummaryGenerator(documents, MeadContentSelector(),
                                              self.args)
            summary = summarizer.generate_summary(idf)
            # Name the topic in the failure message so a broken topic is
            # identifiable without re-running under a debugger.
            self.assertIsNotNone(
                summary, 'no summary generated for topic %s' % topic_id)
Exemple #2
0
def load_documents_for_topics(topic_soup):
    """
    Load the documents of every topic and build the word mapping and vectors
    :param topic_soup: parsed markup exposing find_all('topic'); each topic
        element must be subscriptable by 'id' (presumably BeautifulSoup --
        confirm against caller)
    :return: dict mapping each topic id to the result of load_documents
        for that topic
    """
    docs_by_topic = {}
    for topic_node in topic_soup.find_all('topic'):
        docs_by_topic[topic_node['id']] = load_documents(topic_node)

    # Every document is loaded now, so the unique words are already collected
    # in the WordMap set; the mapping and the vectors can be built from them.
    WordMap.create_mapping()
    vectors = Vectors()
    # Open question carried over from the original author: is the frequency
    # vector step needed when MEAD-based content selection is not run?
    vectors.create_freq_vectors(docs_by_topic)
    vectors.create_term_doc_freq(docs_by_topic)

    return docs_by_topic
Exemple #3
0
    def test_mead_summary_length(self):
        """
        Test length of summary is at most 100 words
        :return:
        """
        topics = {
            'PUP1A': [
                Document('TST_ENG_20190101.0001'),
                Document('TST_ENG_20190101.0002'),
                Document('TST20190201.0001'),
                Document('TST20190201.0002')
            ],
            'WAR2A': [
                Document('TST_ENG_20190301.0001'),
                Document('TST_ENG_20190301.0002'),
                Document('TST20190401.0001'),
                Document('TST20190401.0002')
            ]
        }
        # Word mapping and frequency vectors are set up before any generator
        # is used (presumably the generators read that state -- confirm).
        WordMap.create_mapping()
        vec = Vectors()
        vec.create_freq_vectors(topics)
        # The IDF array comes from a generator built over self.doc_list and is
        # reused for every per-topic generator below.
        idf = MeadSummaryGenerator(self.doc_list, MeadContentSelector(),
                                   self.args).get_idf_array()
        max_length = 100

        for topic_id, documents in topics.items():
            generator = MeadSummaryGenerator(documents, MeadContentSelector(),
                                             self.args)
            generator.select_content(idf)
            generator.order_information()
            realized_content = generator.realize_content()
            # Count whitespace-separated tokens. The previous filter
            # (`if not " "`) tested a constant and was always False, so the
            # word list was always empty and the assertion could never fail.
            # str.split() with no argument also discards empty tokens and
            # treats newlines between sentences as separators.
            words = realized_content.split()
            content_length = len(words)
            self.assertLessEqual(
                content_length, max_length,
                'summary for topic %s has %d words' % (topic_id,
                                                       content_length))