コード例 #1
0
ファイル: event_test.py プロジェクト: publicscience/argos
    def test_summary_sentences(self):
        # The event's summary sentences should be the summarizer's
        # sentences, each paired with the article's source name and url.
        from argos.core.brain import summarizer

        title = 'Syria Misses New Deadline as It Works to Purge Arms'
        text = 'Syria missed a revised deadline on Sunday for completing the export or destruction of chemicals in its weapons arsenal, but the government of the war-ravaged country may be only days away from finishing the job, according to international experts overseeing the process. The Syrian government had agreed to complete the export or destruction of about 1,200 tons of chemical agents by April 27 after missing a February deadline, but by Sunday, it had shipped out or destroyed 92.5 percent of the arsenal, said Sigrid Kaag, the coordinator of the joint mission by the United Nations and the watchdog agency the Organization for the Prohibition of Chemical Weapons.'

        # Run the summarizer directly to learn which sentences to expect.
        expected_sents = summarizer.summarize(title, text)

        # A single sourced article is enough to build the event.
        source = Source()
        source.name = 'Super Cool Times'

        article = Article(title=title, text=text, score=100)
        article.source = source
        article.ext_url = 'http://foo.com'

        self.event = Event([article])

        # Every expected sentence carries the same source name and url.
        expected = []
        for sent in expected_sents:
            expected.append({
                'sentence': sent,
                'source': 'Super Cool Times',
                'url': 'http://foo.com',
            })

        self.assertEqual(self.event.summary_sentences, expected)
コード例 #2
0
def process_events(h, clusters):
    """
    Build, modify, and delete events from clusters of node uuids.

    Active events are compared against `clusters` via `triage`, which
    splits the work into events to create, update, delete, or leave
    unchanged. Unchanged events last updated more than 3 days ago are
    deactivated and their articles pruned from the hierarchy `h`.
    All changes are committed in a single transaction at the end.
    """
    now = datetime.utcnow()

    # Index the active events two ways:
    #   event_map: event id -> event, for lookup later
    #   existing:  event id -> node ids of the event's member articles
    event_map = {}
    existing = {}
    for event in Event.all_active():
        event_map[event.id] = event
        existing[event.id] = [article.node_id for article in event.articles]

    # Decide which events to update, create, delete, or leave alone.
    to_update, to_create, to_delete, unchanged = triage(existing, clusters)

    for a_ids in to_create:
        node_ids = [node_id.item() for node_id in a_ids]
        query = Article.query.filter(Article.node_id.in_(node_ids))
        articles = query.order_by(Article.created_at.desc()).all()
        event = Event(articles)

        # NOTE(review): articles are sorted newest-first, so created_at
        # comes from the newest article and updated_at from the oldest.
        # This looks inverted -- confirm intent before changing.
        event.created_at = articles[0].created_at
        event.updated_at = articles[-1].updated_at

        rep_article = representative_article(h, a_ids, articles)
        event.title = rep_article.title
        event.image = rep_article.image

        db.session.add(event)

    for e_id, a_ids in to_update.items():
        event = event_map[e_id]
        node_ids = [node_id.item() for node_id in a_ids]
        articles = Article.query.filter(Article.node_id.in_(node_ids)).all()
        event.members = articles

        rep_article = representative_article(h, a_ids, articles)
        event.title = rep_article.title
        event.image = rep_article.image

        event.update()

    # Freeze expiring events and clean up their articles from the hierarchy.
    for e_id in unchanged:
        event = event_map[e_id]
        if not (now - event.updated_at).days > 3:
            continue
        event.active = False
        nodes = [h.to_iid(article.node_id) for article in event.articles]
        h.prune(nodes)

    # Deletion comes LAST so that a doomed event's articles have had a
    # chance to be moved to their new clusters (if any).
    # Open question carried over from the original: should the articles
    # be pruned too? The assumption is that a deleted event's articles
    # have all migrated elsewhere.
    for e_id in to_delete:
        db.session.delete(event_map[e_id])

    db.session.commit()
コード例 #3
0
    def test_event_similarity_with_cluster_different(self):
        # Compare the prepared event against an event built from the
        # 'different' article set.
        self.prepare_event()
        other = Event(self.prepare_articles(type='different'))

        similarity = self.cluster.similarity(other)

        # The score must equal neither extreme.
        for extreme in (1.0, 0.0):
            self.assertNotEqual(similarity, extreme)
コード例 #4
0
 def test_event_entitize(self):
     # An event built from the 'Robots' article plus the first prepared
     # article should expose exactly the entities 'Clinton' and 'Reagan'.
     robots = Article(title='Robots', text='dinosaurs are cool, Reagan')
     prepared = self.prepare_articles()[0]
     self.cluster = Event([robots, prepared])

     names = set()
     for ent in self.cluster.entities:
         names.add(ent.name)

     self.assertEqual(names, {'Clinton', 'Reagan'})
コード例 #5
0
ファイル: story_test.py プロジェクト: keho98/argos
    def test_story_clustering_without_matching_entities(self):
        story = fac.story()

        # Build an event whose entities have nothing in common with
        # those of the story.
        unrelated_article = fac.article(
            title='The Illiad',
            text='The Illiad has Argos in it.',
        )
        unrelated_event = Event([unrelated_article])

        Story.cluster([unrelated_event])

        # The existing story keeps its two members, and two stories exist
        # in total (presumably the original plus one made for the new
        # event -- confirm against Story.cluster).
        self.assertEqual(len(story.members), 2)
        self.assertEqual(Story.query.count(), 2)
コード例 #6
0
    def test_event_similarity_with_cluster_duplicates(self):
        self.prepare_event()
        duplicate = Event(self.prepare_articles())
        similarity = self.cluster.similarity(duplicate)

        # NOTE: identical clusters do not currently score 1.0. The
        # similarity is the average similarity of the articles between
        # the two clusters rather than the overlap of the clusters, so
        # the currently observed value is pinned here instead of 1.0.
        self.assertAlmostEqual(similarity, 0.83999999999999)
コード例 #7
0
    def _create_dated_story(self):
        # Reference timestamps: one day ago (A) and five days ago (B).
        datetime_A = datetime.utcnow() - timedelta(days=1)
        datetime_B = datetime.utcnow() - timedelta(days=5)

        # Three single-article events, created at A, B and A respectively.
        events = []
        for created_at in (datetime_A, datetime_B, datetime_A):
            article = fac.article(title='The Illiad', text='The Illiad has Argos in it.')
            event = Event([article])
            event.created_at = created_at
            events.append(event)

        # Persist a story made of those events.
        story = Story(events)
        self.db.session.add(story)
        self.db.session.commit()

        # Hand back the story together with both timestamps.
        return story, datetime_A, datetime_B
コード例 #8
0
ファイル: event_test.py プロジェクト: publicscience/argos
    def test_conceptize(self):
        # Event made of the 'Robots' article plus the first prepared article.
        robots = Article(title='Robots', text='dinosaurs are cool, Reagan')
        prepared = self.prepare_articles()[0]
        self.event = Event([robots, prepared])

        concept_slugs = set()
        for con in self.event.concepts:
            concept_slugs.add(con.slug)

        mention_names = set()
        for ali in self.event.mentions:
            mention_names.add(ali.name)

        # Both the concepts and the mentions should cover the same names.
        self.assertEqual(concept_slugs, {'Clinton', 'Reagan'})
        self.assertEqual(mention_names, {'Clinton', 'Reagan'})

        # Commonness weighs each concept's score down, so the scores
        # are expected near 0.005 rather than 0.5.
        for concept in self.event.concepts:
            self.assertAlmostEqual(concept.score, 0.005, places=3)
コード例 #9
0
 def test_event_timespan(self):
     text = 'the worldly philosophers today cautious optimism is based to a large extent on technological breakthroughs'

     # Articles A, B and C share the same text and are created on
     # January 20, 22 and 24 of 2014 respectively.
     members = [
         Article(title=title,
                 text=text,
                 created_at=datetime(2014, 1, day, 1, 1, 1, 111111))
         for title, day in (('A', 20), ('B', 22), ('C', 24))
     ]
     self.cluster = Event(members)

     # With a January 21 cutoff, only B and C are expected back.
     cutoff = datetime(2014, 1, 21, 1, 1, 1, 111111)
     results = self.cluster.timespan(cutoff)

     self.assertEqual(len(results), 2)
     self.assertEqual({r.title for r in results}, {'B', 'C'})
コード例 #10
0
ファイル: event_test.py プロジェクト: publicscience/argos
    def test_event_deletion_removes_from_articles_events(self):
        session = self.db.session

        # Persist the articles together with an event that groups them.
        articles = self.prepare_articles()
        for member in articles:
            session.add(member)

        self.event = Event(articles)
        session.add(self.event)
        session.commit()

        # Each article should now point back at the event.
        for member in articles:
            self.assertEqual(member.events, [self.event])

        # Delete every event.
        Event.query.delete()
        session.commit()

        # No article should still hold a reference to an event.
        for member in articles:
            self.assertEqual(member.events, [])
コード例 #11
0
ファイル: event_test.py プロジェクト: publicscience/argos
 def test_summarize(self):
     # An event built from the prepared articles should have a truthy
     # (non-empty) summary.
     articles = self.prepare_articles()
     self.event = Event(articles)
     self.assertTrue(self.event.summary)
コード例 #12
0
ファイル: event_test.py プロジェクト: publicscience/argos
    def test_score_prefer_newer_events(self):
        # Two events built the same way, one after the other; the one
        # built second is expected to score higher.
        older_event = Event(self.prepare_articles())
        newer_event = Event(self.prepare_articles())

        self.assertGreater(newer_event.score, older_event.score)
コード例 #13
0
ファイル: event_test.py プロジェクト: publicscience/argos
    def test_score_prefer_events_with_higher_article_scores(self):
        # One event uses the default prepared articles, the other uses
        # articles prepared with score=200; the latter should rank higher.
        baseline_event = Event(self.prepare_articles())
        boosted_event = Event(self.prepare_articles(score=200))

        self.assertGreater(boosted_event.score, baseline_event.score)
コード例 #14
0
 def test_event_similarity_with_object_duplicates(self):
     # An event made of 'duplicate' articles should be fully similar
     # (1.0) to self.article.
     # NOTE(review): self.article is not assigned in this method;
     # presumably prepare_articles sets it -- confirm.
     duplicates = self.prepare_articles(type='duplicate')
     event = Event(duplicates)
     similarity = event.similarity(self.article)
     self.assertEqual(similarity, 1.0)
コード例 #15
0
 def prepare_event(self):
     # Build an event from the prepared articles, keep it on the test
     # case as self.cluster, and commit it to the database session.
     articles = self.prepare_articles()
     self.cluster = Event(articles)

     session = self.db.session
     session.add(self.cluster)
     session.commit()
コード例 #16
0
 def test_event_summarize_single_article(self):
     # An event holding only the first prepared article should still
     # have a truthy summary.
     only_article = self.prepare_articles()[0]
     self.cluster = Event([only_article])
     self.assertTrue(self.cluster.summary)
コード例 #17
0
 def test_event_summarize(self):
     # An event holding all prepared articles should have a truthy summary.
     articles = self.prepare_articles()
     self.cluster = Event(articles)
     self.assertTrue(self.cluster.summary)
コード例 #18
0
 def test_event_titleize(self):
     # The 'Robots' article goes first, followed by the 'duplicate'
     # prepared articles; the resulting event title should be 'Dinosaurs'.
     robots = Article(title='Robots', text='dinosaurs are cool, Reagan')
     duplicates = self.prepare_articles(type='duplicate')
     self.cluster = Event([robots] + duplicates)
     self.assertEqual(self.cluster.title, 'Dinosaurs')
コード例 #19
0
 def test_event_entitize_no_duplicates(self):
     # The entity names are collected into a list (not a set) so that
     # a repeated 'Clinton' entry would make the assertion fail.
     self.cluster = Event(self.prepare_articles())

     names = []
     for ent in self.cluster.entities:
         names.append(ent.name)

     self.assertEqual(names, ['Clinton'])
コード例 #20
0
ファイル: event_test.py プロジェクト: publicscience/argos
 def prepare_event(self):
     # Build an event from the prepared articles, keep it on the test
     # case as self.event, and commit it to the database session.
     articles = self.prepare_articles()
     self.event = Event(articles)

     session = self.db.session
     session.add(self.event)
     session.commit()
コード例 #21
0
ファイル: event_test.py プロジェクト: publicscience/argos
 def test_conceptize_no_duplicates(self):
     # Slugs and mention names are collected into lists (not sets) so
     # that a repeated 'Clinton' entry would make the assertions fail.
     self.event = Event(self.prepare_articles())

     concept_slugs = []
     for con in self.event.concepts:
         concept_slugs.append(con.slug)

     mention_names = []
     for ali in self.event.mentions:
         mention_names.append(ali.name)

     self.assertEqual(concept_slugs, ['Clinton'])
     self.assertEqual(mention_names, ['Clinton'])