Exemple #1
0
    def test_entity_recognition(self):
        """
        Check that ``brain.entities`` extracts the expected entity names
        from the sample document, ignoring order and duplicates.
        """
        expected_entities = {
            "Second Red Scare",
            "Bhagavad Gita",
            "Soviet Union",
            "Phillips",
            "John F. Kennedy",
            "Julius Robert Oppenheimer",
            "World War II",
            "Institute for Advanced Study in Princeton",
            "Trinity",
            "Oppenheimer",
            "New Mexico",
            "Lyndon B. Johnson",
            "United States Atomic Energy Commission",
            "University of California",
            "Manhattan Project",
            "Born",
            "Enrico Fermi",
            "American",
            "Berkeley",
            "Enrico Fermi Award",
        }

        with open("tests/data/sample.txt", "r") as sample_file:
            document = sample_file.read()

        # Compare as sets: only which entities were found matters here.
        found_entities = set(brain.entities(document))
        self.assertEqual(found_entities, expected_entities)
Exemple #2
0
    def test_entity_recognition(self):
        """
        Check that ``brain.entities`` extracts the expected entity names
        from the sample document, ignoring order and duplicates.
        """
        expected_entities = {
            'Second Red Scare',
            'Bhagavad Gita',
            'Soviet Union',
            'Phillips',
            'John F. Kennedy',
            'Julius Robert Oppenheimer',
            'World War II',
            'Institute for Advanced Study in Princeton',
            'Trinity',
            'Oppenheimer',
            'New Mexico',
            'Lyndon B. Johnson',
            'United States Atomic Energy Commission',
            'University of California',
            'Manhattan Project',
            'Born',
            'Enrico Fermi',
            'American',
            'Berkeley',
            'Enrico Fermi Award',
        }

        with open('tests/data/sample.txt', 'r') as sample_file:
            document = sample_file.read()

        # Compare as sets: only which entities were found matters here.
        found_entities = set(brain.entities(document))
        self.assertEqual(found_entities, expected_entities)
Exemple #3
0
    def vectorize(self):
        """
        Return the vector representations of this article.

        The result is a two-element list:
            [bag of words vector, entities vector]

        It is computed on the first call, stored on ``self.vectors`` and
        handed back unchanged on later calls.
        """
        # Already computed: reuse the stored vectors.
        if self.vectors is not None:
            return self.vectors

        text_vector = vectorize(self.text)
        entity_names = entities(self.text)
        entity_vector = vectorize(' '.join(entity_names))
        self.vectors = [text_vector, entity_vector]
        return self.vectors
Exemple #4
0
    def vectorize(self):
        """
        Return the vector representations of this article.

        The result is a two-element list:
            [bag of words vector, entities vector]

        It is computed on the first call, stored on ``self.vectors`` and
        handed back unchanged on later calls.
        """
        # Already computed: reuse the stored vectors.
        if self.vectors is not None:
            return self.vectors

        text_vector = vectorize(self.text)
        entity_names = entities(self.text)
        entity_vector = vectorize(' '.join(entity_names))
        self.vectors = [text_vector, entity_vector]
        return self.vectors
Exemple #5
0
    def entitize(self):
        """
        Resolve the entities named in the article text and attach them.

        Each name returned by ``entities(self.text)`` is slugified and
        looked up with ``Entity.query.get``. When the lookup finds nothing,
        a new ``Entity`` is created, added to the session and committed
        immediately. The collected entities replace ``self.entities``.
        """
        resolved = []
        for name in entities(self.text):
            # TODO: Need to find a way of getting canonical name.
            entity = Entity.query.get(slugify(name))

            if not entity:
                # Nothing found under this slug: store a new entity.
                entity = Entity(name)
                db.session.add(entity)
                db.session.commit()

            resolved.append(entity)

        self.entities = resolved
Exemple #6
0
    def entitize(self):
        """
        Resolve the entities named in the article text and attach them.

        Each name returned by ``entities(self.text)`` is slugified and
        looked up with ``Entity.query.get``. When the lookup finds nothing,
        a new ``Entity`` is created, added to the session and committed
        immediately. The collected entities replace ``self.entities``.
        """
        resolved = []
        for name in entities(self.text):
            # TODO: Need to find a way of getting canonical name.
            entity = Entity.query.get(slugify(name))

            if not entity:
                # Nothing found under this slug: store a new entity.
                entity = Entity(name)
                db.session.add(entity)
                db.session.commit()

            resolved.append(entity)

        self.entities = resolved