Ejemplo n.º 1
0
 def test_ampersands_name(self):
     """
     Smoke test: run each name-based knowledge lookup against a
     name that contains an ampersand.

     Nothing is asserted about the results; the test passes as
     long as none of the lookups raises.
     """
     for ampersand_name in ['Foo & Bar']:
         knowledge.uri_for_name(ampersand_name)
         knowledge.summary_for_name(ampersand_name)
         knowledge.coordinates_for_name(ampersand_name)
         knowledge.image_for_name(ampersand_name)
         knowledge.aliases_for_name(ampersand_name)
Ejemplo n.º 2
0
 def test_ampersands_name(self):
     """
     Exercise the name-based knowledge lookups with a name
     containing an ampersand.

     This is a smoke test: no return value is checked, so it
     only fails if one of the lookups raises.
     """
     # Lookups are listed in the order they are invoked.
     lookups = (
         knowledge.uri_for_name,
         knowledge.summary_for_name,
         knowledge.coordinates_for_name,
         knowledge.image_for_name,
         knowledge.aliases_for_name,
     )
     names_with_ampersand = [
         'Foo & Bar'
     ]
     for candidate in names_with_ampersand:
         for lookup in lookups:
             lookup(candidate)
Ejemplo n.º 3
0
    def __init__(self, name):
        """
        Initialize a concept by a name, which can be
        an alias (it does not have to be the canonical name).
        This specified name will be saved as an Alias.

        A canonical URI will be looked for; if one is found,
        its last path segment is used as the slug for this
        Concept, and the commonness, summary, name and image
        are filled in from the knowledge lookups. If none is
        found, the slug is generated from the given name.

        Args:
            name: the name (canonical or alias) to build
                the concept from.
        """
        # Local import so this method does not depend on a
        # module-level import being present.
        import hashlib

        self.aliases.append(Alias(name))

        # Try to get a canonical URI
        # and derive the slug from that.
        self.uri = knowledge.uri_for_name(name)
        if self.uri:
            self.slug = self.uri.split('/')[-1]
            k = knowledge.knowledge_for(uri=self.uri, fallback=True)
            self.commonness = knowledge.commonness_for_uri(self.uri)

            self.summary = k['summary']
            self.name = k['name']

            # Download the image.
            if k['image'] is not None:
                ext = splitext(k['image'])[-1].lower()
                # Name the file after a digest of the slug instead of
                # the built-in hash(): on Python 3, str hashes are
                # salted per process (and may be negative), so hash()
                # would give the same concept a different filename
                # on every run.
                digest = hashlib.sha256(
                    self.slug.encode('utf-8')).hexdigest()
                self.image = storage.save_from_url(
                    k['image'], '{0}{1}'.format(digest, ext))

        # If no URI was found,
        # generate our own slug.
        # Note: A problem here is that it assumes that
        # this particular name is the canonical one,
        # and that we don't collect any information for it.
        else:
            self.slug = slugify(name)
Ejemplo n.º 4
0
    def __init__(self, name):
        """
        Initialize a concept by a name, which can be
        an alias (it does not have to be the canonical name).
        This specified name will be saved as an Alias.

        A canonical URI will be looked for; if one is found,
        the last segment of that URI becomes the slug for this
        Concept, and its commonness, summary, name and image
        are populated from the knowledge lookups. Otherwise the
        slug is generated from the given name.

        Args:
            name: the name (canonical or alias) to build
                the concept from.
        """
        # Imported locally so the method is self-contained.
        import hashlib

        self.aliases.append(Alias(name))

        # Try to get a canonical URI
        # and derive the slug from that.
        self.uri = knowledge.uri_for_name(name)
        if self.uri:
            self.slug = self.uri.split('/')[-1]
            k = knowledge.knowledge_for(uri=self.uri, fallback=True)
            self.commonness = knowledge.commonness_for_uri(self.uri)

            self.summary = k['summary']
            self.name = k['name']

            # Download the image.
            if k['image'] is not None:
                ext = splitext(k['image'])[-1].lower()
                # Build a reproducible filename from the slug. The
                # built-in hash() is unsuitable here: on Python 3 it is
                # salted per process for strings and can be negative,
                # so the filename would change between runs.
                slug_digest = hashlib.sha256(self.slug.encode('utf-8')).hexdigest()
                self.image = storage.save_from_url(k['image'], '{0}{1}'.format(slug_digest, ext))

        # If no URI was found,
        # generate our own slug.
        # Note: A problem here is that it assumes that
        # this particular name is the canonical one,
        # and that we don't collect any information for it.
        else:
            self.slug = slugify(name)
Ejemplo n.º 5
0
    def conceptize(self):
        """
        Extract concepts from the article text, record a mention
        for each one, and score every distinct concept.

        Each concept name found in the text is resolved to a slug
        (from its knowledge URI when one is found, otherwise by
        slugifying the name). An existing Concept with that slug is
        reused, gaining the name as an alias if needed; otherwise a
        new Concept is created and committed. Each distinct concept
        is then scored by its share of all concept occurrences found
        and stored in `self.concept_associations`.
        """
        found = []
        for mention_name in gx.concepts(self.text):
            # Resolve the slug this name maps to.
            uri = knowledge.uri_for_name(mention_name)
            slug = uri.split('/')[-1] if uri else slugify(mention_name)
            concept = Concept.query.get(slug)

            if concept:
                # Known concept: make sure this name is one of its aliases.
                alias = Alias.query.filter_by(
                    name=mention_name, concept=concept).first()
                if not alias:
                    alias = Alias(mention_name)
                    concept.aliases.append(alias)
                # Only mention each alias once.
                if alias not in self.mentions:
                    self.mentions.append(alias)
            else:
                # Unknown concept: create it and mention its first alias.
                concept = Concept(mention_name)
                self.mentions.append(concept.aliases[0])
                db.session.add(concept)
                db.session.commit()

            found.append(concept)

        # Importance score = occurrences of the concept / all occurrences.
        total = len(found)
        tally = Counter(found)
        self.concept_associations = [
            ArticleConceptAssociation(concept, tally[concept] / total)
            for concept in set(found)
        ]
Ejemplo n.º 6
0
    def conceptize(self):
        """
        Find the concepts mentioned in the article text, attach
        the corresponding alias mentions, and compute an importance
        score for each distinct concept.

        A concept is looked up by slug, taken from the last segment
        of its knowledge URI when one is found and from the slugified
        name otherwise. Missing concepts are created and committed;
        existing ones gain the name as a new alias when they do not
        have it yet. The score of a concept is the number of times it
        was found divided by the total number of concepts found; the
        resulting associations are stored on
        `self.concept_associations`.
        """
        seen = []
        for concept_name in gx.concepts(self.text):
            # Work out which slug this name corresponds to.
            uri = knowledge.uri_for_name(concept_name)
            if not uri:
                slug = slugify(concept_name)
            else:
                slug = uri.split("/")[-1]
            concept = Concept.query.get(slug)

            if not concept:
                # Nothing stored under this slug yet: create the
                # concept and mention the alias it was built with.
                concept = Concept(concept_name)
                self.mentions.append(concept.aliases[0])
                db.session.add(concept)
                db.session.commit()
            else:
                # Reuse the stored concept, registering the name as an
                # alias when it is not one already.
                alias = Alias.query.filter_by(name=concept_name, concept=concept).first()
                if not alias:
                    alias = Alias(concept_name)
                    concept.aliases.append(alias)
                # Do not mention the same alias twice.
                if alias not in self.mentions:
                    self.mentions.append(alias)

            seen.append(concept)

        # Score each distinct concept by its share of all occurrences.
        occurrences = Counter(seen)
        n_seen = len(seen)

        associations = []
        for concept in set(seen):
            associations.append(
                ArticleConceptAssociation(concept, occurrences[concept] / n_seen))

        self.concept_associations = associations
Ejemplo n.º 7
0
 def test_uri_for_name(self):
     """
     `knowledge.uri_for_name` should return the URI value found in
     the single binding of the canned response body.
     """
     expected_uri = 'http://dbpedia.org/resource/United_States_Secretary_of_State'
     # Canned response body handed back by the mocked response's read().
     self.mock_resp.read.return_value = b'{\n  "head": {\n    "vars": [ "uri" ]\n  } ,\n  "results": {\n    "bindings": [\n      {\n        "uri": { "type": "uri" , "value": "http://dbpedia.org/resource/United_States_Secretary_of_State" }\n      }\n    ]\n  }\n}\n'
     found_uri = knowledge.uri_for_name('United States Secretary of State')
     self.assertEqual(found_uri, expected_uri)
Ejemplo n.º 8
0
 def test_uri_for_name(self):
     """
     Check that `knowledge.uri_for_name` extracts the URI from a
     canned response containing exactly one binding.
     """
     canned_body = b'{\n  "head": {\n    "vars": [ "uri" ]\n  } ,\n  "results": {\n    "bindings": [\n      {\n        "uri": { "type": "uri" , "value": "http://dbpedia.org/resource/United_States_Secretary_of_State" }\n      }\n    ]\n  }\n}\n'
     expected = 'http://dbpedia.org/resource/United_States_Secretary_of_State'

     # The mocked response returns the canned body when read.
     self.mock_resp.read.return_value = canned_body

     actual = knowledge.uri_for_name('United States Secretary of State')
     self.assertEqual(actual, expected)