Ejemplo n.º 1
0
    def analyze_alchemyapi(self, entities):
        '''Build a new Query annotated with the given AlchemyAPI entities.

        For every entity, the ontology ancestor of its AlchemyAPI type is
        looked up and recorded against the entity text in both directions
        (``token_entities`` and ``entity_tokens``).  The entity text is then
        replaced by ``str(ancestor)`` in the pattern and removed from the
        feature words.  Finally both are split on whitespace into lists.

        Returns the new Query; ``self.query`` is only read.
        '''
        # the alchemyapi Query instance
        annotated = Query(self.query.query)

        # Both strings start out as the raw query text and are rewritten
        # entity by entity.
        feature_text = annotated.query
        pattern_text = annotated.query

        type_namespace = 'http://www.alchemyapi.com/api/entity/types.html#'
        ontology = self.searchet_ontology

        for entity in entities:
            pedigree = ontology.get_pedigree(type_namespace, entity.entity_type)
            mention = entity.entity_text

            annotated.token_entities[mention] = pedigree
            annotated.entity_tokens[pedigree] = mention

            pattern_text = pattern_text.replace(mention, str(pedigree))
            feature_text = feature_text.replace(mention, "")

        # feature words: whitespace tokens, normalised to "Capitalized" form
        feature_words = []
        for word in feature_text.split():
            if word:
                feature_words.append(word.strip().lower().capitalize())
        annotated.feature_words = feature_words

        # pattern: whitespace tokens of the rewritten query
        pattern_tokens = []
        for piece in pattern_text.split():
            if piece:
                pattern_tokens.append(piece.strip())
        annotated.pattern = pattern_tokens

        return annotated
Ejemplo n.º 2
0
    def analyze_freebase(self, entity):
        '''Build a new Query annotated with a single Freebase entity.

        Steps:
          1. Resolve every type in ``entity.entity_type`` to its ontology
             ancestor and de-duplicate the results.
          2. Find the first non-empty candidate in ``entity.entity_text``
             that occurs in the lower-cased query, and take the matching
             span from the original query so its casing is preserved.
          3. If a span was found, record it against the ancestors in
             ``token_entities`` / ``entity_tokens`` and remove it from the
             feature words.
          4. Split the feature words into capitalized tokens.
          5. If any ancestor is truthy, substitute the matched span in the
             pattern with the ``|``-joined ancestors and split the pattern
             on whitespace.  Otherwise the pattern is left as the raw query
             string, as in the original implementation.

        Returns the new Query; ``self.query`` is only read.
        '''
        # the freebase Query instance
        query = Query(self.query.query)

        query.feature_words = query.query
        query.pattern = query.query

        # Resolve each type to its ancestor; the set drops duplicates.
        ancestors = list(set(
            self.searchet_ontology.get_pedigree(
                'http://schemas.freebaseapps.com/type?id=', t)
            for t in entity.entity_type
        ))

        # special treatment for entity text
        # NOTE(review): candidates are presumably lower-cased aliases, since
        # they are matched against the lower-cased query -- confirm upstream.
        text = ''
        lowered_query = query.query.lower()
        for candidate in entity.entity_text:
            if not candidate:
                # '' is "found" at index 0 of any string and would hide
                # later, real candidates.
                continue
            low = lowered_query.find(candidate)
            if low != -1:
                # The end index is relative to ``low``; slicing the original
                # query keeps the user's casing.
                text = query.query[low:low + len(candidate)]
                break

        if text:
            query.token_entities[text] = ancestors

            for ancestor in ancestors:
                query.entity_tokens[ancestor] = text

            query.feature_words = query.feature_words.replace(text, "")

        query.feature_words = [
            t.lower().strip().capitalize()
            for t in query.feature_words.split() if t
        ]

        if any(ancestors):
            if text:
                # Guarded: replacing '' would insert the ancestors between
                # every character of the pattern.
                query.pattern = query.pattern.replace(
                    text, '|'.join(ancestors))
            # Split on whitespace (not per character), matching
            # analyze_alchemyapi.
            query.pattern = [p.strip() for p in query.pattern.split() if p]

        return query
Ejemplo n.º 3
0
    def analyze_alchemyapi(self, entities):
        '''Annotate a fresh Query with ancestors of AlchemyAPI entities.

        Each entity contributes one ancestor, obtained from the ontology for
        its AlchemyAPI type.  The entity text and ancestor are stored in
        ``token_entities`` (text -> ancestor) and ``entity_tokens``
        (ancestor -> text).  In the pattern the text is replaced by
        ``str(ancestor)``; in the feature words it is deleted.  Both strings
        are split on whitespace before the Query is returned.
        '''
        # the alchemyapi Query instance
        result = Query(self.query.query)

        result.feature_words = result.query
        result.pattern = result.query

        types_url = 'http://www.alchemyapi.com/api/entity/types.html#'
        for item in entities:
            parent = self.searchet_ontology.get_pedigree(
                types_url, item.entity_type)
            surface = item.entity_text

            # two-way bookkeeping between the surface text and its ancestor
            result.token_entities[surface] = parent
            result.entity_tokens[parent] = surface

            rewritten_pattern = result.pattern.replace(surface, str(parent))
            result.pattern = rewritten_pattern
            remaining_words = result.feature_words.replace(surface, "")
            result.feature_words = remaining_words

        # feature words
        raw_words = result.feature_words.split()
        result.feature_words = [
            word.strip().lower().capitalize() for word in raw_words if word
        ]

        raw_pattern = result.pattern.split()
        result.pattern = [piece.strip() for piece in raw_pattern if piece]

        return result
Ejemplo n.º 4
0
    def analyze_freebase(self, entity):
        '''Build a new Query annotated with a single Freebase entity.

        The ancestors of every type in ``entity.entity_type`` are resolved
        through the ontology and de-duplicated.  The first non-empty
        candidate in ``entity.entity_text`` found in the lower-cased query
        selects the matching span of the original query.  That span is
        recorded against the ancestors in ``token_entities`` and
        ``entity_tokens`` and removed from the feature words.

        The feature words are always returned as a list of capitalized
        tokens.  The pattern becomes a list of whitespace tokens, with the
        span replaced by the ``|``-joined ancestors, only when at least one
        ancestor is truthy.  Otherwise it stays the raw query string, as in
        the original implementation.

        Returns the new Query; ``self.query`` is only read.
        '''
        # the freebase Query instance
        query = Query(self.query.query)

        query.feature_words = query.query
        query.pattern = query.query

        ancestors = []
        for t in entity.entity_type:
            ancestor = self.searchet_ontology.get_pedigree(
                'http://schemas.freebaseapps.com/type?id=', t)
            ancestors.append(ancestor)
        # dedup
        ancestors = list(set(ancestors))

        # special treatment for entity text
        # NOTE(review): candidates are presumably lower-cased aliases, since
        # they are matched against the lower-cased query -- confirm upstream.
        text = ''
        lowered_query = query.query.lower()
        for candidate in entity.entity_text:
            if not candidate:
                # An empty candidate matches at index 0 of any string and
                # would mask later, real candidates.
                continue
            low = lowered_query.find(candidate)
            if low == -1:
                continue
            # End index must be offset by ``low``; the slice is taken from
            # the original query so the user's casing is kept.
            high = low + len(candidate)
            text = query.query[low:high]
            break

        if text:
            query.token_entities[text] = ancestors

            for ancestor in ancestors:
                query.entity_tokens[ancestor] = text

            query.feature_words = query.feature_words.replace(text, "")

        query.feature_words = [
            t.lower().strip().capitalize()
            for t in query.feature_words.split() if t
        ]

        if any(ancestors):
            if text:
                # str.replace('', x) would insert x between every character,
                # so only substitute when a span was actually matched.
                query.pattern = query.pattern.replace(
                    text, '|'.join(ancestors))
            # Tokenize on whitespace rather than iterating characters,
            # matching analyze_alchemyapi.
            query.pattern = [p.strip() for p in query.pattern.split() if p]

        return query
Ejemplo n.º 5
0
    def __init__(self, query=None):
        '''Wrap ``query`` in a Query and create the ontology helper.

        ``query`` is passed unchanged to ``Query``.  The ontology is built
        from the ``searchet_v2.n3`` source using the ``n3`` format.
        '''
        wrapped_query = Query(query)
        self.query = wrapped_query

        # instance of searchet-ontology-api
        ontology_options = {'source': 'searchet_v2.n3', 'format': 'n3'}
        self.searchet_ontology = SearchetOntology(**ontology_options)