def analyze_alchemyapi(self, entities):
    '''Build a fresh Query annotated with the given AlchemyAPI entities.

    For each entity this:
      1. looks up its ancestor via ``self.searchet_ontology.get_pedigree``
         using the entity's ``entity_type``,
      2. records the entity text <-> ancestor mapping on the query,
      3. rewrites the query: the entity text is replaced by ``str(ancestor)``
         in ``pattern`` and removed from ``feature_words``.

    ``entities`` is an iterable of objects exposing ``entity_type`` and
    ``entity_text``. Returns the new Query; ``feature_words`` and
    ``pattern`` end up as lists of whitespace-separated tokens.
    '''
    namespace = 'http://www.alchemyapi.com/api/entity/types.html#'

    # the alchemyapi Query instance, seeded from the raw query string
    result = Query(self.query.query)
    result.feature_words = result.query
    result.pattern = result.query

    for item in entities:
        pedigree = self.searchet_ontology.get_pedigree(
            namespace, item.entity_type)
        surface = item.entity_text

        # two-way lookup between the matched text and its ancestor
        result.token_entities[surface] = pedigree
        result.entity_tokens[pedigree] = surface

        # both fields are still plain strings while inside this loop
        result.pattern = result.pattern.replace(surface, str(pedigree))
        result.feature_words = result.feature_words.replace(surface, "")

    # feature words: whatever is left of the query, lowercased then capitalized
    words = []
    for word in result.feature_words.split():
        if word:
            words.append(word.strip().lower().capitalize())
    result.feature_words = words

    # pattern: the rewritten query split into tokens
    pieces = []
    for piece in result.pattern.split():
        if piece:
            pieces.append(piece.strip())
    result.pattern = pieces

    return result
def analyze_freebase(self, entity):
    '''Build a fresh Query annotated with a single Freebase entity.

    Each value in ``entity.entity_type`` is resolved to an ancestor via
    ``self.searchet_ontology.get_pedigree``. The first non-empty candidate
    in ``entity.entity_text`` that occurs in the lowercased query is taken
    as the entity text (sliced from the original query so its casing is
    kept).

    When an entity text is found:
      * ``token_entities[text]`` is set to the list of ancestors,
      * ``entity_tokens[ancestor]`` is set to ``text`` for every ancestor,
      * the text is removed from ``feature_words``,
      * the text is replaced in ``pattern`` by the ancestors joined
        with ``'|'``.

    Returns the new Query; ``feature_words`` and ``pattern`` end up as
    lists of whitespace-separated tokens, matching ``analyze_alchemyapi``.

    NOTE(review): assumes ``entity.entity_text`` is an iterable of
    lowercase candidate strings and that ancestors are strings (or falsy
    when unknown) -- confirm against the caller and ``get_pedigree``.
    '''
    namespace = 'http://schemas.freebaseapps.com/type?id='

    # the freebase Query instance
    query = Query(self.query.query)
    query.feature_words = query.query
    query.pattern = query.query

    # Resolve every type to its ancestor. Dedup while keeping first-seen
    # order so the '|'-joined pattern below is deterministic (a set()
    # round-trip would give an arbitrary order).
    ancestors = []
    for entity_type in entity.entity_type:
        ancestor = self.searchet_ontology.get_pedigree(namespace, entity_type)
        if ancestor not in ancestors:
            ancestors.append(ancestor)

    # special treatment for entity text: match case-insensitively, but
    # slice the original query so the stored text keeps its casing
    text = ''
    lowered = query.query.lower()
    for candidate in entity.entity_text:
        if not candidate:
            # an empty candidate "matches" everywhere; ignore it
            continue
        low = lowered.find(candidate)
        if low != -1:
            # the slice end must be relative to the match start
            text = query.query[low:low + len(candidate)]
            break

    if text:
        query.token_entities[text] = ancestors
        for ancestor in ancestors:
            query.entity_tokens[ancestor] = text
        query.feature_words = query.feature_words.replace(text, "")

        # Only substitute when there is real text to replace: replacing
        # '' would insert the ancestors between every character. Falsy
        # ancestors are left out of the join so it cannot fail on them.
        labels = [a for a in ancestors if a]
        if labels:
            query.pattern = query.pattern.replace(text, '|'.join(labels))

    query.feature_words = [
        t.lower().strip().capitalize()
        for t in query.feature_words.split() if t
    ]
    # split into tokens (iterating the string itself yields characters)
    query.pattern = [p.strip() for p in query.pattern.split() if p]
    return query
def analyze_alchemyapi(self, entities):
    '''Build a fresh Query annotated with the given AlchemyAPI entities.

    For each entity this:
      1. looks up its ancestor via ``self.searchet_ontology.get_pedigree``
         using the entity's ``entity_type``,
      2. records the entity text <-> ancestor mapping on the query,
      3. rewrites the query: the entity text is replaced by ``str(ancestor)``
         in ``pattern`` and removed from ``feature_words``.

    ``entities`` is an iterable of objects exposing ``entity_type`` and
    ``entity_text``. Returns the new Query; ``feature_words`` and
    ``pattern`` end up as lists of whitespace-separated tokens.
    '''
    namespace = 'http://www.alchemyapi.com/api/entity/types.html#'

    # the alchemyapi Query instance, seeded from the raw query string
    result = Query(self.query.query)
    result.feature_words = result.query
    result.pattern = result.query

    for item in entities:
        pedigree = self.searchet_ontology.get_pedigree(
            namespace, item.entity_type)
        surface = item.entity_text

        # two-way lookup between the matched text and its ancestor
        result.token_entities[surface] = pedigree
        result.entity_tokens[pedigree] = surface

        # both fields are still plain strings while inside this loop
        result.pattern = result.pattern.replace(surface, str(pedigree))
        result.feature_words = result.feature_words.replace(surface, "")

    # feature words: whatever is left of the query, lowercased then capitalized
    words = []
    for word in result.feature_words.split():
        if word:
            words.append(word.strip().lower().capitalize())
    result.feature_words = words

    # pattern: the rewritten query split into tokens
    pieces = []
    for piece in result.pattern.split():
        if piece:
            pieces.append(piece.strip())
    result.pattern = pieces

    return result
def analyze_freebase(self, entity):
    '''Build a fresh Query annotated with a single Freebase entity.

    Each value in ``entity.entity_type`` is resolved to an ancestor via
    ``self.searchet_ontology.get_pedigree``. The first non-empty candidate
    in ``entity.entity_text`` that occurs in the lowercased query is taken
    as the entity text (sliced from the original query so its casing is
    kept).

    When an entity text is found:
      * ``token_entities[text]`` is set to the list of ancestors,
      * ``entity_tokens[ancestor]`` is set to ``text`` for every ancestor,
      * the text is removed from ``feature_words``,
      * the text is replaced in ``pattern`` by the ancestors joined
        with ``'|'``.

    Returns the new Query; ``feature_words`` and ``pattern`` end up as
    lists of whitespace-separated tokens, matching ``analyze_alchemyapi``.

    NOTE(review): assumes ``entity.entity_text`` is an iterable of
    lowercase candidate strings and that ancestors are strings (or falsy
    when unknown) -- confirm against the caller and ``get_pedigree``.
    '''
    namespace = 'http://schemas.freebaseapps.com/type?id='

    # the freebase Query instance
    query = Query(self.query.query)
    query.feature_words = query.query
    query.pattern = query.query

    # Resolve every type to its ancestor. Dedup while keeping first-seen
    # order so the '|'-joined pattern below is deterministic (a set()
    # round-trip would give an arbitrary order).
    ancestors = []
    for entity_type in entity.entity_type:
        ancestor = self.searchet_ontology.get_pedigree(namespace, entity_type)
        if ancestor not in ancestors:
            ancestors.append(ancestor)

    # special treatment for entity text: match case-insensitively, but
    # slice the original query so the stored text keeps its casing
    text = ''
    lowered = query.query.lower()
    for candidate in entity.entity_text:
        if not candidate:
            # an empty candidate "matches" everywhere; ignore it
            continue
        low = lowered.find(candidate)
        if low != -1:
            # the slice end must be relative to the match start
            text = query.query[low:low + len(candidate)]
            break

    if text:
        query.token_entities[text] = ancestors
        for ancestor in ancestors:
            query.entity_tokens[ancestor] = text
        query.feature_words = query.feature_words.replace(text, "")

        # Only substitute when there is real text to replace: replacing
        # '' would insert the ancestors between every character. Falsy
        # ancestors are left out of the join so it cannot fail on them.
        labels = [a for a in ancestors if a]
        if labels:
            query.pattern = query.pattern.replace(text, '|'.join(labels))

    query.feature_words = [
        t.lower().strip().capitalize()
        for t in query.feature_words.split() if t
    ]
    # split into tokens (iterating the string itself yields characters)
    query.pattern = [p.strip() for p in query.pattern.split() if p]
    return query
def __init__(self, query=None):
    '''Wrap ``query`` in a Query and create the ontology helper.

    ``query`` is passed straight to ``Query`` (``None`` when omitted).
    The ontology is constructed from the ``searchet_v2.n3`` source in
    ``n3`` format and stored on ``self.searchet_ontology``.
    '''
    ontology_options = {'source': 'searchet_v2.n3', 'format': 'n3'}

    self.query = Query(query)
    # instance of searchet-ontology-api
    self.searchet_ontology = SearchetOntology(**ontology_options)