def run_relation_annotator(self, doctag, annotator):
    """
    Classify a document with a relation annotator and store the results.

    The whole document is annotated in one pass (all sentences submitted
    together) instead of sentence by sentence.

    :param doctag: tag of the document
    :param annotator: name of the relation annotator to run
    :return: JSON string mapping relation pid -> str(relation)
    """
    sentences = self.get_sentences(doctag)
    output = {}
    for a in self.relation_annotators:
        # a is (annotator_name, annotator_engine, annotator_etype)
        if a[0] != annotator:
            continue
        # Rebuild Sentence objects from the stored CoreNLP output and
        # attach their previously annotated entities.
        input_sentences = []
        for s in sentences:
            sentence = Sentence(s[2], offset=s[3], sid=s[1], did=doctag)
            sentence.process_corenlp_output(ast.literal_eval(s[4]))
            sentence = self.get_entities(sentence)
            input_sentences.append(sentence)
        sentence_results = self.relation_annotators[a].annotate_sentences(input_sentences)
        for sentence in input_sentences:
            if a[1] == "jsre":
                # BUGFIX: look up this sentence's results by its own sid.
                # The old code indexed with the stale loop variable `s`,
                # so every sentence reused the last sentence's predictions.
                pred, original = sentence_results[sentence.sid]
                sentence_relations = self.relation_annotators[a].process_sentence(pred, original, sentence)
            elif a[1] == "smil":
                sentence_relations = self.relation_annotators[a].process_sentence(sentence)
            for p in sentence_relations:
                self.add_relation(p, annotator)
                output[p.pid] = str(p)
    return json.dumps(output)
def run_entity_annotator(self, doctag, annotator):
    """
    Classify a document with an entity annotator and store the results.

    Each stored sentence is rebuilt from its cached CoreNLP output,
    annotated, and every recognized entity is normalized and inserted
    into the database.

    :param doctag: tag of the document
    :param annotator: name of the entity annotator to run
    :return: JSON string mapping entity id -> str(entity)
    """
    doc_sentences = self.get_sentences(doctag)
    data = bottle.request.json
    output = {}
    for key in self.entity_annotators:
        # key is (annotator_name, annotator_engine, annotator_etype)
        if key[0] != annotator:
            continue
        engine = self.entity_annotators[key]
        for row in doc_sentences:
            sentence = Sentence(row[2], offset=row[3], sid=row[1], did=doctag)
            #sentence.process_sentence(self.corenlp)
            sentence.process_corenlp_output(ast.literal_eval(row[4]))
            tokenized_text = " ".join(t.text for t in sentence.tokens)
            raw_result = engine.annotate_sentence(tokenized_text)
            #print raw_result
            entities = engine.process_sentence(raw_result, sentence)
            for eid in entities:
                entity = entities[eid]
                entity.normalize()
                self.add_entity(entity, annotator)
                output[eid] = str(entity)
    # print output
    return json.dumps(output)
def get_document(self, doctag):
    # NOTE(review): DEAD CODE — this method is redefined verbatim later in
    # the file, and the later definition wins at class-creation time.
    # One of the two copies should be deleted.
    # Return the document entry with this doctag as a JSON string, including
    # its sentences (with entities) under result['abstract']['sentences'];
    # returns a JSON error object if the doctag is unknown.
    cur = self.db_conn.cursor()
    query = """SELECT distinct id, doctag, title, doctext FROM document WHERE doctag =%s;"""
    # print "QUERY", query
    cur.execute(query, (doctag, ))
    res = cur.fetchone()
    if res is not None:
        # res columns: (id, doctag, title, doctext)
        result = {
            'docID': res[1],
            'title': res[2],
            'docText': res[3],
            'abstract': {
                'sentences': []
            }
        }
        sentences = self.get_sentences(doctag)
        for s in sentences:
            # Rebuild each sentence from its cached CoreNLP output and
            # attach its annotated entities before serializing.
            sentence = Sentence(s[2], offset=s[3], sid=s[1], did=doctag)
            sentence.process_corenlp_output(ast.literal_eval(s[4]))
            sentence = self.get_entities(sentence)
            result['abstract']['sentences'].append(sentence.get_dic("all"))
        output = json.dumps(result)
        return output
    else:
        return json.dumps(
            {'error': 'could not find document {}'.format(doctag)})
def get_document(self, doctag):
    """
    Return the document entry with this doctag as a JSON string.

    The payload contains docID, title, docText, and the document's
    sentences (with entities) under result['abstract']['sentences'].

    :param doctag: tag of the document to retrieve
    :return: JSON string with the document, or a JSON error object if
             no document with this doctag exists
    """
    cur = self.db_conn.cursor()
    try:
        query = """SELECT distinct id, doctag, title, doctext FROM document WHERE doctag =%s;"""
        cur.execute(query, (doctag,))
        res = cur.fetchone()
    finally:
        # FIX: the cursor was never closed, leaking one cursor per call
        # (including on query errors).
        cur.close()
    if res is None:
        return json.dumps({'error': 'could not find document {}'.format(doctag)})
    # res columns: (id, doctag, title, doctext)
    result = {'docID': res[1],
              'title': res[2],
              'docText': res[3],
              'abstract': {'sentences': []}}
    for s in self.get_sentences(doctag):
        # Rebuild each sentence from its cached CoreNLP output and
        # attach its annotated entities before serializing.
        sentence = Sentence(s[2], offset=s[3], sid=s[1], did=doctag)
        sentence.process_corenlp_output(ast.literal_eval(s[4]))
        sentence = self.get_entities(sentence)
        result['abstract']['sentences'].append(sentence.get_dic("all"))
    return json.dumps(result)