# NOTE(review): a second definition of `extract_entity` follows this one in
# the file and rebinds the name, so this copy is shadowed at import time.
# Consider deleting one of the two.
def extract_entity(db, dataset):
    """Annotate every pending document and hand it over to the main store.

    All documents are read from the ``MongoHC`` handle built with
    ``dataset + '_for_alchemy'`` (ordered by ``id_doc``). For each document:

    * ``getAnnotation(doc['text'])`` is called; its third result is stored
      under ``doc['alchemy_response']``.
    * ``get_abstract`` is called for every entity in the returned entity
      set; truthy results are appended to ``doc['abstracts']``.
    * the entity set is stored as a list under ``doc['entity_set']``.
    * the document is saved through the ``MongoHC`` handle built with
      ``dataset`` and then removed from the source handle by ``id_doc``.

    Any exception raised while processing one document is logged (message
    plus formatted traceback) and the loop moves on to the next document.
    A document that fails before the removal step is therefore left in the
    source handle.

    Args:
        db: Passed unchanged as the first argument of ``MongoHC``.
        dataset: Base name; the source handle uses it with the
            ``'_for_alchemy'`` suffix, the destination handle uses it as-is.

    Returns:
        None.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    import traceback

    mongo_from = MongoHC(db, dataset + '_for_alchemy')
    mongo_to = MongoHC(db, dataset)

    # Materialise the result once up front: documents are removed from the
    # source while we iterate, so we must not walk a live result set.
    docs = list(mongo_from.get_all(order_by='id_doc'))

    for doc in docs:
        logfun.info('#' * 80)
        # Outside the try on purpose (as in the original): a document
        # without 'id_doc' still raises KeyError to the caller.
        logfun.info('Scanning documents: %(id_doc)s' % doc)
        logfun.info('#' * 80)

        try:
            entity_set, _annotations_sorted, response = getAnnotation(
                doc['text'])

            doc['abstracts'] = []
            doc['alchemy_response'] = response

            for entity in entity_set:
                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    doc['abstracts'].append(abstract)
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)

            doc['entity_set'] = list(entity_set)

            # Save first, remove second: a failed save leaves the document
            # in the source handle.
            mongo_to.save_document(doc)
            mongo_from.remove_document_by_id(doc['id_doc'])
        except Exception as exc:
            # Best-effort batch: log and continue with the next document.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack, not the repr of the traceback object.
            logfun.error(traceback.format_exc())
# NOTE(review): an earlier definition of `extract_entity` with the same body
# appears just above in this file; this later one is the definition Python
# actually binds. Consider deleting one of the two.
def extract_entity(db, dataset):
    """Annotate every pending document and hand it over to the main store.

    All documents are read from the ``MongoHC`` handle built with
    ``dataset + '_for_alchemy'`` (ordered by ``id_doc``). For each document:

    * ``getAnnotation(doc['text'])`` is called; its third result is stored
      under ``doc['alchemy_response']``.
    * ``get_abstract`` is called for every entity in the returned entity
      set; truthy results are appended to ``doc['abstracts']``.
    * the entity set is stored as a list under ``doc['entity_set']``.
    * the document is saved through the ``MongoHC`` handle built with
      ``dataset`` and then removed from the source handle by ``id_doc``.

    Any exception raised while processing one document is logged (message
    plus formatted traceback) and the loop moves on to the next document.
    A document that fails before the removal step is therefore left in the
    source handle.

    Args:
        db: Passed unchanged as the first argument of ``MongoHC``.
        dataset: Base name; the source handle uses it with the
            ``'_for_alchemy'`` suffix, the destination handle uses it as-is.

    Returns:
        None.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    import traceback

    mongo_from = MongoHC(db, dataset + '_for_alchemy')
    mongo_to = MongoHC(db, dataset)

    # Materialise the result once up front: documents are removed from the
    # source while we iterate, so we must not walk a live result set.
    docs = list(mongo_from.get_all(order_by='id_doc'))

    for doc in docs:
        logfun.info('#' * 80)
        # Outside the try on purpose (as in the original): a document
        # without 'id_doc' still raises KeyError to the caller.
        logfun.info('Scanning documents: %(id_doc)s' % doc)
        logfun.info('#' * 80)

        try:
            entity_set, _annotations_sorted, response = getAnnotation(
                doc['text'])

            doc['abstracts'] = []
            doc['alchemy_response'] = response

            for entity in entity_set:
                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    doc['abstracts'].append(abstract)
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)

            doc['entity_set'] = list(entity_set)

            # Save first, remove second: a failed save leaves the document
            # in the source handle.
            mongo_to.save_document(doc)
            mongo_from.remove_document_by_id(doc['id_doc'])
        except Exception as exc:
            # Best-effort batch: log and continue with the next document.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack, not the repr of the traceback object.
            logfun.error(traceback.format_exc())