def extract_abstract_dandelion(db, dataset):
    """Store abstracts for the Dandelion entities found in a dataset.

    Every document of the ``dataset`` collection (requested with
    ``order_by='id_doc'``) is scanned for the ``['lod']['dbpedia']`` value of
    each entry in ``doc['dandelion']['annotations']``.  For each value that
    is not already present in the ``dbpedia`` collection, ``get_abstract`` is
    called and a non-empty result is saved as
    ``{'_id': <value>, 'abstract': <abstract>}``.

    Any exception raised while handling a document is logged and swallowed,
    so processing continues with the next document.

    :param db: first argument handed to ``MongoHC``.
    :param dataset: name of the collection holding the annotated documents.
    """
    # Imported locally so the error handler below does not rely on a
    # module-level import being present.
    import traceback

    mongo = MongoHC(db, dataset)
    mongo_dbpedia = MongoHC(db, 'dbpedia')

    # Materialise the result before looping, as the original code did.
    docs = list(mongo.get_all(order_by='id_doc'))

    for doc in docs:
        try:
            entities = [
                annotation['lod']['dbpedia']
                for annotation in doc['dandelion']['annotations']
            ]
            for entity in entities:
                if mongo_dbpedia.get_element_by_mongo_id(entity):
                    logfun.info('Entities already in database')
                    continue

                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    mongo_dbpedia.save_document(
                        {'_id': entity, 'abstract': abstract})
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)
        except Exception as exc:
            # ``except ... as`` is valid on Python 2.6+ and Python 3, and the
            # distinct name no longer clobbers the entity loop variable.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack; the raw traceback object only prints
            # as "<traceback object at 0x...>".
            logfun.error(traceback.format_exc())
def extract_entity(db, dataset):
    """Annotate pending documents and move them into the ``dataset`` collection.

    Documents are read from the ``dataset + '_for_alchemy'`` collection
    (requested with ``order_by='id_doc'``).  For each one, ``getAnnotation``
    is run on ``doc['text']`` and the document is enriched with:

    * ``alchemy_response`` -- the third value returned by ``getAnnotation``;
    * ``abstracts`` -- the non-empty results of ``get_abstract`` for every
      entity in the returned entity set;
    * ``entity_set`` -- the entity set converted to a list.

    The enriched document is saved into ``dataset`` and then removed from
    the source collection by its ``id_doc``.  Exceptions are logged per
    document and swallowed; a document that fails before the removal step is
    therefore left in the source collection.

    :param db: first argument handed to ``MongoHC``.
    :param dataset: name of the destination collection; the source collection
        name is derived by appending ``'_for_alchemy'``.
    """
    # Imported locally so the error handler below does not rely on a
    # module-level import being present.
    import traceback

    mongo_from = MongoHC(db, dataset + '_for_alchemy')
    mongo_to = MongoHC(db, dataset)

    # Materialise once; the list itself is never mutated while iterating, so
    # no additional defensive copy is needed.
    docs = list(mongo_from.get_all(order_by='id_doc'))

    for doc in docs:
        logfun.info('#' * 80)
        logfun.info('Scanning documents: %(id_doc)s' % doc)
        logfun.info('#' * 80)
        try:
            # The middle value is not used here.
            entity_set, _annotations_sorted, response = getAnnotation(
                doc['text'])
            doc['abstracts'] = []
            doc['alchemy_response'] = response

            for entity in entity_set:
                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    doc['abstracts'].append(abstract)
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)

            doc['entity_set'] = list(entity_set)
            mongo_to.save_document(doc)
            # Remove from the source only after the save has succeeded.
            mongo_from.remove_document_by_id(doc['id_doc'])
        except Exception as exc:
            # ``except ... as`` works on Python 2.6+ and Python 3.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack rather than the traceback object repr.
            logfun.error(traceback.format_exc())
def extract_abstract_dandelion(db, dataset):
    """Store abstracts for the Dandelion entities found in a dataset.

    Reads all documents of the ``dataset`` collection (requested with
    ``order_by='id_doc'``) and takes the ``['lod']['dbpedia']`` value of each
    entry in ``doc['dandelion']['annotations']``.  Values already present in
    the ``dbpedia`` collection are skipped; for the others ``get_abstract``
    is called and a non-empty result is saved as
    ``{'_id': <value>, 'abstract': <abstract>}``.

    Exceptions raised while handling a document are logged and swallowed so
    that the remaining documents are still processed.

    :param db: first argument handed to ``MongoHC``.
    :param dataset: name of the collection holding the annotated documents.
    """
    # Function-scope import: keeps the error handler independent of the
    # module's import list.
    import traceback

    mongo = MongoHC(db, dataset)
    mongo_dbpedia = MongoHC(db, 'dbpedia')

    # Fully materialised before the loop, matching the original behaviour.
    docs = list(mongo.get_all(order_by='id_doc'))

    for doc in docs:
        try:
            annotations = doc['dandelion']['annotations']
            entities = [item['lod']['dbpedia'] for item in annotations]

            for entity in entities:
                if mongo_dbpedia.get_element_by_mongo_id(entity):
                    logfun.info('Entities already in database')
                    continue

                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    mongo_dbpedia.save_document(
                        {'_id': entity, 'abstract': abstract})
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)
        except Exception as exc:
            # ``as`` form is valid on Python 2.6+ and Python 3; a separate
            # name avoids rebinding the entity loop variable.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # The traceback object itself has no useful string form, so log
            # the formatted stack instead.
            logfun.error(traceback.format_exc())
def extract_entity(db, dataset):
    """Annotate pending documents and move them into the ``dataset`` collection.

    Source documents come from the ``dataset + '_for_alchemy'`` collection
    (requested with ``order_by='id_doc'``).  ``getAnnotation`` is run on each
    document's ``text``; the document then receives:

    * ``alchemy_response`` -- the third value returned by ``getAnnotation``;
    * ``abstracts`` -- every non-empty ``get_abstract`` result for the
      entities in the returned entity set;
    * ``entity_set`` -- the entity set as a list.

    The document is saved into ``dataset`` and afterwards removed from the
    source collection using its ``id_doc``.  Exceptions are logged per
    document and swallowed, so a document that fails before the removal step
    remains in the source collection.

    :param db: first argument handed to ``MongoHC``.
    :param dataset: destination collection name; ``'_for_alchemy'`` is
        appended to obtain the source collection name.
    """
    # Function-scope import: keeps the error handler independent of the
    # module's import list.
    import traceback

    mongo_from = MongoHC(db, dataset + '_for_alchemy')
    mongo_to = MongoHC(db, dataset)

    # One materialisation is enough: the list is not modified in the loop.
    docs = list(mongo_from.get_all(order_by='id_doc'))

    for doc in docs:
        logfun.info('#' * 80)
        logfun.info('Scanning documents: %(id_doc)s' % doc)
        logfun.info('#' * 80)
        try:
            # Second returned value is intentionally ignored.
            entity_set, _annotations_sorted, response = getAnnotation(
                doc['text'])
            doc['abstracts'] = []
            doc['alchemy_response'] = response

            for entity in entity_set:
                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    doc['abstracts'].append(abstract)
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)

            doc['entity_set'] = list(entity_set)
            mongo_to.save_document(doc)
            # Only drop the source copy once the enriched one is stored.
            mongo_from.remove_document_by_id(doc['id_doc'])
        except Exception as exc:
            # Valid on Python 2.6+ and Python 3.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Formatted stack instead of "<traceback object at 0x...>".
            logfun.error(traceback.format_exc())
def extract_alchemy(db, dataset):
    """Attach an ``alchemy_response`` to documents that do not have one yet.

    Documents are selected with
    ``MongoHC.get_doc_with_no_key('alchemy_response')``.  For each of them
    ``getAnnotation`` is called on ``doc['text']``; the third returned value
    is stored under ``alchemy_response`` and the document is saved back to
    the same collection.

    Exceptions are logged per document and swallowed, so the remaining
    documents are still processed.

    :param db: first argument handed to ``MongoHC``.
    :param dataset: name of the collection to process.
    """
    # Imported locally so the error handler below does not rely on a
    # module-level import being present.
    import traceback

    mongo = MongoHC(db, dataset)

    # Materialise the selection before the loop, as the original code did;
    # documents are saved back into the same collection while looping.
    docs = list(mongo.get_doc_with_no_key('alchemy_response'))

    for doc in docs:
        try:
            # Only the raw response is needed; unpacking still requires
            # getAnnotation to return exactly three values.
            _entity_set, _annotations_sorted, response = getAnnotation(
                doc['text'])
            doc['alchemy_response'] = response
            mongo.save_document(doc)
        except Exception as exc:
            # ``except ... as`` works on Python 2.6+ and Python 3.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack rather than the traceback object repr.
            logfun.error(traceback.format_exc())
def extract_dandelion(db, dataset):
    """Attach Dandelion annotations to documents that do not have them yet.

    Documents are selected with
    ``MongoHC.get_doc_with_no_key('dandelion', order_by='id_doc')``.  For
    each one ``get_entities_from_dandelion`` is called on ``doc['text']``;
    the result's ``timestamp`` is logged, the result is stored under
    ``dandelion`` and the document is saved back to the same collection.

    Exceptions are logged (formatted traceback) per document and swallowed.

    :param db: first argument handed to ``MongoHC``.
    :param dataset: name of the collection to process.
    """
    mongo = MongoHC(db, dataset)

    # Materialise the selection before the loop, as the original code did;
    # documents are saved back into the same collection while looping.
    docs = list(mongo.get_doc_with_no_key('dandelion', order_by='id_doc'))

    for doc in docs:
        try:
            dan = get_entities_from_dandelion(doc['text'])
            logfun.info(dan['timestamp'])
            doc['dandelion'] = dan
            mongo.save_document(doc)
        except Exception:
            # No binding needed: format_exc() already includes the exception.
            # This form is valid on both Python 2 and Python 3.
            logfun.error(traceback.format_exc())
def extract_alchemy(db, dataset):
    """Attach an ``alchemy_response`` to documents that do not have one yet.

    Selects documents through
    ``MongoHC.get_doc_with_no_key('alchemy_response')``, calls
    ``getAnnotation`` on each document's ``text``, stores the third returned
    value under ``alchemy_response`` and saves the document back to the same
    collection.

    A failure on one document is logged and swallowed; processing continues
    with the next document.

    :param db: first argument handed to ``MongoHC``.
    :param dataset: name of the collection to process.
    """
    # Function-scope import: keeps the error handler independent of the
    # module's import list.
    import traceback

    mongo = MongoHC(db, dataset)

    # Fully materialised before the loop, matching the original behaviour.
    docs = list(mongo.get_doc_with_no_key('alchemy_response'))

    for doc in docs:
        try:
            # First two returned values are not used here.
            _entity_set, _annotations_sorted, response = getAnnotation(
                doc['text'])
            doc['alchemy_response'] = response
            mongo.save_document(doc)
        except Exception as exc:
            # Valid on Python 2.6+ and Python 3.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Formatted stack instead of "<traceback object at 0x...>".
            logfun.error(traceback.format_exc())
def test_fabio(db, dataset, gamma=0.5, ranking_metric='pr', lsa=False, save=False):
    """Run ``clf.cluster_fabio`` on a dataset and print its result.

    The result is printed with ``pp.pprint``.  When ``save`` is true it is
    first stored in the ``test_fabio`` collection.

    :param db: first argument handed to ``MongoHC`` and ``clf.cluster_fabio``.
    :param dataset: dataset name forwarded to ``clf.cluster_fabio``.
    :param gamma: forwarded as ``gamma``.
    :param ranking_metric: forwarded as ``ranking_metric``.
    :param lsa: forwarded as ``with_lsa``.
    :param save: whether to persist the result before printing it.
    """
    # The handle is created up front, before clustering, whether or not the
    # result ends up being saved.
    store = MongoHC(db, 'test_fabio')

    options = {
        'gamma': gamma,
        'ranking_metric': ranking_metric,
        'with_lsa': lsa,
    }
    outcome = clf.cluster_fabio(db, dataset, **options)

    if save:
        store.save_document(outcome)

    pp.pprint(outcome)
def extract_dandelion(db, dataset):
    """Attach Dandelion annotations to documents that do not have them yet.

    Selects documents through
    ``MongoHC.get_doc_with_no_key('dandelion', order_by='id_doc')``.  Each
    document's ``text`` is passed to ``get_entities_from_dandelion``; the
    ``timestamp`` of the result is logged, the result is stored under
    ``dandelion`` and the document is saved back to the same collection.

    A failure on one document is logged with its formatted traceback and
    swallowed; processing continues with the next document.

    :param db: first argument handed to ``MongoHC``.
    :param dataset: name of the collection to process.
    """
    mongo = MongoHC(db, dataset)

    # Fully materialised before the loop, matching the original behaviour.
    docs = list(mongo.get_doc_with_no_key('dandelion', order_by='id_doc'))

    for doc in docs:
        try:
            dan = get_entities_from_dandelion(doc['text'])
            logfun.info(dan['timestamp'])
            doc['dandelion'] = dan
            mongo.save_document(doc)
        except Exception:
            # The exception object was never used, so it is not bound; this
            # spelling parses on both Python 2 and Python 3.
            logfun.error(traceback.format_exc())
def extract_abstract(db, dataset):
    """Fill in abstracts for the documents returned by ``get_empty_abstract``.

    For every document returned by ``MongoHC.get_empty_abstract()``,
    ``get_abstract`` is called for each entry of ``doc['entity_set']`` and
    each non-empty result is appended to ``doc['abstracts']``.  The document
    is then saved back to the same collection.

    Exceptions are logged per document and swallowed; a document that fails
    is not saved.

    :param db: first argument handed to ``MongoHC``.
    :param dataset: name of the collection to process.
    """
    # Imported locally so the error handler below does not rely on a
    # module-level import being present.
    import traceback

    mongo = MongoHC(db, dataset)

    # Materialise the selection before the loop, as the original code did;
    # documents are saved back into the same collection while looping.
    docs = list(mongo.get_empty_abstract())

    for doc in docs:
        try:
            for entity in doc['entity_set']:
                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    # NOTE(review): relies on 'abstracts' already existing on
                    # the document -- confirm against get_empty_abstract().
                    doc['abstracts'].append(abstract)
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)
            mongo.save_document(doc)
        except Exception as exc:
            # ``except ... as`` works on Python 2.6+ and Python 3, and the
            # distinct name no longer clobbers the entity loop variable.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack rather than the traceback object repr.
            logfun.error(traceback.format_exc())
def extract_abstract(db, dataset):
    """Fill in abstracts for the documents returned by ``get_empty_abstract``.

    Iterates over the documents returned by ``MongoHC.get_empty_abstract()``.
    For each entry of ``doc['entity_set']`` it calls ``get_abstract`` and
    appends every non-empty result to ``doc['abstracts']``, then saves the
    document back to the same collection.

    A failure on one document is logged and swallowed; that document is not
    saved and processing continues with the next one.

    :param db: first argument handed to ``MongoHC``.
    :param dataset: name of the collection to process.
    """
    # Function-scope import: keeps the error handler independent of the
    # module's import list.
    import traceback

    mongo = MongoHC(db, dataset)

    # Fully materialised before the loop, matching the original behaviour.
    docs = list(mongo.get_empty_abstract())

    for doc in docs:
        try:
            for entity in doc['entity_set']:
                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    # NOTE(review): assumes the document already carries an
                    # 'abstracts' list -- verify against get_empty_abstract().
                    doc['abstracts'].append(abstract)
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)
            mongo.save_document(doc)
        except Exception as exc:
            # Valid on Python 2.6+ and Python 3; separate name keeps the
            # entity loop variable intact.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Formatted stack instead of "<traceback object at 0x...>".
            logfun.error(traceback.format_exc())
def test_fabio(db, dataset, gamma=0.5, ranking_metric="pr", lsa=False, save=False):
    """Cluster a dataset with ``clf.cluster_fabio`` and print the outcome.

    The clustering result is optionally saved to the ``test_fabio``
    collection (when ``save`` is true) and is always printed through
    ``pp.pprint``.

    :param db: first argument handed to ``MongoHC`` and ``clf.cluster_fabio``.
    :param dataset: dataset name forwarded to ``clf.cluster_fabio``.
    :param gamma: forwarded unchanged as ``gamma``.
    :param ranking_metric: forwarded unchanged as ``ranking_metric``.
    :param lsa: forwarded as the ``with_lsa`` keyword.
    :param save: persist the result before printing when true.
    """
    # Created before clustering runs, regardless of ``save``.
    results_store = MongoHC(db, "test_fabio")

    clustering = clf.cluster_fabio(
        db,
        dataset,
        gamma=gamma,
        ranking_metric=ranking_metric,
        with_lsa=lsa,
    )

    if save:
        results_store.save_document(clustering)

    pp.pprint(clustering)