def extract_abstract_dandelion(db, dataset):
    """Store an abstract for each DBpedia entity found in Dandelion annotations.

    Every document returned by ``MongoHC(db, dataset).get_all`` (ordered by
    ``id_doc``) is scanned. For each entry of
    ``doc['dandelion']['annotations']`` the ``['lod']['dbpedia']`` value is
    taken as the entity identifier. Entities for which
    ``get_element_by_mongo_id`` on the ``'dbpedia'`` handle returns a truthy
    value are skipped; for the others ``get_abstract`` is called and, when it
    returns something truthy, ``{'_id': entity, 'abstract': abstract}`` is
    saved through the ``'dbpedia'`` handle.

    Args:
        db: First argument forwarded to ``MongoHC`` (presumably the database
            name -- confirm against ``MongoHC``).
        dataset: Second argument forwarded to ``MongoHC`` for the source
            documents.

    Returns:
        None. Any exception raised while processing a document is logged and
        the remaining entities of that document are skipped.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import traceback

    mongo = MongoHC(db, dataset)
    mongo_dbpedia = MongoHC(db, 'dbpedia')
    docs = list(mongo.get_all(order_by='id_doc'))

    for doc in docs:
        try:
            entities = [
                annotation['lod']['dbpedia']
                for annotation in doc['dandelion']['annotations']
            ]
            for entity in entities:
                if mongo_dbpedia.get_element_by_mongo_id(entity):
                    logfun.info('Entities already in database')
                    continue
                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    mongo_dbpedia.save_document(
                        {'_id': entity, 'abstract': abstract})
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)
        except Exception as exc:
            # Best effort: log the failure and move on to the next document.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack trace, not the traceback object's repr.
            logfun.error(traceback.format_exc())
def extract_entity(db, dataset):
    """Annotate pending documents and move them to the ``dataset`` store.

    Documents are read from ``MongoHC(db, dataset + '_for_alchemy')`` ordered
    by ``id_doc``. For each one, ``getAnnotation(doc['text'])`` supplies an
    entity set and a response; the response is stored under
    ``'alchemy_response'``, the truthy results of ``get_abstract`` for each
    entity are collected under ``'abstracts'``, and the entities are stored as
    a list under ``'entity_set'``. The enriched document is saved through
    ``MongoHC(db, dataset)`` and then removed from the source handle by its
    ``id_doc``.

    Args:
        db: First argument forwarded to ``MongoHC`` (presumably the database
            name -- confirm against ``MongoHC``).
        dataset: Name used for the destination handle; the source handle uses
            the same name with the ``'_for_alchemy'`` suffix.

    Returns:
        None. Exceptions raised while annotating, saving or removing a
        document are logged and processing continues with the next document.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import traceback

    mongo_from = MongoHC(db, dataset + '_for_alchemy')
    mongo_to = MongoHC(db, dataset)

    # Materialise the result first: documents are removed from the source
    # handle while we loop.
    docs = list(mongo_from.get_all(order_by='id_doc'))

    for doc in docs:
        logfun.info('#' * 80)
        logfun.info('Scanning documents: %(id_doc)s' % doc)
        logfun.info('#' * 80)
        try:
            # The second returned value is not needed here.
            entity_set, _, response = getAnnotation(doc['text'])
            doc['abstracts'] = []
            doc['alchemy_response'] = response
            for entity in entity_set:
                logfun.info('Extracting abstract for entity %s' % entity)

                abstract = get_abstract(entity)
                if abstract:
                    doc['abstracts'].append(abstract)
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)

            doc['entity_set'] = list(entity_set)
            mongo_to.save_document(doc)
            # Only reached when the save above did not raise.
            mongo_from.remove_document_by_id(doc['id_doc'])
        except Exception as exc:
            # Best effort: log the failure and move on to the next document.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack trace, not the traceback object's repr.
            logfun.error(traceback.format_exc())
def extract_abstract_dandelion(db, dataset):
    """Save abstracts for DBpedia entities referenced by Dandelion annotations.

    Iterates over the documents returned by
    ``MongoHC(db, dataset).get_all(order_by='id_doc')``. The entity
    identifiers are the ``['lod']['dbpedia']`` values of the entries in
    ``doc['dandelion']['annotations']``. An entity is skipped when
    ``get_element_by_mongo_id`` on the ``'dbpedia'`` handle returns a truthy
    value; otherwise ``get_abstract`` is called and a truthy result is saved
    as ``{'_id': entity, 'abstract': abstract}`` through that handle.

    Args:
        db: First argument forwarded to ``MongoHC`` (presumably the database
            name -- confirm against ``MongoHC``).
        dataset: Second argument forwarded to ``MongoHC`` for the source
            documents.

    Returns:
        None. An exception raised while handling a document is logged and the
        rest of that document's entities are skipped.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import traceback

    mongo = MongoHC(db, dataset)
    mongo_dbpedia = MongoHC(db, 'dbpedia')
    docs = list(mongo.get_all(order_by='id_doc'))

    for doc in docs:
        try:
            entities = [
                annotation['lod']['dbpedia']
                for annotation in doc['dandelion']['annotations']
            ]
            for entity in entities:
                if mongo_dbpedia.get_element_by_mongo_id(entity):
                    logfun.info('Entities already in database')
                    continue
                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    mongo_dbpedia.save_document(
                        {'_id': entity, 'abstract': abstract})
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)
        except Exception as exc:
            # Best effort: log the failure and move on to the next document.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack trace, not the traceback object's repr.
            logfun.error(traceback.format_exc())
def extract_entity(db, dataset):
    """Enrich pending documents with annotations and abstracts, then move them.

    Source documents come from ``MongoHC(db, dataset + '_for_alchemy')``
    ordered by ``id_doc``. ``getAnnotation(doc['text'])`` yields an entity set
    and a response: the response goes under ``'alchemy_response'``, truthy
    ``get_abstract`` results go under ``'abstracts'`` and the entities are
    stored as a list under ``'entity_set'``. The document is then saved via
    ``MongoHC(db, dataset)`` and removed from the source handle by ``id_doc``.

    Args:
        db: First argument forwarded to ``MongoHC`` (presumably the database
            name -- confirm against ``MongoHC``).
        dataset: Name used for the destination handle; the source handle uses
            the same name with the ``'_for_alchemy'`` suffix.

    Returns:
        None. Exceptions raised while annotating, saving or removing a
        document are logged and processing continues with the next document.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import traceback

    mongo_from = MongoHC(db, dataset + '_for_alchemy')
    mongo_to = MongoHC(db, dataset)

    # Materialise the result first: documents are removed from the source
    # handle while we loop.
    docs = list(mongo_from.get_all(order_by='id_doc'))

    for doc in docs:
        logfun.info('#' * 80)
        logfun.info('Scanning documents: %(id_doc)s' % doc)
        logfun.info('#' * 80)
        try:
            # The second returned value is not needed here.
            entity_set, _, response = getAnnotation(doc['text'])
            doc['abstracts'] = []
            doc['alchemy_response'] = response
            for entity in entity_set:
                logfun.info('Extracting abstract for entity %s' % entity)

                abstract = get_abstract(entity)
                if abstract:
                    doc['abstracts'].append(abstract)
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)

            doc['entity_set'] = list(entity_set)
            mongo_to.save_document(doc)
            # Only reached when the save above did not raise.
            mongo_from.remove_document_by_id(doc['id_doc'])
        except Exception as exc:
            # Best effort: log the failure and move on to the next document.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack trace, not the traceback object's repr.
            logfun.error(traceback.format_exc())
def extract_alchemy(db, dataset):
    """Attach an ``'alchemy_response'`` to documents selected for annotation.

    The documents come from
    ``MongoHC(db, dataset).get_doc_with_no_key('alchemy_response')``
    (presumably those lacking that key -- confirm against ``MongoHC``). For
    each one, the third value returned by ``getAnnotation(doc['text'])`` is
    stored under ``'alchemy_response'`` and the document is saved back.

    Args:
        db: First argument forwarded to ``MongoHC``.
        dataset: Second argument forwarded to ``MongoHC``.

    Returns:
        None. Exceptions raised for a document are logged and the loop
        continues with the next document.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import traceback

    mongo = MongoHC(db, dataset)

    docs = list(mongo.get_doc_with_no_key('alchemy_response'))

    for doc in docs:
        try:
            # Only the response is used; the first two values are discarded.
            _, _, response = getAnnotation(doc['text'])
            doc['alchemy_response'] = response
            mongo.save_document(doc)
        except Exception as exc:
            # Best effort: log the failure and move on to the next document.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack trace, not the traceback object's repr.
            logfun.error(traceback.format_exc())
def extract_dandelion(db, dataset):
    """Attach Dandelion results to documents selected for annotation.

    The documents come from
    ``MongoHC(db, dataset).get_doc_with_no_key('dandelion', order_by='id_doc')``
    (presumably those lacking a ``'dandelion'`` key -- confirm against
    ``MongoHC``). For each one, ``get_entities_from_dandelion(doc['text'])``
    is called, its ``'timestamp'`` entry is logged, and the whole result is
    stored under ``doc['dandelion']`` before the document is saved.

    Args:
        db: First argument forwarded to ``MongoHC``.
        dataset: Second argument forwarded to ``MongoHC``.

    Returns:
        None. Exceptions raised for a document are logged with their formatted
        traceback and the loop continues with the next document.
    """
    mongo = MongoHC(db, dataset)

    docs = list(mongo.get_doc_with_no_key('dandelion', order_by='id_doc'))

    for doc in docs:
        try:
            dan = get_entities_from_dandelion(doc['text'])
            logfun.info(dan['timestamp'])
            doc['dandelion'] = dan
            mongo.save_document(doc)
        except Exception:
            # Best effort: log the stack trace and move on to the next document.
            logfun.error(traceback.format_exc())
def extract_alchemy(db, dataset):
    """Store the annotation response on documents selected for annotation.

    Documents are obtained from
    ``MongoHC(db, dataset).get_doc_with_no_key('alchemy_response')``
    (presumably those lacking that key -- confirm against ``MongoHC``). The
    third value returned by ``getAnnotation(doc['text'])`` is written to
    ``doc['alchemy_response']`` and the document is saved back.

    Args:
        db: First argument forwarded to ``MongoHC``.
        dataset: Second argument forwarded to ``MongoHC``.

    Returns:
        None. Exceptions raised for a document are logged and the loop
        continues with the next document.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import traceback

    mongo = MongoHC(db, dataset)

    docs = list(mongo.get_doc_with_no_key('alchemy_response'))

    for doc in docs:
        try:
            # Only the response is used; the first two values are discarded.
            _, _, response = getAnnotation(doc['text'])
            doc['alchemy_response'] = response
            mongo.save_document(doc)
        except Exception as exc:
            # Best effort: log the failure and move on to the next document.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack trace, not the traceback object's repr.
            logfun.error(traceback.format_exc())
# Example #8
# 0
def test_fabio(db,
               dataset,
               gamma=0.5,
               ranking_metric='pr',
               lsa=False,
               save=False):
    """Run ``clf.cluster_fabio`` and pretty-print what it returns.

    Args:
        db: Forwarded to ``MongoHC`` and to ``clf.cluster_fabio``.
        dataset: Forwarded to ``clf.cluster_fabio``.
        gamma: Passed through as the ``gamma`` keyword.
        ranking_metric: Passed through as the ``ranking_metric`` keyword.
        lsa: Passed through as the ``with_lsa`` keyword.
        save: When truthy, the result is also saved through the
            ``MongoHC(db, 'test_fabio')`` handle.

    Returns:
        None; the result is printed with ``pp.pprint``.
    """
    # Create the result store up front, before the clustering call.
    store = MongoHC(db, 'test_fabio')

    cluster_options = {
        'gamma': gamma,
        'ranking_metric': ranking_metric,
        'with_lsa': lsa,
    }
    outcome = clf.cluster_fabio(db, dataset, **cluster_options)

    if save:
        store.save_document(outcome)

    pp.pprint(outcome)
def extract_dandelion(db, dataset):
    """Run Dandelion entity extraction on documents selected for annotation.

    Documents are obtained from
    ``MongoHC(db, dataset).get_doc_with_no_key('dandelion', order_by='id_doc')``
    (presumably those lacking a ``'dandelion'`` key -- confirm against
    ``MongoHC``). The result of ``get_entities_from_dandelion(doc['text'])``
    has its ``'timestamp'`` entry logged, is stored under
    ``doc['dandelion']``, and the document is saved back.

    Args:
        db: First argument forwarded to ``MongoHC``.
        dataset: Second argument forwarded to ``MongoHC``.

    Returns:
        None. Exceptions raised for a document are logged with their formatted
        traceback and the loop continues with the next document.
    """
    mongo = MongoHC(db, dataset)

    docs = list(mongo.get_doc_with_no_key('dandelion', order_by='id_doc'))

    for doc in docs:
        try:
            dan = get_entities_from_dandelion(doc['text'])
            logfun.info(dan['timestamp'])
            doc['dandelion'] = dan
            mongo.save_document(doc)
        except Exception:
            # Best effort: log the stack trace and move on to the next document.
            logfun.error(traceback.format_exc())
def extract_abstract(db, dataset):
    """Fill in abstracts for the documents returned by ``get_empty_abstract``.

    For every document from ``MongoHC(db, dataset).get_empty_abstract()``,
    ``get_abstract`` is called on each element of ``doc['entity_set']`` and
    each truthy result is appended to ``doc['abstracts']``. The document is
    saved once all of its entities have been processed.

    Args:
        db: First argument forwarded to ``MongoHC``.
        dataset: Second argument forwarded to ``MongoHC``.

    Returns:
        None. If an exception is raised for a document it is logged, that
        document is not saved, and the loop continues with the next one.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import traceback

    mongo = MongoHC(db, dataset)

    docs = list(mongo.get_empty_abstract())

    for doc in docs:
        try:
            for entity in doc['entity_set']:
                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    doc['abstracts'].append(abstract)
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)

            mongo.save_document(doc)
        except Exception as exc:
            # Best effort: log the failure and move on to the next document.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack trace, not the traceback object's repr.
            logfun.error(traceback.format_exc())
def extract_abstract(db, dataset):
    """Collect abstracts for documents returned by ``get_empty_abstract``.

    Each document from ``MongoHC(db, dataset).get_empty_abstract()`` has
    ``get_abstract`` called on every element of its ``'entity_set'``; truthy
    results are appended to its ``'abstracts'`` list. The document is saved
    after its last entity has been handled.

    Args:
        db: First argument forwarded to ``MongoHC``.
        dataset: Second argument forwarded to ``MongoHC``.

    Returns:
        None. If an exception is raised for a document it is logged, that
        document is not saved, and the loop continues with the next one.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import traceback

    mongo = MongoHC(db, dataset)

    docs = list(mongo.get_empty_abstract())

    for doc in docs:
        try:
            for entity in doc['entity_set']:
                logfun.info('Extracting abstract for entity %s' % entity)
                abstract = get_abstract(entity)
                if abstract:
                    doc['abstracts'].append(abstract)
                else:
                    logfun.warning('Abstract not found!')
                logfun.info('-' * 80)

            mongo.save_document(doc)
        except Exception as exc:
            # Best effort: log the failure and move on to the next document.
            logfun.error("Something awful happened!")
            logfun.error(exc)
            # Log the formatted stack trace, not the traceback object's repr.
            logfun.error(traceback.format_exc())
# Example #12
# 0
def test_fabio(db, dataset, gamma=0.5, ranking_metric="pr", lsa=False, save=False):
    """Cluster ``dataset`` with ``clf.cluster_fabio`` and pretty-print the result.

    Args:
        db: Forwarded to ``MongoHC`` and to ``clf.cluster_fabio``.
        dataset: Forwarded to ``clf.cluster_fabio``.
        gamma: Passed through as the ``gamma`` keyword.
        ranking_metric: Passed through as the ``ranking_metric`` keyword.
        lsa: Passed through as the ``with_lsa`` keyword.
        save: When truthy, the result is also saved through the
            ``MongoHC(db, "test_fabio")`` handle.

    Returns:
        None; the result is printed with ``pp.pprint``.
    """
    # Create the result store up front, before the clustering call.
    store = MongoHC(db, "test_fabio")

    cluster_options = {
        "gamma": gamma,
        "ranking_metric": ranking_metric,
        "with_lsa": lsa,
    }
    outcome = clf.cluster_fabio(db, dataset, **cluster_options)

    if save:
        store.save_document(outcome)

    pp.pprint(outcome)