Example #1
0
def main(user_email,
         url_api_collection,
         log_handler=None,
         mail_handler=None,
         dir_profile='profiles',
         profile_path=None,
         config_file=None,
         redis_host=None,
         redis_port=None,
         redis_pswd=None,
         redis_timeout=600,
         rq_queue=None,
         run_image_harvest=False):
    '''Runs a UCLDC ingest process for the given collection'''
    cleanup_work_dir()  # remove files from /tmp
    emails = [user_email]
    if EMAIL_SYS_ADMIN:
        emails.extend([u for u in EMAIL_SYS_ADMIN.split(',')])
    if not mail_handler:
        mail_handler = logbook.MailHandler(
            EMAIL_RETURN_ADDRESS, emails, level='ERROR', bubble=True)
    mail_handler.push_application()
    if not config_file:
        config_file = os.environ.get('DPLA_CONFIG_FILE', 'akara.ini')
    if not (redis_host and redis_port and redis_pswd):
        config = config_harvest(config_file=config_file)

    try:
        collection = Collection(url_api_collection)
    except Exception, e:
        msg = 'Exception in Collection {}, init {}'.format(url_api_collection,
                                                           str(e))
        logbook.error(msg)
        raise e
def main(collection_keys,
         fieldName,
         newValue,
         substring=None,
         log_handler=None,
         config_file='akara.ini',
         rq_queue=None,
         **kwargs):
    config = config_harvest(config_file=config_file)

    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    for collection_key in [x for x in collection_keys.split(';')]:
        queue_update_couchdb_field(config['redis_host'],
                                   config['redis_port'],
                                   config['redis_password'],
                                   config['redis_connect_timeout'],
                                   rq_queue=rq_queue,
                                   fieldName=fieldName,
                                   substring=substring,
                                   newValue=newValue,
                                   collection_key=collection_key,
                                   **kwargs)

    log_handler.pop_application()
Example #3
0
def main(collection_keys, log_handler=None, config_file="akara.ini", rq_queue=None, **kwargs):
    """Runs a UCLDC sync to solr for collection key"""
    config = config_harvest(config_file=config_file)

    if not log_handler:
        log_handler = logbook.StderrHandler(level="DEBUG")
    log_handler.push_application()
    for collection_key in [x for x in collection_keys.split(";")]:
        queue_sync_to_solr(
            config["redis_host"],
            config["redis_port"],
            config["redis_password"],
            config["redis_connect_timeout"],
            rq_queue=rq_queue,
            collection_key=collection_key,
            **kwargs
        )

    log_handler.pop_application()
def main(collection_keys,
         log_handler=None,
         config_file='akara.ini',
         rq_queue=None,
         **kwargs):
    '''Runs a UCLDC sync to solr for collection key'''
    config = config_harvest(config_file=config_file)

    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    for collection_key in [x for x in collection_keys.split(';')]:
        queue_delete_couchdb_collection(config['redis_host'],
                                        config['redis_port'],
                                        config['redis_password'],
                                        config['redis_connect_timeout'],
                                        rq_queue=rq_queue,
                                        collection_key=collection_key,
                                        **kwargs)

    log_handler.pop_application()
def main(item_id,
         log_handler=None,
         config_file='akara.ini',
         rq_queue=None,
         **kwargs):
    '''Runs a UCLDC delete from solr for collection key'''
    config = config_harvest(config_file=config_file)

    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')

    log_handler.push_application()
    queue_delete_item_from_solr(config['redis_host'],
                                config['redis_port'],
                                config['redis_password'],
                                config['redis_connect_timeout'],
                                rq_queue=rq_queue,
                                item_id=item_id,
                                **kwargs)

    log_handler.pop_application()
Example #6
0
def main(user_email,
         url_api_collections,
         log_handler=None,
         mail_handler=None,
         dir_profile='profiles',
         profile_path=None,
         config_file='akara.ini',
         rq_queue=None,
         **kwargs):
    '''Runs a UCLDC ingest process for the given collection'''
    emails = [user_email]
    if EMAIL_SYS_ADMIN:
        emails.extend([u for u in EMAIL_SYS_ADMIN.split(',')])
    if not mail_handler:
        mail_handler = logbook.MailHandler(
            EMAIL_RETURN_ADDRESS, emails, level='ERROR', bubble=True)
    mail_handler.push_application()
    config = config_harvest(config_file=config_file)
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()

    for url_api_collection in [x for x in url_api_collections.split(';')]:
        try:
            collection = Collection(url_api_collection)
        except Exception, e:
            msg = 'Exception in Collection {}, init {}'.format(
                    url_api_collection,
                    str(e))
            logbook.error(msg)
            raise e
        queue_image_harvest(
            config['redis_host'],
            config['redis_port'],
            config['redis_password'],
            config['redis_connect_timeout'],
            rq_queue=rq_queue,
            collection_key=collection.id,
            object_auth=collection.auth,
            **kwargs)
Example #7
0
def main(user_email,
         url_api_collections,
         log_handler=None,
         mail_handler=None,
         dir_profile='profiles',
         profile_path=None,
         config_file='akara.ini',
         rq_queue=None,
         **kwargs):
    '''Runs a UCLDC ingest process for the given collection'''
    emails = [user_email]
    if EMAIL_SYS_ADMIN:
        emails.extend([u for u in EMAIL_SYS_ADMIN.split(',')])
    if not mail_handler:
        mail_handler = logbook.MailHandler(EMAIL_RETURN_ADDRESS,
                                           emails,
                                           level='ERROR',
                                           bubble=True)
    mail_handler.push_application()
    config = config_harvest(config_file=config_file)
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()

    for url_api_collection in [x for x in url_api_collections.split(';')]:
        try:
            collection = Collection(url_api_collection)
        except Exception, e:
            msg = 'Exception in Collection {}, init {}'.format(
                url_api_collection, str(e))
            logbook.error(msg)
            raise e
        queue_image_harvest(config['redis_host'],
                            config['redis_port'],
                            config['redis_password'],
                            config['redis_connect_timeout'],
                            rq_queue=rq_queue,
                            collection_key=collection.id,
                            object_auth=collection.auth,
                            **kwargs)
def main(collection_keys,
         log_handler=None,
         config_file='akara.ini',
         rq_queue=None,
         **kwargs):
    '''Runs a UCLDC sync to solr for collection key'''
    config = config_harvest(config_file=config_file)

    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    for collection_key in [x for x in collection_keys.split(';')]:
        queue_delete_couchdb_collection(
            config['redis_host'],
            config['redis_port'],
            config['redis_password'],
            config['redis_connect_timeout'],
            rq_queue=rq_queue,
            collection_key=collection_key,
            **kwargs)

    log_handler.pop_application()
Example #9
0
def main(user_email,
         url_api_collection,
         log_handler=None,
         mail_handler=None,
         dir_profile='profiles',
         profile_path=None,
         config_file=None,
         redis_host=None,
         redis_port=None,
         redis_pswd=None,
         redis_timeout=600,
         rq_queue=None,
         run_image_harvest=False,
         **kwargs):
    '''Runs a UCLDC ingest process for the given collection'''
    cleanup_work_dir()  # remove files from /tmp
    emails = [user_email]
    if EMAIL_SYS_ADMIN:
        emails.extend([u for u in EMAIL_SYS_ADMIN.split(',')])
    if not mail_handler:
        mail_handler = logbook.MailHandler(EMAIL_RETURN_ADDRESS,
                                           emails,
                                           level='ERROR',
                                           bubble=True)
    mail_handler.push_application()
    if not config_file:
        config_file = os.environ.get('DPLA_CONFIG_FILE', 'akara.ini')
    if not (redis_host and redis_port and redis_pswd):
        config = config_harvest(config_file=config_file)

    try:
        collection = Collection(url_api_collection)
    except Exception as e:
        msg = 'Exception in Collection {}, init {}'.format(
            url_api_collection, str(e))
        logbook.error(msg)
        raise e
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')

    log_handler.push_application()
    logger = logbook.Logger('run_ingest')
    ingest_doc_id, num_recs, dir_save, harvester = fetcher.main(
        emails,
        url_api_collection,
        log_handler=log_handler,
        mail_handler=mail_handler,
        **kwargs)
    if 'prod' in os.environ['DATA_BRANCH'].lower():
        if not collection.ready_for_publication:
            raise Exception(''.join(
                ('Collection {} is not ready for publication.',
                 ' Run on stage and QA first, then set',
                 ' ready_for_publication')).format(collection.id))
    logger.info("INGEST DOC ID:{0}".format(ingest_doc_id))
    logger.info('HARVESTED {0} RECORDS'.format(num_recs))
    logger.info('IN DIR:{0}'.format(dir_save))
    resp = enrich_records.main([None, ingest_doc_id])
    if not resp == 0:
        logger.error("Error enriching records {0}".format(resp))
        raise Exception('Failed during enrichment process: {0}'.format(resp))
    logger.info('Enriched records')

    resp = save_records.main([None, ingest_doc_id])
    if not resp >= 0:
        logger.error("Error saving records {0}".format(str(resp)))
        raise Exception("Error saving records {0}".format(str(resp)))
    num_saved = resp
    logger.info("SAVED RECS : {}".format(num_saved))

    resp = remove_deleted_records.main([None, ingest_doc_id])
    if not resp == 0:
        logger.error("Error deleting records {0}".format(resp))
        raise Exception("Error deleting records {0}".format(resp))

    resp = check_ingestion_counts.main([None, ingest_doc_id])
    if not resp == 0:
        logger.error("Error checking counts {0}".format(resp))
        raise Exception("Error checking counts {0}".format(resp))

    resp = dashboard_cleanup.main([None, ingest_doc_id])
    if not resp == 0:
        logger.error("Error cleaning up dashboard {0}".format(resp))
        raise Exception("Error cleaning up dashboard {0}".format(resp))
    subject = format_results_subject(collection.id,
                                     'Harvest to CouchDB {env} ')
    publish_to_harvesting(
        subject, 'Finished metadata harvest for CID: {}\n'
        'Fetched: {}\nSaved: {}'.format(collection.id, num_recs, num_saved))

    log_handler.pop_application()
    mail_handler.pop_application()
Example #10
0
        raise Exception("Error checking counts {0}".format(resp))

    resp = dashboard_cleanup.main([None, ingest_doc_id])
    if not resp == 0:
        logger.error("Error cleaning up dashboard {0}".format(resp))
        raise Exception("Error cleaning up dashboard {0}".format(resp))
    subject = format_results_subject(collection.id,
                                     'Harvest to CouchDB {env} ')
    publish_to_harvesting(
        subject, 'Finished metadata harvest for CID: {}\n'
        'Fetched: {}\nSaved: {}'.format(collection.id, num_recs, num_saved))

    log_handler.pop_application()
    mail_handler.pop_application()


if __name__ == '__main__':
    parser = def_args()
    args = parser.parse_args(sys.argv[1:])
    if not args.user_email or not args.url_api_collection or not args.rq_queue:
        parser.print_help()
        sys.exit(27)
    conf = config_harvest()
    main(args.user_email,
         args.url_api_collection,
         redis_host=conf['redis_host'],
         redis_port=conf['redis_port'],
         redis_pswd=conf['redis_password'],
         redis_timeout=conf['redis_connect_timeout'],
         rq_queue=args.rq_queue)
Example #11
0
        job = queue_image_harvest(
            config['redis_host'],
            config['redis_port'],
            config['redis_password'],
            config['redis_connect_timeout'],
            config['couchdb_url'],
            collection.id,
            rq_queue,
            object_auth=collection.auth)
        logger.info("Started job for image_harvest:{}".format(job.result))

    log_handler.pop_application()
    mail_handler.pop_application()


if __name__ == '__main__':
    parser = def_args()
    args = parser.parse_args(sys.argv[1:])
    if not args.user_email or not args.url_api_collection or not args.rq_queue:
        parser.print_help()
        sys.exit(27)
    conf = config_harvest()
    main(
        args.user_email,
        args.url_api_collection,
        redis_host=conf['redis_host'],
        redis_port=conf['redis_port'],
        redis_pswd=conf['redis_password'],
        redis_timeout=conf['redis_connect_timeout'],
        rq_queue=args.rq_queue)
Example #12
0
def main(user_email,
         url_api_collection,
         log_handler=None,
         mail_handler=None,
         dir_profile='profiles',
         profile_path=None,
         config_file=None,
         redis_host=None,
         redis_port=None,
         redis_pswd=None,
         redis_timeout=600,
         rq_queue=None,
         run_image_harvest=False,
         **kwargs):
    '''Runs a UCLDC ingest process for the given collection'''
    cleanup_work_dir()  # remove files from /tmp
    emails = [user_email]
    if EMAIL_SYS_ADMIN:
        emails.extend([u for u in EMAIL_SYS_ADMIN.split(',')])
    if not mail_handler:
        mail_handler = logbook.MailHandler(
            EMAIL_RETURN_ADDRESS, emails, level='ERROR', bubble=True)
    mail_handler.push_application()
    if not config_file:
        config_file = os.environ.get('DPLA_CONFIG_FILE', 'akara.ini')
    if not (redis_host and redis_port and redis_pswd):
        config = config_harvest(config_file=config_file)

    try:
        collection = Collection(url_api_collection)
    except Exception as e:
        msg = 'Exception in Collection {}, init {}'.format(url_api_collection,
                                                           str(e))
        logbook.error(msg)
        raise e
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')

    log_handler.push_application()
    logger = logbook.Logger('run_ingest')
    ingest_doc_id, num_recs, dir_save, harvester = fetcher.main(
        emails,
        url_api_collection,
        log_handler=log_handler,
        mail_handler=mail_handler,
        **kwargs)
    if 'prod' in os.environ['DATA_BRANCH'].lower():
        if not collection.ready_for_publication:
            raise Exception(''.join(
                ('Collection {} is not ready for publication.',
                 ' Run on stage and QA first, then set',
                 ' ready_for_publication')).format(collection.id))
    logger.info("INGEST DOC ID:{0}".format(ingest_doc_id))
    logger.info('HARVESTED {0} RECORDS'.format(num_recs))
    logger.info('IN DIR:{0}'.format(dir_save))
    resp = enrich_records.main([None, ingest_doc_id])
    if not resp == 0:
        logger.error("Error enriching records {0}".format(resp))
        raise Exception('Failed during enrichment process: {0}'.format(resp))
    logger.info('Enriched records')

    resp = save_records.main([None, ingest_doc_id])
    if not resp >= 0:
        logger.error("Error saving records {0}".format(str(resp)))
        raise Exception("Error saving records {0}".format(str(resp)))
    num_saved = resp
    logger.info("SAVED RECS : {}".format(num_saved))

    resp = remove_deleted_records.main([None, ingest_doc_id])
    if not resp == 0:
        logger.error("Error deleting records {0}".format(resp))
        raise Exception("Error deleting records {0}".format(resp))

    resp = check_ingestion_counts.main([None, ingest_doc_id])
    if not resp == 0:
        logger.error("Error checking counts {0}".format(resp))
        raise Exception("Error checking counts {0}".format(resp))

    resp = dashboard_cleanup.main([None, ingest_doc_id])
    if not resp == 0:
        logger.error("Error cleaning up dashboard {0}".format(resp))
        raise Exception("Error cleaning up dashboard {0}".format(resp))
    subject = format_results_subject(collection.id,
                                     'Harvest to CouchDB {env} ')
    publish_to_harvesting(subject,
                          'Finished metadata harvest for CID: {}\n'
                          'Fetched: {}\nSaved: {}'.format(
                              collection.id,
                              num_recs,
                              num_saved))

    log_handler.pop_application()
    mail_handler.pop_application()