def main(user_email,
         url_api_collection,
         log_handler=None,
         mail_handler=None,
         dir_profile='profiles',
         profile_path=None,
         config_file=None,
         redis_host=None,
         redis_port=None,
         redis_pswd=None,
         redis_timeout=600,
         rq_queue=None,
         run_image_harvest=False):
    '''Runs a UCLDC ingest process for the given collection.

    :param user_email: address notified (with EMAIL_SYS_ADMIN) on errors
    :param url_api_collection: registry API URL for the collection
    :param config_file: ini file path; defaults to $DPLA_CONFIG_FILE or
        'akara.ini' when not given
    :raises Exception: re-raises any failure from Collection() init
    '''
    cleanup_work_dir()  # remove files from /tmp
    # Error emails go to the requesting user plus any sys-admin addresses.
    emails = [user_email]
    if EMAIL_SYS_ADMIN:
        emails.extend(EMAIL_SYS_ADMIN.split(','))
    if not mail_handler:
        mail_handler = logbook.MailHandler(
            EMAIL_RETURN_ADDRESS, emails, level='ERROR', bubble=True)
    mail_handler.push_application()
    if not config_file:
        config_file = os.environ.get('DPLA_CONFIG_FILE', 'akara.ini')
    # Only hit the config file when redis credentials were not passed in.
    if not (redis_host and redis_port and redis_pswd):
        config = config_harvest(config_file=config_file)
    try:
        collection = Collection(url_api_collection)
    except Exception as e:  # fixed: was Python 2-only "except Exception, e"
        msg = 'Exception in Collection {}, init {}'.format(
            url_api_collection, str(e))
        logbook.error(msg)
        raise e
def main(collection_keys,
         fieldName,
         newValue,
         substring=None,
         log_handler=None,
         config_file='akara.ini',
         rq_queue=None,
         **kwargs):
    '''Queue a CouchDB field update for each collection key.

    :param collection_keys: ';'-delimited string of collection ids
    :param fieldName: document field to update
    :param newValue: value to set on the field
    :param substring: optional substring filter passed to the queue job
    :param rq_queue: name of the RQ queue to enqueue onto
    '''
    config = config_harvest(config_file=config_file)
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    # fixed: dropped the identity comprehension around split(';')
    for collection_key in collection_keys.split(';'):
        queue_update_couchdb_field(
            config['redis_host'],
            config['redis_port'],
            config['redis_password'],
            config['redis_connect_timeout'],
            rq_queue=rq_queue,
            fieldName=fieldName,
            substring=substring,
            newValue=newValue,
            collection_key=collection_key,
            **kwargs)
    log_handler.pop_application()
def main(collection_keys, log_handler=None, config_file="akara.ini",
         rq_queue=None, **kwargs):
    """Runs a UCLDC sync to solr for collection key.

    :param collection_keys: ';'-delimited string of collection ids
    :param rq_queue: name of the RQ queue to enqueue onto
    """
    config = config_harvest(config_file=config_file)
    if not log_handler:
        log_handler = logbook.StderrHandler(level="DEBUG")
    log_handler.push_application()
    # fixed: dropped the identity comprehension around split(";")
    for collection_key in collection_keys.split(";"):
        queue_sync_to_solr(
            config["redis_host"],
            config["redis_port"],
            config["redis_password"],
            config["redis_connect_timeout"],
            rq_queue=rq_queue,
            collection_key=collection_key,
            **kwargs
        )
    log_handler.pop_application()
def main(collection_keys, log_handler=None, config_file='akara.ini',
         rq_queue=None, **kwargs):
    '''Queue a CouchDB collection delete for each collection key.

    (Docstring corrected: the body queues a delete, not a solr sync.)

    :param collection_keys: ';'-delimited string of collection ids
    :param rq_queue: name of the RQ queue to enqueue onto
    '''
    config = config_harvest(config_file=config_file)
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    # fixed: dropped the identity comprehension around split(';')
    for collection_key in collection_keys.split(';'):
        queue_delete_couchdb_collection(
            config['redis_host'],
            config['redis_port'],
            config['redis_password'],
            config['redis_connect_timeout'],
            rq_queue=rq_queue,
            collection_key=collection_key,
            **kwargs)
    log_handler.pop_application()
def main(item_id, log_handler=None, config_file='akara.ini', rq_queue=None,
         **kwargs):
    '''Runs a UCLDC delete from solr for collection key'''
    conf = config_harvest(config_file=config_file)
    # Fall back to a verbose stderr handler when none was supplied.
    log_handler = log_handler or logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    redis_args = (
        conf['redis_host'],
        conf['redis_port'],
        conf['redis_password'],
        conf['redis_connect_timeout'],
    )
    queue_delete_item_from_solr(
        *redis_args, rq_queue=rq_queue, item_id=item_id, **kwargs)
    log_handler.pop_application()
def main(user_email,
         url_api_collections,
         log_handler=None,
         mail_handler=None,
         dir_profile='profiles',
         profile_path=None,
         config_file='akara.ini',
         rq_queue=None,
         **kwargs):
    '''Runs a UCLDC ingest process for the given collection.

    :param user_email: address notified (with EMAIL_SYS_ADMIN) on errors
    :param url_api_collections: ';'-delimited registry API URLs
    :param rq_queue: name of the RQ queue to enqueue onto
    :raises Exception: re-raises any failure from Collection() init
    '''
    # Error emails go to the requesting user plus any sys-admin addresses.
    emails = [user_email]
    if EMAIL_SYS_ADMIN:
        emails.extend(EMAIL_SYS_ADMIN.split(','))
    if not mail_handler:
        mail_handler = logbook.MailHandler(
            EMAIL_RETURN_ADDRESS, emails, level='ERROR', bubble=True)
    mail_handler.push_application()
    config = config_harvest(config_file=config_file)
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    for url_api_collection in url_api_collections.split(';'):
        try:
            collection = Collection(url_api_collection)
        except Exception as e:  # fixed: was Python 2-only "except Exception, e"
            msg = 'Exception in Collection {}, init {}'.format(
                url_api_collection, str(e))
            logbook.error(msg)
            raise e
        queue_image_harvest(
            config['redis_host'],
            config['redis_port'],
            config['redis_password'],
            config['redis_connect_timeout'],
            rq_queue=rq_queue,
            collection_key=collection.id,
            object_auth=collection.auth,
            **kwargs)
    # NOTE(review): handlers are pushed but never popped here, matching the
    # original code — confirm whether that is intentional for this script.
def main(user_email,
         url_api_collections,
         log_handler=None,
         mail_handler=None,
         dir_profile='profiles',
         profile_path=None,
         config_file='akara.ini',
         rq_queue=None,
         **kwargs):
    '''Runs a UCLDC ingest process for the given collection.

    :param user_email: address notified (with EMAIL_SYS_ADMIN) on errors
    :param url_api_collections: ';'-delimited registry API URLs
    :param rq_queue: name of the RQ queue to enqueue onto
    :raises Exception: re-raises any failure from Collection() init
    '''
    # Error emails go to the requesting user plus any sys-admin addresses.
    emails = [user_email]
    if EMAIL_SYS_ADMIN:
        emails.extend(EMAIL_SYS_ADMIN.split(','))
    if not mail_handler:
        mail_handler = logbook.MailHandler(
            EMAIL_RETURN_ADDRESS, emails, level='ERROR', bubble=True)
    mail_handler.push_application()
    config = config_harvest(config_file=config_file)
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    for url_api_collection in url_api_collections.split(';'):
        try:
            collection = Collection(url_api_collection)
        except Exception as e:  # fixed: was Python 2-only "except Exception, e"
            msg = 'Exception in Collection {}, init {}'.format(
                url_api_collection, str(e))
            logbook.error(msg)
            raise e
        queue_image_harvest(
            config['redis_host'],
            config['redis_port'],
            config['redis_password'],
            config['redis_connect_timeout'],
            rq_queue=rq_queue,
            collection_key=collection.id,
            object_auth=collection.auth,
            **kwargs)
    # NOTE(review): handlers are pushed but never popped here, matching the
    # original code — confirm whether that is intentional for this script.
def main(collection_keys, log_handler=None, config_file='akara.ini',
         rq_queue=None, **kwargs):
    '''Queue a CouchDB collection delete for each collection key.

    (Docstring corrected: the body queues a delete, not a solr sync.)

    :param collection_keys: ';'-delimited string of collection ids
    :param rq_queue: name of the RQ queue to enqueue onto
    '''
    config = config_harvest(config_file=config_file)
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    # fixed: dropped the identity comprehension around split(';')
    for collection_key in collection_keys.split(';'):
        queue_delete_couchdb_collection(
            config['redis_host'],
            config['redis_port'],
            config['redis_password'],
            config['redis_connect_timeout'],
            rq_queue=rq_queue,
            collection_key=collection_key,
            **kwargs)
    log_handler.pop_application()
def main(user_email,
         url_api_collection,
         log_handler=None,
         mail_handler=None,
         dir_profile='profiles',
         profile_path=None,
         config_file=None,
         redis_host=None,
         redis_port=None,
         redis_pswd=None,
         redis_timeout=600,
         rq_queue=None,
         run_image_harvest=False,
         **kwargs):
    '''Runs a UCLDC ingest process for the given collection.

    Fetches records, enriches them, saves to CouchDB, removes deleted
    records, checks ingestion counts, cleans up the dashboard, and
    publishes a result message.

    :param user_email: address notified (with EMAIL_SYS_ADMIN) on errors
    :param url_api_collection: registry API URL for the collection
    :param config_file: ini file path; defaults to $DPLA_CONFIG_FILE or
        'akara.ini' when not given
    :raises Exception: on any failed pipeline stage, or when running
        against prod and the collection is not ready_for_publication
    '''
    cleanup_work_dir()  # remove files from /tmp
    # Error emails go to the requesting user plus any sys-admin addresses.
    emails = [user_email]
    if EMAIL_SYS_ADMIN:
        emails.extend(EMAIL_SYS_ADMIN.split(','))
    if not mail_handler:
        mail_handler = logbook.MailHandler(
            EMAIL_RETURN_ADDRESS, emails, level='ERROR', bubble=True)
    mail_handler.push_application()
    if not config_file:
        config_file = os.environ.get('DPLA_CONFIG_FILE', 'akara.ini')
    # Only hit the config file when redis credentials were not passed in.
    if not (redis_host and redis_port and redis_pswd):
        config = config_harvest(config_file=config_file)
    try:
        collection = Collection(url_api_collection)
    except Exception as e:
        msg = 'Exception in Collection {}, init {}'.format(
            url_api_collection, str(e))
        logbook.error(msg)
        raise e
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    logger = logbook.Logger('run_ingest')
    ingest_doc_id, num_recs, dir_save, harvester = fetcher.main(
        emails,
        url_api_collection,
        log_handler=log_handler,
        mail_handler=mail_handler,
        **kwargs)
    # Guard prod runs: the registry flag must be set before publishing.
    if 'prod' in os.environ['DATA_BRANCH'].lower():
        if not collection.ready_for_publication:
            raise Exception(''.join(
                ('Collection {} is not ready for publication.',
                 ' Run on stage and QA first, then set',
                 ' ready_for_publication')).format(collection.id))
    logger.info("INGEST DOC ID:{0}".format(ingest_doc_id))
    logger.info('HARVESTED {0} RECORDS'.format(num_recs))
    logger.info('IN DIR:{0}'.format(dir_save))
    resp = enrich_records.main([None, ingest_doc_id])
    if resp != 0:
        logger.error("Error enriching records {0}".format(resp))
        raise Exception('Failed during enrichment process: {0}'.format(resp))
    logger.info('Enriched records')
    resp = save_records.main([None, ingest_doc_id])
    if resp < 0:
        # fixed: this string literal was broken across a physical line in
        # the original source (a syntax error); reconstructed as one line.
        logger.error("Error saving records {0}".format(str(resp)))
        raise Exception("Error saving records {0}".format(str(resp)))
    num_saved = resp  # save_records returns the saved-record count
    logger.info("SAVED RECS : {}".format(num_saved))
    resp = remove_deleted_records.main([None, ingest_doc_id])
    if resp != 0:
        logger.error("Error deleting records {0}".format(resp))
        raise Exception("Error deleting records {0}".format(resp))
    resp = check_ingestion_counts.main([None, ingest_doc_id])
    if resp != 0:
        logger.error("Error checking counts {0}".format(resp))
        raise Exception("Error checking counts {0}".format(resp))
    resp = dashboard_cleanup.main([None, ingest_doc_id])
    if resp != 0:
        logger.error("Error cleaning up dashboard {0}".format(resp))
        raise Exception("Error cleaning up dashboard {0}".format(resp))
    subject = format_results_subject(collection.id,
                                     'Harvest to CouchDB {env} ')
    publish_to_harvesting(
        subject, 'Finished metadata harvest for CID: {}\n'
        'Fetched: {}\nSaved: {}'.format(collection.id, num_recs, num_saved))
    log_handler.pop_application()
    mail_handler.pop_application()
raise Exception("Error checking counts {0}".format(resp)) resp = dashboard_cleanup.main([None, ingest_doc_id]) if not resp == 0: logger.error("Error cleaning up dashboard {0}".format(resp)) raise Exception("Error cleaning up dashboard {0}".format(resp)) subject = format_results_subject(collection.id, 'Harvest to CouchDB {env} ') publish_to_harvesting( subject, 'Finished metadata harvest for CID: {}\n' 'Fetched: {}\nSaved: {}'.format(collection.id, num_recs, num_saved)) log_handler.pop_application() mail_handler.pop_application() if __name__ == '__main__': parser = def_args() args = parser.parse_args(sys.argv[1:]) if not args.user_email or not args.url_api_collection or not args.rq_queue: parser.print_help() sys.exit(27) conf = config_harvest() main(args.user_email, args.url_api_collection, redis_host=conf['redis_host'], redis_port=conf['redis_port'], redis_pswd=conf['redis_password'], redis_timeout=conf['redis_connect_timeout'], rq_queue=args.rq_queue)
job = queue_image_harvest( config['redis_host'], config['redis_port'], config['redis_password'], config['redis_connect_timeout'], config['couchdb_url'], collection.id, rq_queue, object_auth=collection.auth) logger.info("Started job for image_harvest:{}".format(job.result)) log_handler.pop_application() mail_handler.pop_application() if __name__ == '__main__': parser = def_args() args = parser.parse_args(sys.argv[1:]) if not args.user_email or not args.url_api_collection or not args.rq_queue: parser.print_help() sys.exit(27) conf = config_harvest() main( args.user_email, args.url_api_collection, redis_host=conf['redis_host'], redis_port=conf['redis_port'], redis_pswd=conf['redis_password'], redis_timeout=conf['redis_connect_timeout'], rq_queue=args.rq_queue)
def main(user_email,
         url_api_collection,
         log_handler=None,
         mail_handler=None,
         dir_profile='profiles',
         profile_path=None,
         config_file=None,
         redis_host=None,
         redis_port=None,
         redis_pswd=None,
         redis_timeout=600,
         rq_queue=None,
         run_image_harvest=False,
         **kwargs):
    '''Runs a UCLDC ingest process for the given collection.

    Fetches records, enriches them, saves to CouchDB, removes deleted
    records, checks ingestion counts, cleans up the dashboard, and
    publishes a result message.

    :param user_email: address notified (with EMAIL_SYS_ADMIN) on errors
    :param url_api_collection: registry API URL for the collection
    :param config_file: ini file path; defaults to $DPLA_CONFIG_FILE or
        'akara.ini' when not given
    :raises Exception: on any failed pipeline stage, or when running
        against prod and the collection is not ready_for_publication
    '''
    cleanup_work_dir()  # remove files from /tmp
    # Error emails go to the requesting user plus any sys-admin addresses.
    emails = [user_email]
    if EMAIL_SYS_ADMIN:
        emails.extend(EMAIL_SYS_ADMIN.split(','))
    if not mail_handler:
        mail_handler = logbook.MailHandler(
            EMAIL_RETURN_ADDRESS, emails, level='ERROR', bubble=True)
    mail_handler.push_application()
    if not config_file:
        config_file = os.environ.get('DPLA_CONFIG_FILE', 'akara.ini')
    # Only hit the config file when redis credentials were not passed in.
    if not (redis_host and redis_port and redis_pswd):
        config = config_harvest(config_file=config_file)
    try:
        collection = Collection(url_api_collection)
    except Exception as e:
        msg = 'Exception in Collection {}, init {}'.format(
            url_api_collection, str(e))
        logbook.error(msg)
        raise e
    if not log_handler:
        log_handler = logbook.StderrHandler(level='DEBUG')
    log_handler.push_application()
    logger = logbook.Logger('run_ingest')
    ingest_doc_id, num_recs, dir_save, harvester = fetcher.main(
        emails,
        url_api_collection,
        log_handler=log_handler,
        mail_handler=mail_handler,
        **kwargs)
    # Guard prod runs: the registry flag must be set before publishing.
    if 'prod' in os.environ['DATA_BRANCH'].lower():
        if not collection.ready_for_publication:
            raise Exception(''.join(
                ('Collection {} is not ready for publication.',
                 ' Run on stage and QA first, then set',
                 ' ready_for_publication')).format(collection.id))
    logger.info("INGEST DOC ID:{0}".format(ingest_doc_id))
    logger.info('HARVESTED {0} RECORDS'.format(num_recs))
    logger.info('IN DIR:{0}'.format(dir_save))
    resp = enrich_records.main([None, ingest_doc_id])
    if resp != 0:
        logger.error("Error enriching records {0}".format(resp))
        raise Exception('Failed during enrichment process: {0}'.format(resp))
    logger.info('Enriched records')
    resp = save_records.main([None, ingest_doc_id])
    if resp < 0:
        # fixed: this string literal was broken across a physical line in
        # the original source (a syntax error); reconstructed as one line.
        logger.error("Error saving records {0}".format(str(resp)))
        raise Exception("Error saving records {0}".format(str(resp)))
    num_saved = resp  # save_records returns the saved-record count
    logger.info("SAVED RECS : {}".format(num_saved))
    resp = remove_deleted_records.main([None, ingest_doc_id])
    if resp != 0:
        logger.error("Error deleting records {0}".format(resp))
        raise Exception("Error deleting records {0}".format(resp))
    resp = check_ingestion_counts.main([None, ingest_doc_id])
    if resp != 0:
        logger.error("Error checking counts {0}".format(resp))
        raise Exception("Error checking counts {0}".format(resp))
    resp = dashboard_cleanup.main([None, ingest_doc_id])
    if resp != 0:
        logger.error("Error cleaning up dashboard {0}".format(resp))
        raise Exception("Error cleaning up dashboard {0}".format(resp))
    subject = format_results_subject(collection.id,
                                     'Harvest to CouchDB {env} ')
    publish_to_harvesting(subject,
                          'Finished metadata harvest for CID: {}\n'
                          'Fetched: {}\nSaved: {}'.format(
                              collection.id, num_recs, num_saved))
    log_handler.pop_application()
    mail_handler.pop_application()