def run_delete_via_bdr_item_api( pid ):
    """ Calls to remove pid from bdr via the old item-api.
        Reads api url and auth credentials from the environment.
        Called by queue-job enqueued in run_remove_pid_from_custom_index(). """
    assert isinstance( pid, unicode ), type(pid)  # isinstance is the idiomatic type-check; message shows the offending type
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    item_api_url = unicode( os.environ['BELL_ONEOFF__OLD_ITEM_API_URL'] )
    identity = unicode( os.environ['BELL_ONEOFF__OLD_ITEM_API_AUTH_NAME'] )
    auth_code = unicode( os.environ['BELL_ONEOFF__OLD_ITEM_API_AUTH_KEY'] )
    reindexer.delete_via_bdr_item_api( pid, item_api_url, identity, auth_code )
    return
def run_remove_pid_from_custom_index( pid ):
    """ Calls to remove pid from custom bell index,
        then enqueues the bdr-deletion job for the same pid. """
    assert isinstance( pid, unicode ), type(pid)  # isinstance is the idiomatic type-check; message shows the offending type
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    reindexer.remove_pid_from_custom_index( pid )
    bell_q.enqueue_call(
        func='bell_code.one_offs.rebuild_custom_index.run_delete_via_bdr_item_api',
        kwargs={ 'pid': pid } )
    return
def run_add_image_datastream( data ):
    """ Runner for add_image_datastream().
        Called by queue-job triggered by tasks.task_manager.determine_next_task(). """
    expected_keys = ['item_data', 'pid', 'update_image']
    assert sorted( data.keys() ) == expected_keys, sorted( data.keys() )
    log = bell_logger.setup_logger()
    image_handler = ImageHandler( data, log )
    image_handler.add_image_datastream()
    current_task_name = unicode( sys._getframe().f_code.co_name )  # this function's own name
    task_manager.determine_next_task( current_task=current_task_name, logger=log, data=data )
    return
def run_make_pid_list_from_bdr_data():
    """ Calls for a list of pids for the given collection_pid,
        then enqueues the job that determines which pids to remove. """
    collection_pid = os.environ['BELL_ANTP__COLLECTION_PID']
    search_api_root = os.environ['BELL_ANTP__SOLR_ROOT']
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    pid_lst = reindexer.make_pid_list( collection_pid, search_api_root )
    bell_q.enqueue_call(
        func='bell_code.one_offs.rebuild_custom_index.run_make_pids_to_remove',
        kwargs={ 'pids_from_collection': pid_lst } )
    return
def run_check_update_image( data ):
    """ Runner for check_update_image().
        Called by queue-job triggered by tasks.task_manager.determine_next_task(). """
    expected_keys = ['item_data', 'pid']
    assert sorted( data.keys() ) == expected_keys, sorted( data.keys() )
    log = bell_logger.setup_logger()
    image_handler = ImageHandler( data, log )
    update_image_flag = image_handler.check_update_image()
    current_task_name = unicode( sys._getframe().f_code.co_name )  # this function's own name
    next_data = {
        'item_data': data['item_data'],
        'pid': data['pid'],
        'update_image': update_image_flag }
    task_manager.determine_next_task( current_task=current_task_name, logger=log, data=next_data )
    return
def run_make_pid_dict_from_bell_data():
    """ Calls to create a json file containing an accession_number-to-pid dict,
        then enqueues the job that builds the pid-list from bdr data. """
    collection_pid = os.environ['BELL_ANTP__COLLECTION_PID']
    fmpro_json_path = os.environ['BELL_ANTP__BELL_DICT_JSON_PATH']  # file of dict of bell-accession-number to metadata
    search_api_root = os.environ['BELL_ANTP__SOLR_ROOT']
    output_json_path = os.environ['BELL_ANTP__OUTPUT_JSON_PATH']
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    reindexer.make_pid_dict( collection_pid, fmpro_json_path, search_api_root, output_json_path )
    bell_q.enqueue_call(
        func='bell_code.one_offs.rebuild_custom_index.run_make_pid_list_from_bdr_data',
        kwargs={} )
    return
def delete_jp2( data ):
    """ Cleans up created derivative: removes the jp2 file at data['jp2_path'],
        then hands control back to the task-manager with the remaining item-data. """
    logger = bell_logger.setup_logger()
    ( item_data_dict, jp2_path, pid ) = ( data['item_data'], data['jp2_path'], data['pid'] )
    assert jp2_path.endswith( '.jp2' ), jp2_path  # endswith is clearer than slicing; message shows the bad path
    os.remove( jp2_path )
    task_manager.determine_next_task(
        unicode(sys._getframe().f_code.co_name),
        data={ 'item_data': item_data_dict, 'pid': pid },
        logger=logger )
    return
def ensure_redis():
    """ Checks that redis is running by issuing a call that errors if it is not,
        then determines the next task.
        Raises Exception with a descriptive message on failure. """
    logger = bell_logger.setup_logger()
    logger.info( 'STARTING_PROCESSING...' )
    try:
        assert len(r.keys()) > -1  # assertion is always true; the r.keys() call itself generates an error if redis isn't running
        next_task = task_manager.determine_next_task( sys._getframe().f_code.co_name, logger=logger )  # passes current function name; renamed from `next` to avoid shadowing the builtin
        logger.info( 'in check_environment.ensure_redis(); redis-check ok' )
        return
    except Exception as e:
        message = 'in check_environment.ensure_redis(); redis does not appear to be running; exception: %s' % unicode(repr(e))
        logger.error( message )
        raise Exception( message )
def populate_queue():
    """ Puts individual bell items on the queue, capped (for development) by
        the BELL_TM__POPULATE_QUEUE_LIMIT environment setting.
        Raises Exception with a descriptive message on failure. """
    logger = bell_logger.setup_logger()
    try:
        with open( os.environ.get('BELL_CE__BELL_DICT_JSON_PATH') ) as f:
            all_items_dict = json.loads( f.read() )
        limit = int( os.environ.get('BELL_TM__POPULATE_QUEUE_LIMIT') )  # hoisted: loop-invariant, was parsed every iteration
        current_task_name = sys._getframe().f_code.co_name  # hoisted: loop-invariant
        for i, (accnum_key, item_dict_value) in enumerate( sorted(all_items_dict['items'].items()) ):
            determine_next_task( current_task=current_task_name, data=item_dict_value, logger=logger )
            if i + 2 > limit:  # for development
                logger.debug( 'in task_manager.populate_queue(); breaking after %s' % accnum_key )
                break
        update_tracker( key='GENERAL', message='queue populated' )
        logger.info( 'in task_manager.populate_queue(); populate_queue ok' )
        return
    except Exception as e:
        message = 'in task_manager.populate_queue(); problem in populate_queue(); exception is: %s' % unicode(repr(e))
        logger.error( message )
        raise Exception( message )
def archive_previous_work():
    """ Archives previous redis tracker data to a timestamped file in BELL_LOG_DIR,
        then determines the next task.
        Raises Exception with a descriptive message on failure. """
    logger = bell_logger.setup_logger()
    try:
        bell_dir = unicode( os.environ.get('BELL_LOG_DIR') )
        now_string = unicode( datetime.datetime.now() ).replace( ' ', '_' )  # e.g. '2015-01-02_03:04:05.678901'
        archive_file_path = '%s/%s.archive' % ( bell_dir, now_string )
        jstring = _convert_tracker_to_dict()
        with open( archive_file_path, 'w' ) as f:
            f.write( jstring )
        next_task = task_manager.determine_next_task( sys._getframe().f_code.co_name, logger=logger )  # renamed from `next` to avoid shadowing the builtin
        logger.info( 'in check_environment.archive_previous_work(); archive_previous_work ok' )
        return
    except Exception as e:
        message = 'in check_environment.archive_previous_work(); problem archiving previous work; exception: %s' % unicode(repr(e))
        logger.error( message )
        raise Exception( message )
def check_foundation_files():
    """ Checks that foundation-files exist, updates the tracker, and determines the next task.
        Raises Exception with a descriptive message on failure. """
    logger = bell_logger.setup_logger()
    try:
        for filepath in [ os.environ.get('BELL_CE__BELL_DICT_JSON_PATH'), os.environ.get('BELL_CE__AccToPidDict_JSON_PATH') ]:
            try:
                assert os.path.exists( filepath )
            except Exception as e:
                message = 'Problem finding filepath %s; exception: %s' % ( filepath, unicode(repr(e)) )
                logger.error( message )
                raise Exception( message )
        task_manager.update_tracker( key='GENERAL', message='foundation files ok' )
        next_task = task_manager.determine_next_task( sys._getframe().f_code.co_name, logger=logger )  # renamed from `next` to avoid shadowing the builtin
        logger.info( 'in check_environment.check_foundation_files(); files ok' )
        return
    except Exception as e:
        message = 'in check_environment.check_foundation_files(); problem checking foundation files; exception: %s' % unicode(repr(e))
        logger.error( message )
        raise Exception( message )
def ensure_redis_status_dict():
    """ Ensures the status dict exists. Resets it if required (BELL_CE__TRACKER_OVERWRITE == 'TRUE').
        Each key's value is a json-serializable list.
        Raises Exception with a descriptive message on failure. """
    logger = bell_logger.setup_logger()
    try:
        OVERWRITE = unicode( os.environ.get('BELL_CE__TRACKER_OVERWRITE') )
        tracker_key = 'bell:tracker'
        if OVERWRITE == 'TRUE':
            r.delete( tracker_key )
        if not r.exists( tracker_key ):
            message = '%s initialized %s' % ( tracker_key, unicode(datetime.datetime.now()) )
            r.hset( tracker_key, 'GENERAL', json.dumps([message]) )
        next_task = task_manager.determine_next_task( sys._getframe().f_code.co_name, logger=logger )  # renamed from `next` to avoid shadowing the builtin
        logger.info( 'in check_environment.ensure_redis_status_dict(); bell_status ok' )
        return
    except Exception as e:
        message = 'in check_environment.ensure_redis_status_dict(); redis bell_status not set; exception: %s' % unicode(repr(e))
        logger.error( message )
        raise Exception( message )
def run_start_reindex_all():
    """ Starts full custom-reindex process.
        Calls to build a json file of bell latest xml export.
        Process:
        - build list of all accession-numbers from latest run.
        - get pids for those accession-numbers.
          - (there should be a pid for each accession number)
          - this is the list of accession-numbers/pids to reindex
        - build list of all pids in bell collection
        - build list of all pids not in the to-index list
          - this is the list of accession-numbers/pids to remove from the index
        - enqueue all the remove jobs
        - enqueue all the reindex jobs """
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    xml_path = unicode( os.environ['BELL_ANTD__FMPRO_XML_PATH'] )
    json_path = unicode( os.environ['BELL_ANTD__JSON_OUTPUT_PATH'] )
    reindexer.make_initial_json( xml_path, json_path )  # just saves to json; nothing returned
    bell_q.enqueue_call(
        func='bell_code.one_offs.rebuild_custom_index.run_make_pid_dict_from_bell_data',
        kwargs={} )
    return
def run__update_existing_metadata_and_create_image( data ): """ Takes data-dict; example { item_dict: {title: the-title, acc-num: the-acc-num} }. Instantiates Task() instance & calls update_existing_metadata_and_create_image(). Called by task_manager.determine_next_task(). """ logger = bell_logger.setup_logger() logger.info( 'in fedora_metadata_updater_and_image_builder.run__update_existing_metadata_and_create_image(); starting.' ) print 'in fedora_metadata_and_image_builder.run__update_existing_metadata_and_create_image(); acc_num is: %s' % data['item_dict']['calc_accession_id'] MASTER_IMAGES_DIR_PATH = unicode( os.environ.get('BELL_FMAIB__MASTER_IMAGES_DIR_PATH') ) MASTER_IMAGES_DIR_URL = unicode( os.environ.get('BELL_FMAIB__MASTER_IMAGES_DIR_URL')) JP2_IMAGES_DIR_PATH = unicode( os.environ.get('BELL_FMAIB__JP2_IMAGES_DIR_PATH') ) JP2_IMAGES_DIR_URL = unicode( os.environ.get('BELL_FMAIB__JP2_IMAGES_DIR_URL') ) API_URL = unicode( os.environ.get('BELL_FMUAIB__PRIVATE_API_URL') ) mods_schema_path = os.path.abspath( './lib/mods-3-4.xsd' ) task = Task() task._print_settings( MASTER_IMAGES_DIR_PATH, MASTER_IMAGES_DIR_URL, JP2_IMAGES_DIR_PATH, JP2_IMAGES_DIR_URL, API_URL ) task.update_existing_metadata_and_create_image( MASTER_IMAGES_DIR_PATH, MASTER_IMAGES_DIR_URL, JP2_IMAGES_DIR_PATH, JP2_IMAGES_DIR_URL, data['pid'], data['item_dict'], API_URL, logger ) print 'in edora_metadata_updater_and_image_builder.run__update_existing_metadata_and_create_image(); acc_num is: %s; item ingested' % data['item_dict']['calc_accession_id'] return
def run_make_pids_to_remove( pids_from_collection ):
    """ Calls for a list of pids to remove, enqueues the update-list job,
        then enqueues one removal job per pid-to-remove. """
    assert isinstance( pids_from_collection, list ), type(pids_from_collection)  # isinstance is the idiomatic type-check; message shows the offending type
    ( pids_for_accession_number_json_path, reindexer ) = (
        unicode(os.environ['BELL_ANTP__OUTPUT_JSON_PATH']), CustomReindexer(bell_logger.setup_logger()) )
    pids_to_remove = reindexer.make_pids_to_remove( pids_from_collection, pids_for_accession_number_json_path )
    bell_q.enqueue_call(
        func='bell_code.one_offs.rebuild_custom_index.run_make_pids_to_update',
        kwargs={} )
    for pid in pids_to_remove:
        bell_q.enqueue_call(
            func='bell_code.one_offs.rebuild_custom_index.run_remove_pid_from_custom_index',
            kwargs={ 'pid': pid } )
    return
""" Merges a metadata json subset file into a complete json metadata file.
    - outputs to a new complete json file.
    - example use-case:
      - J.C. provides full metadata file.
      - I find issue with metadata filenames for, say, 50 images.
      - J.C. fixes filenames in db and exports the 50 metadata records.
      - I want to merge the 50 updated metadata-records into the complete set of metadata records. """

import datetime, json, os, pprint
import logging.handlers
from bell_code import bell_logger

logger = bell_logger.setup_logger()


class Merger( object ):
    """ Merges a subset metadata-json file into the full metadata-json file. """

    def __init__( self ):
        # paths come from the environment so the script can run unmodified against different data-sets
        self.SOURCE_FULL_JSON_METADATA_PATH = unicode( os.environ['BELL_ONEOFF__SOURCE_FULL_JSON_METADATA_PATH'] )
        self.SOURCE_SUBSET_JSON_METADATA_PATH = unicode( os.environ['BELL_ONEOFF__SOURCE_SUBSET_JSON_METADATA_PATH'] )
        self.OUTPUT_PATH = unicode( os.environ['BELL_ONEOFF__OUTPUT_FULL_JSON_METADATA_PATH'] )

    def merge_data( self ):
        """ Merges subset into full set; outputs new full set. """
        # NOTE(review): self.loadup() is not visible in this chunk -- presumably it reads the two source json files; confirm.
        ( initial_full_dct, subset_dct ) = self.loadup()
        key_val_lst = sorted( subset_dct['items'].items() )
        for (accession_num, item_dct) in key_val_lst:
            # subset entries overwrite (or add to) the full set, keyed by accession-number
            initial_full_dct['items'][accession_num] = item_dct
        # NOTE(review): no write to self.OUTPUT_PATH appears here -- the save step may live past this chunk; confirm.
def __init__( self, kwargs ):
    """ Stores settings passed in via the kwargs dict and sets up a logger. """
    self.solr_root_url = kwargs['solr_root_url']  # CUSTOM-solr index; TODO: rename so it's not confused with bdr-solr
    self.logger = bell_logger.setup_logger()