def run_delete_via_bdr_item_api( pid ):
    """ Calls to remove pid from bdr.
        Param pid: unicode pid of the item to delete.
        Reads the old item-api url and auth credentials from environment variables. """
    assert isinstance( pid, unicode ), type(pid)  # isinstance is the idiomatic type-check (also accepts subclasses); message shows the offending type
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    item_api_url = unicode( os.environ['BELL_ONEOFF__OLD_ITEM_API_URL'] )
    identity = unicode( os.environ['BELL_ONEOFF__OLD_ITEM_API_AUTH_NAME'] )
    auth_code = unicode( os.environ['BELL_ONEOFF__OLD_ITEM_API_AUTH_KEY'] )
    reindexer.delete_via_bdr_item_api( pid, item_api_url, identity, auth_code )
    return
def run_remove_pid_from_custom_index( pid ):
    """ Calls to remove pid from custom bell index, then enqueues the
        bdr-item-api delete job for the same pid.
        Param pid: unicode pid of the item to remove. """
    assert isinstance( pid, unicode ), type(pid)  # isinstance is the idiomatic type-check; message shows the offending type
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    reindexer.remove_pid_from_custom_index( pid )
    bell_q.enqueue_call(
        func='bell_code.one_offs.rebuild_custom_index.run_delete_via_bdr_item_api',
        kwargs={ 'pid': pid } )
    return
# Ejemplo n.º 3  (scrape artifact: example-separator text, commented out -- it was invalid Python)
# 0
def run_add_image_datastream( data ):
    """ Queue-job runner: adds the image datastream for the given item, then
        hands control back to the task-manager.
        Called by queue-job triggered by tasks.task_manager.determine_next_task(). """
    expected_keys = ['item_data', 'pid', 'update_image']
    assert sorted( data.keys() ) == expected_keys, sorted( data.keys() )
    log = bell_logger.setup_logger()
    image_handler = ImageHandler( data, log )
    image_handler.add_image_datastream()
    current_name = unicode( sys._getframe().f_code.co_name )  # this function's own name
    task_manager.determine_next_task( current_task=current_name, logger=log, data=data )
    return
def run_make_pid_list_from_bdr_data():
    """ Fetches the list of pids for the configured bell collection,
        then enqueues the job that determines which pids to remove. """
    collection_pid = os.environ['BELL_ANTP__COLLECTION_PID']
    search_api_root = os.environ['BELL_ANTP__SOLR_ROOT']
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    pid_lst = reindexer.make_pid_list( collection_pid, search_api_root )
    bell_q.enqueue_call(
        func='bell_code.one_offs.rebuild_custom_index.run_make_pids_to_remove',
        kwargs={ 'pids_from_collection': pid_lst } )
    return
# Ejemplo n.º 5  (scrape artifact: example-separator text, commented out -- it was invalid Python)
# 0
def run_check_update_image( data ):
    """ Queue-job runner: determines whether the item's image needs updating,
        then hands the enriched data back to the task-manager.
        Called by queue-job triggered by tasks.task_manager.determine_next_task(). """
    assert sorted( data.keys() ) == ['item_data', 'pid'], sorted( data.keys() )
    log = bell_logger.setup_logger()
    handler = ImageHandler( data, log )
    update_flag = handler.check_update_image()
    next_data = { 'item_data': data['item_data'], 'pid': data['pid'], 'update_image': update_flag }
    task_manager.determine_next_task(
        current_task=unicode( sys._getframe().f_code.co_name ), logger=log, data=next_data )
    return
def run_make_pid_dict_from_bell_data():
    """ Creates a json file containing an accession_number-to-pid dict,
        then enqueues the job that builds the pid list from bdr data. """
    collection_pid = os.environ['BELL_ANTP__COLLECTION_PID']
    fmpro_json_path = os.environ['BELL_ANTP__BELL_DICT_JSON_PATH']  # file of dict of bell-accession-number to metadata
    search_api_root = os.environ['BELL_ANTP__SOLR_ROOT']
    output_json_path = os.environ['BELL_ANTP__OUTPUT_JSON_PATH']
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    reindexer.make_pid_dict( collection_pid, fmpro_json_path, search_api_root, output_json_path )
    bell_q.enqueue_call(
        func='bell_code.one_offs.rebuild_custom_index.run_make_pid_list_from_bdr_data',
        kwargs={} )
    return
# Ejemplo n.º 7  (scrape artifact: example-separator text, commented out -- it was invalid Python)
# 0
def delete_jp2( data ):
    """ Cleans up the created jp2 derivative, then hands off to the task-manager.
        Param data: dict with 'item_data', 'jp2_path', and 'pid' keys. """
    logger = bell_logger.setup_logger()
    ( item_data_dict, jp2_path, pid ) = ( data['item_data'], data['jp2_path'], data['pid'] )
    assert jp2_path.endswith( '.jp2' ), jp2_path  # endswith is clearer than slicing; message shows the offending path
    os.remove( jp2_path )
    task_manager.determine_next_task(
        unicode(sys._getframe().f_code.co_name),
        data={ 'item_data': item_data_dict, 'pid': pid },
        logger=logger
        )
    return
def ensure_redis():
    """ Checks that redis is running; raises Exception (after logging) if not.
        The r.keys() call itself raises when redis is down -- the assert's
        comparison value is deliberately always-true. """
    logger = bell_logger.setup_logger()
    logger.info( 'STARTING_PROCESSING...' )
    try:
        assert len(r.keys()) > -1  # if redis isn't running this will generate an error
        next_task = task_manager.determine_next_task( sys._getframe().f_code.co_name, logger=logger )  # renamed from `next` to avoid shadowing the builtin; passes current function name
        # job = q.enqueue_call ( func='%s' % next_task, args=(), timeout=30 )
        logger.info( 'in check_environment.ensure_redis(); redis-check ok' )
        return
    except Exception as e:
        message = 'in check_environment.ensure_redis(); redis does not appear to be running; exception: %s' % unicode(repr(e))
        logger.error( message )
        raise Exception( message )
def populate_queue():
    """ Puts individual bell items on the queue, one determine_next_task()
        call per item, honoring the development item-limit env setting. """
    logger = bell_logger.setup_logger()
    try:
        json_path = os.environ.get( 'BELL_CE__BELL_DICT_JSON_PATH' )
        with open( json_path ) as f:
            all_items_dict = json.loads( f.read() )
        sorted_items = sorted( all_items_dict['items'].items() )
        for i, (accnum_key, item_dict_value) in enumerate( sorted_items ):
            determine_next_task( current_task=sys._getframe().f_code.co_name, data=item_dict_value, logger=logger )
            if i + 2 > int( os.environ.get('BELL_TM__POPULATE_QUEUE_LIMIT') ):  # development limit on how many items get queued
                logger.debug( 'in task_manager.populate_queue(); breaking after %s' % accnum_key )
                break
        update_tracker( key='GENERAL', message='queue populated' )
        logger.info( 'in task_manager.populate_queue(); populate_queue ok' )
        return
    except Exception as e:
        message = 'in task_manager.populate_queue(); problem in populate_queue(); exception is: %s' % unicode(repr(e))
        logger.error( message )
        raise Exception( message )
def archive_previous_work():
    """ Archives previous redis tracker data to a timestamped file in the log dir,
        then asks the task-manager for the next task. Raises (after logging) on failure. """
    logger = bell_logger.setup_logger()
    try:
        bell_dir = unicode( os.environ.get('BELL_LOG_DIR') )
        now_string = unicode( datetime.datetime.now() ).replace( ' ', '_' )  # spaces replaced so the timestamp is filename-safe
        archive_file_path = '%s/%s.archive' % ( bell_dir, now_string )
        jstring = _convert_tracker_to_dict()  # NOTE(review): name suggests a dict but the value is written directly -- presumably returns a json string; confirm
        with open( archive_file_path, 'w' ) as f:
            f.write( jstring )
        next_task = task_manager.determine_next_task( sys._getframe().f_code.co_name, logger=logger )  # renamed from `next` to avoid shadowing the builtin
        # job = q.enqueue_call ( func='%s' % next_task, args=(), timeout=30 )
        logger.info( 'in check_environment.archive_previous_work(); archive_previous_work ok' )
        return
    except Exception as e:
        message = 'in check_environment.archive_previous_work(); problem archiving previous work; exception: %s' % unicode(repr(e))
        logger.error( message )
        raise Exception( message )
def check_foundation_files():
    """ Checks that the two foundation json files exist; raises (after logging)
        if either is missing, otherwise updates the tracker and asks the
        task-manager for the next task. """
    logger = bell_logger.setup_logger()
    try:
        for filepath in [ os.environ.get('BELL_CE__BELL_DICT_JSON_PATH'), os.environ.get('BELL_CE__AccToPidDict_JSON_PATH') ]:
            try:
                assert os.path.exists( filepath )
            except Exception as e:
                message = 'Problem finding filepath %s; exception: %s' % ( filepath, unicode(repr(e)) )
                logger.error( message ); raise Exception( message )  # re-raised Exception is caught and re-wrapped by the outer handler below
        task_manager.update_tracker( key='GENERAL', message='foundation files ok' )
        next_task = task_manager.determine_next_task( sys._getframe().f_code.co_name, logger=logger )  # renamed from `next` to avoid shadowing the builtin
        # job = q.enqueue_call ( func='%s' % next_task, args=(), timeout=30 )
        logger.info( 'in check_environment.check_foundation_files(); files ok' )
        return
    except Exception as e:
        message = 'in check_environment.check_foundation_files(); problem checking foundation files; exception: %s' % unicode(repr(e))
        logger.error( message )
        raise Exception( message )
def ensure_redis_status_dict():
    """ Ensures the status dict exists. Resets it if required.
        Each key's value is a json-serializable list.
        Raises (after logging) if redis access fails. """
    logger = bell_logger.setup_logger()
    try:
        OVERWRITE = unicode( os.environ.get('BELL_CE__TRACKER_OVERWRITE') )
        tracker_key = 'bell:tracker'
        if OVERWRITE == 'TRUE':  # env flag to wipe previous tracker data
            r.delete( tracker_key )
        if not r.exists( tracker_key ):
            message = '%s initialized %s' % ( tracker_key, unicode(datetime.datetime.now()) )
            r.hset( tracker_key, 'GENERAL', json.dumps([message]) )
        next_task = task_manager.determine_next_task( sys._getframe().f_code.co_name, logger=logger )  # renamed from `next` to avoid shadowing the builtin
        # job = q.enqueue_call ( func='%s' % next_task, args=(), timeout=30 )
        logger.info( 'in check_environment.ensure_redis_status_dict(); bell_status ok' )
        return
    except Exception as e:
        message = 'in check_environment.ensure_redis_status_dict(); redis bell_status not set; exception: %s' % unicode(repr(e))
        logger.error( message )
        raise Exception( message )
def run_start_reindex_all():
    """ Starts full custom-reindex process.
        Calls to build a json file of bell latest xml export.
        Process:
        - build list of all accession-numbers from latest run.
        - get pids for those accession-numbers.
          - (there should be a pid for each accession number)
          - this is the list of accession-numbers/pids to reindex
        - build list of all pids in bell collection
        - build list of all pids not in the to-index list
          - this is the list of accession-numbers/pids to remove from the index
        - enqueue all the remove jobs
        - enqueue all the reindex jobs
        """
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    fmpro_xml_path = unicode( os.environ['BELL_ANTD__FMPRO_XML_PATH'] )
    fmpro_json_path = unicode( os.environ['BELL_ANTD__JSON_OUTPUT_PATH'] )
    reindexer.make_initial_json( fmpro_xml_path, fmpro_json_path )  # just saves to json; nothing returned
    bell_q.enqueue_call(
        func='bell_code.one_offs.rebuild_custom_index.run_make_pid_dict_from_bell_data',
        kwargs={} )
    return
def run__update_existing_metadata_and_create_image( data ):
    """ Takes data-dict; example { item_dict: {title: the-title, acc-num: the-acc-num} }.
        Instantiates Task() instance & calls update_existing_metadata_and_create_image().
        Called by task_manager.determine_next_task(). """
    logger = bell_logger.setup_logger()
    logger.info( 'in fedora_metadata_updater_and_image_builder.run__update_existing_metadata_and_create_image(); starting.' )
    print 'in fedora_metadata_and_image_builder.run__update_existing_metadata_and_create_image(); acc_num is: %s' % data['item_dict']['calc_accession_id']
    MASTER_IMAGES_DIR_PATH = unicode( os.environ.get('BELL_FMAIB__MASTER_IMAGES_DIR_PATH') )
    MASTER_IMAGES_DIR_URL = unicode( os.environ.get('BELL_FMAIB__MASTER_IMAGES_DIR_URL'))
    JP2_IMAGES_DIR_PATH = unicode( os.environ.get('BELL_FMAIB__JP2_IMAGES_DIR_PATH') )
    JP2_IMAGES_DIR_URL = unicode( os.environ.get('BELL_FMAIB__JP2_IMAGES_DIR_URL') )
    API_URL = unicode( os.environ.get('BELL_FMUAIB__PRIVATE_API_URL') )
    mods_schema_path = os.path.abspath( './lib/mods-3-4.xsd' )
    task = Task()
    task._print_settings(
        MASTER_IMAGES_DIR_PATH, MASTER_IMAGES_DIR_URL, JP2_IMAGES_DIR_PATH, JP2_IMAGES_DIR_URL,
        API_URL
        )
    task.update_existing_metadata_and_create_image(
        MASTER_IMAGES_DIR_PATH, MASTER_IMAGES_DIR_URL, JP2_IMAGES_DIR_PATH, JP2_IMAGES_DIR_URL,
        data['pid'], data['item_dict'], API_URL, logger
        )
    print 'in edora_metadata_updater_and_image_builder.run__update_existing_metadata_and_create_image(); acc_num is: %s; item ingested' % data['item_dict']['calc_accession_id']
    return
def run_make_pids_to_remove( pids_from_collection ):
    """ Computes the pids to remove from the custom index; enqueues the
        follow-on 'pids to update' job plus one removal job per pid.
        Param pids_from_collection: list of all pids in the bdr bell collection. """
    assert isinstance( pids_from_collection, list ), type(pids_from_collection)  # isinstance is the idiomatic type-check; message shows the offending type
    pids_for_accession_number_json_path = unicode( os.environ['BELL_ANTP__OUTPUT_JSON_PATH'] )  # untangled from the original one-line tuple assignment for readability
    reindexer = CustomReindexer( bell_logger.setup_logger() )
    pids_to_remove = reindexer.make_pids_to_remove( pids_from_collection, pids_for_accession_number_json_path )
    bell_q.enqueue_call(
        func='bell_code.one_offs.rebuild_custom_index.run_make_pids_to_update',
        kwargs={} )
    for pid in pids_to_remove:
        bell_q.enqueue_call(
            func='bell_code.one_offs.rebuild_custom_index.run_remove_pid_from_custom_index',
            kwargs={ 'pid': pid } )
    return
"""
Merges a metadata json subset file into a complete json metadata file.
- outputs to a new complete json file.
- example use-case:
  - J.C. provides full metadata file.
  - I find issue with metadata filenames for, say, 50 images.
  - J.C. fixes filenames in db and exports the 50 metadata records.
  - I want to merge the 50 updated metadata-records into the complete set of medata records.
"""

import datetime, json, os, pprint
import logging.handlers
from bell_code import bell_logger

logger = bell_logger.setup_logger()  # module-level logger shared by this merger script


class Merger( object ):
    """ Merges a subset metadata-json file into the full metadata-json file
        (see the module docstring for the use-case). """

    def __init__( self ):
        """ Loads the three required filepath settings from environment variables.
            The unicode() calls keep the py2 text-type consistent with the rest of the code. """
        self.SOURCE_FULL_JSON_METADATA_PATH = unicode( os.environ['BELL_ONEOFF__SOURCE_FULL_JSON_METADATA_PATH'] )
        self.SOURCE_SUBSET_JSON_METADATA_PATH = unicode( os.environ['BELL_ONEOFF__SOURCE_SUBSET_JSON_METADATA_PATH'] )
        self.OUTPUT_PATH = unicode( os.environ['BELL_ONEOFF__OUTPUT_FULL_JSON_METADATA_PATH'] )

    def merge_data( self ):
        """ Merges subset into full set; outputs new full set.
            NOTE(review): as visible here the method only updates the in-memory dict;
            the save-to-OUTPUT_PATH step promised above is not shown in this chunk -- confirm it follows. """
        ( initial_full_dct, subset_dct ) = self.loadup()  # self.loadup() is defined outside this view -- presumably reads both json files; verify
        key_val_lst = sorted( subset_dct['items'].items() )
        for (accession_num, item_dct) in key_val_lst:
            initial_full_dct['items'][accession_num] = item_dct  # subset record replaces (or adds to) the full-set record
 def __init__( self, kwargs ):
     self.logger = bell_logger.setup_logger()
     self.solr_root_url = kwargs['solr_root_url']  # CUSTOM-solr index; TODO: rename so it's not confused with bdr-solr