def get_recids_for_rules(rules):
    """
    Generates the final list of record IDs to load.

    @param rules: dict of rules {rule_name: rule_dict}
    @type rules: dict of rules

    @return: dict {rule_name: intbitset of record IDs}
    """
    # Explicit record IDs passed on the command line override the
    # modification-date based selection computed below.
    override_record_ids = task_get_option("record_ids")
    recids = {}
    for rule_name, rule in rules.iteritems():
        if "filter_pattern" in rule:
            query = rule["filter_pattern"]
            if "filter_collection" in rule:
                collections = rule["filter_collection"].split()
            else:
                collections = None
            write_message("Performing given search query: '%s'" % query)
            if collections:
                result = perform_request_search(p=query,
                                                of='intbitset',
                                                wl=rule.get('filter_limit', 0),
                                                f=rule.get('filter_field',
                                                           None),
                                                c=collections)
            else:
                result = search_pattern(
                    p=query,
                    wl=rule.get('filter_limit', 0),
                    f=rule.get('filter_field', None),
                )
        else:
            # No filter configured: start from the set of all records.
            result = intbitset(trailing_bits=True)
        if override_record_ids is not None:
            result.intersection_update(override_record_ids)
        else:
            # Restrict to records modified since this rule last ran.
            last_run = get_rule_lastrun(rule_name)
            modified_recids = get_modified_records_since(last_run)
            if "consider_deleted_records" not in rule:
                modified_recids -= search_unit_in_bibxxx(p='DELETED',
                                                         f='980__%',
                                                         type='e')
                if CFG_CERN_SITE:
                    modified_recids -= search_unit_in_bibxxx(p='DUMMY',
                                                             f='980__%',
                                                             type='e')
            result.intersection_update(modified_recids)
        recids[rule_name] = result
    return recids
def oai_get_recid_list(set_spec="", fromdate="", untildate=""):
    """
    Returns list of recids for the OAI set 'set', modified
    from 'fromdate' until 'untildate'.
    """
    recids = intbitset()
    # When deleted records are advertised, records that only appear in
    # the "previous set" field must be included too.
    include_previous = CFG_OAI_DELETED_POLICY != 'no'
    if not set_spec:
        # Global set: every record carrying any setSpec at all.
        patterns = ['*']
    else:
        # Exact setSpec match plus any sub-set ("<setSpec>:*").
        patterns = [set_spec, '%s:*' % set_spec]
    for pattern in patterns:
        recids |= search_unit_in_bibxxx(p=pattern, f=CFG_OAI_SET_FIELD,
                                        m='e')
        if include_previous:
            recids |= search_unit_in_bibxxx(p=pattern,
                                            f=CFG_OAI_PREVIOUS_SET_FIELD,
                                            m='e')
    if not include_previous:
        # Deleted records must never be exposed under this policy.
        recids -= search_unit_in_bibxxx(p='DELETED', f='980__%', m='e')
        if CFG_CERN_SITE:
            recids -= search_unit_in_bibxxx(p='DUMMY', f='980__%', m='e')
    return filter_out_based_on_date_range(recids, fromdate, untildate,
                                          set_spec)
def get_recids_for_rules(rules):
    """
    Generates the final list of record IDs to load.

    @param rules dict of rules {rule_name: rule_dict}
    @type rules: dict of rules

    @return dict {rule_name: array of record IDs}
    """
    override_record_ids = task_get_option("record_ids")
    recids = {}
    for rule_name, rule in rules.iteritems():
        if "filter_pattern" not in rule:
            # No filter configured: begin with every possible record.
            selection = intbitset(trailing_bits=True)
        else:
            query = rule["filter_pattern"]
            collections = None
            if "filter_collection" in rule:
                collections = rule["filter_collection"].split()
            write_message("Performing given search query: '%s'" % query)
            search_kwargs = {
                'p': query,
                'wl': rule.get('filter_limit', 0),
                'f': rule.get('filter_field', None),
            }
            if collections:
                selection = perform_request_search(of='intbitset',
                                                   c=collections,
                                                   **search_kwargs)
            else:
                selection = search_pattern(**search_kwargs)
        if override_record_ids is not None:
            # Record IDs given on the command line win over date logic.
            selection.intersection_update(override_record_ids)
        else:
            # Keep only records touched since the rule's last run.
            since = get_rule_lastrun(rule_name)
            modified = get_modified_records_since(since)
            if "consider_deleted_records" not in rule:
                modified -= search_unit_in_bibxxx(p='DELETED', f='980__%',
                                                  m='e')
                if CFG_CERN_SITE:
                    modified -= search_unit_in_bibxxx(p='DUMMY', f='980__%',
                                                      m='e')
            selection.intersection_update(modified)
        recids[rule_name] = selection
    return recids
def oai_get_recid_list(set_spec="", fromdate="", untildate=""):
    """
    Returns list of recids for the OAI set 'set', modified
    from 'fromdate' until 'untildate'.
    """
    ret = intbitset()
    if not set_spec:
        # No setSpec given: match every record carrying any setSpec.
        ret |= search_unit_in_bibxxx(p='*', f=CFG_OAI_SET_FIELD, type='e')
        if CFG_OAI_DELETED_POLICY != 'no':
            # Deleted records stay advertised: also match records that
            # only appear in the "previous set" field.
            ret |= search_unit_in_bibxxx(p='*',
                                         f=CFG_OAI_PREVIOUS_SET_FIELD,
                                         type='e')
    else:
        # Exact setSpec match plus any sub-set ("<setSpec>:*").
        ret |= search_unit_in_bibxxx(p=set_spec, f=CFG_OAI_SET_FIELD,
                                     type='e')
        ret |= search_unit_in_bibxxx(p='%s:*' % set_spec,
                                     f=CFG_OAI_SET_FIELD, type='e')
        if CFG_OAI_DELETED_POLICY != 'no':
            ret |= search_unit_in_bibxxx(p=set_spec,
                                         f=CFG_OAI_PREVIOUS_SET_FIELD,
                                         type='e')
            ret |= search_unit_in_bibxxx(p='%s:*' % set_spec,
                                         f=CFG_OAI_PREVIOUS_SET_FIELD,
                                         type='e')
    if CFG_OAI_DELETED_POLICY == 'no':
        # Deleted records must not be exposed at all under this policy.
        ret -= search_unit_in_bibxxx(p='DELETED', f='980__%', type='e')
        if CFG_CERN_SITE:
            ret -= search_unit_in_bibxxx(p='DUMMY', f='980__%', type='e')
    return filter_out_based_on_date_range(ret, fromdate, untildate, set_spec)
def oai_get_recid_list(set_spec="", fromdate="", untildate=""):
    """
    Returns list of recids for the OAI set 'set', modified
    from 'fromdate' until 'untildate'.
    """
    def exact_search(pattern, field):
        # Exact-match ('e') search of `pattern` against `field`.
        return search_unit_in_bibxxx(p=pattern, f=field, m="e")

    ret = intbitset()
    keep_deleted = CFG_OAI_DELETED_POLICY != "no"
    if set_spec:
        # Exact setSpec match plus any sub-set ("<setSpec>:*").
        ret |= exact_search(set_spec, CFG_OAI_SET_FIELD)
        ret |= exact_search("%s:*" % set_spec, CFG_OAI_SET_FIELD)
        if keep_deleted:
            ret |= exact_search(set_spec, CFG_OAI_PREVIOUS_SET_FIELD)
            ret |= exact_search("%s:*" % set_spec,
                                CFG_OAI_PREVIOUS_SET_FIELD)
    else:
        # Global case: everything carrying any setSpec.
        ret |= exact_search("*", CFG_OAI_SET_FIELD)
        if keep_deleted:
            ret |= exact_search("*", CFG_OAI_PREVIOUS_SET_FIELD)
    if not keep_deleted:
        # Policy 'no': deleted records are never exposed.
        ret -= exact_search("DELETED", "980__%")
        if CFG_CERN_SITE:
            ret -= exact_search("DUMMY", "980__%")
    return filter_out_based_on_date_range(ret, fromdate, untildate, set_spec)
def oairepositoryupdater_task():
    """Main business logic code of oai_archive"""
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    if report > 1:
        # Report-only mode: print repository status and stop.
        print_repository_status(verbose=report)
        return True

    initial_snapshot = {}
    for set_spec in all_set_specs():
        initial_snapshot[set_spec] = get_set_definitions(set_spec)
    write_message("Initial set snapshot: %s" % pformat(initial_snapshot),
                  verbose=2)

    task_update_progress("Fetching records to process")

    # Records that already carry an OAI identifier.
    recids_with_oaiid = search_unit_in_bibxxx(p='*', f=CFG_OAI_ID_FIELD,
                                              type='e')
    write_message("%s recids have an OAI ID" % len(recids_with_oaiid),
                  verbose=2)

    all_current_recids = search_unit_in_bibxxx(p='*', f=CFG_OAI_SET_FIELD,
                                               type='e')
    # Starts as "everything currently exported"; each set's wanted
    # records are subtracted below, leaving the records to withdraw.
    no_more_exported_recids = intbitset(all_current_recids)
    write_message("%s recids are currently exported" %
                  (len(all_current_recids)), verbose=2)

    all_affected_recids = intbitset()
    all_should_recids = intbitset()
    recids_for_set = {}
    for set_spec in all_set_specs():
        if not set_spec:
            set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        # Records that *should* be in this set per admin definitions.
        should_recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = should_recids
        no_more_exported_recids -= should_recids
        all_should_recids |= should_recids
        # Records currently tagged as belonging to this set.
        current_recids = search_unit_in_bibxxx(p=set_spec,
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
        write_message("%s recids should be in %s. Currently %s are in %s" %
                      (len(should_recids), set_spec, len(current_recids),
                       set_spec), verbose=2)
        to_add = should_recids - current_recids
        write_message("%s recids should be added to %s" %
                      (len(to_add), set_spec), verbose=2)
        to_remove = current_recids - should_recids
        write_message("%s recids should be removed from %s" %
                      (len(to_remove), set_spec), verbose=2)
        affected_recids = to_add | to_remove
        write_message("%s recids should be hence updated for %s" %
                      (len(affected_recids), set_spec), verbose=2)
        all_affected_recids |= affected_recids

    missing_oaiid = all_should_recids - recids_with_oaiid
    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
    write_message("%s recids should no longer be exported" %
                  len(no_more_exported_recids))

    ## Let's add records with missing OAI ID
    all_affected_recids |= missing_oaiid | no_more_exported_recids
    write_message("%s recids should updated" % (len(all_affected_recids)),
                  verbose=2)

    if not all_affected_recids:
        write_message("Nothing to do!")
        return True

    # Prepare to save results in a tmp file
    (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                             prefix='oairepository_' + \
                             time.strftime("%Y%m%d_%H%M%S_",
                                           time.localtime()))
    oai_out = os.fdopen(fd, "w")
    oai_out.write("<collection>")
    tot = 0
    # Iterate over the recids
    for i, recid in enumerate(all_affected_recids):
        task_sleep_now_if_required(can_stop_too=True)
        task_update_progress("Done %s out of %s records." % \
                             (i, len(all_affected_recids)))
        write_message("Elaborating recid %s" % recid, verbose=3)
        record = get_record(recid)
        if not record:
            write_message("Record %s seems empty. Let's skip it." % recid,
                          verbose=3)
            continue
        new_record = {}

        # Check if an OAI identifier is already in the record or
        # not.
        assign_oai_id_entry = False
        oai_id_entry = record_get_field_value(record,
                                              tag=CFG_OAI_ID_FIELD[:3],
                                              ind1=CFG_OAI_ID_FIELD[3],
                                              ind2=CFG_OAI_ID_FIELD[4],
                                              code=CFG_OAI_ID_FIELD[5])
        if not oai_id_entry:
            assign_oai_id_entry = True
            oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid)
            write_message("Setting new oai_id %s for record %s" %
                          (oai_id_entry, recid), verbose=3)
        else:
            write_message("Already existing oai_id %s for record %s" %
                          (oai_id_entry, recid), verbose=3)

        # Get the sets to which this record already belongs according
        # to the metadata
        current_oai_sets = set(record_get_field_values(record,
                               tag=CFG_OAI_SET_FIELD[:3],
                               ind1=CFG_OAI_SET_FIELD[3],
                               ind2=CFG_OAI_SET_FIELD[4],
                               code=CFG_OAI_SET_FIELD[5]))
        write_message("Record %s currently belongs to these oai_sets: %s" %
                      (recid, ", ".join(current_oai_sets)), verbose=3)

        current_previous_oai_sets = set(record_get_field_values(record,
                                        tag=CFG_OAI_PREVIOUS_SET_FIELD[:3],
                                        ind1=CFG_OAI_PREVIOUS_SET_FIELD[3],
                                        ind2=CFG_OAI_PREVIOUS_SET_FIELD[4],
                                        code=CFG_OAI_PREVIOUS_SET_FIELD[5]))
        write_message("Record %s currently doesn't belong anymore to these oai_sets: %s" %
                      (recid, ", ".join(current_previous_oai_sets)),
                      verbose=3)

        # Get the sets that should be in this record according to
        # settings
        updated_oai_sets = set(_set for _set, _recids in
                               iteritems(recids_for_set)
                               if recid in _recids)
        write_message("Record %s now belongs to these oai_sets: %s" %
                      (recid, ", ".join(updated_oai_sets)), verbose=3)

        updated_previous_oai_sets = set(_set for _set in
                                        (current_previous_oai_sets -
                                         updated_oai_sets) |
                                        (current_oai_sets -
                                         updated_oai_sets))
        write_message("Record %s now doesn't belong anymore to these oai_sets: %s" %
                      (recid, ", ".join(updated_previous_oai_sets)),
                      verbose=3)

        # Ok, we have the old sets and the new sets. If they are equal
        # and oai ID does not need to be added, then great, nothing to
        # change . Otherwise apply the new sets.
        if current_oai_sets == updated_oai_sets and not assign_oai_id_entry:
            write_message("Nothing has changed for record %s, let's move on!" %
                          recid, verbose=3)
            continue # Jump to next recid

        write_message("Something has changed for record %s, let's update it!" %
                      recid, verbose=3)
        subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)]
        for oai_set in updated_oai_sets:
            subfields.append((CFG_OAI_SET_FIELD[5], oai_set))
        for oai_set in updated_previous_oai_sets:
            subfields.append((CFG_OAI_PREVIOUS_SET_FIELD[5], oai_set))
        record_add_field(new_record, tag="001",
                         controlfield_value=str(recid))
        record_add_field(new_record, tag=CFG_OAI_ID_FIELD[:3],
                         ind1=CFG_OAI_ID_FIELD[3],
                         ind2=CFG_OAI_ID_FIELD[4],
                         subfields=subfields)
        oai_out.write(record_xml_output(new_record))
        tot += 1
        if tot == CFG_OAI_REPOSITORY_MARCXML_SIZE:
            # Chunk is full: flush it to bibupload and open a new file.
            oai_out.write("</collection>")
            oai_out.close()
            write_message("Wrote to file %s" % filename)
            if not no_upload:
                if task_get_option("notimechange"):
                    task_low_level_submission('bibupload', 'oairepository',
                                              '-c', filename, '-n')
                else:
                    task_low_level_submission('bibupload', 'oairepository',
                                              '-c', filename)
            # Prepare to save results in a tmp file
            (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                                     prefix='oairepository_' + \
                                     time.strftime("%Y%m%d_%H%M%S_",
                                                   time.localtime()))
            oai_out = os.fdopen(fd, "w")
            oai_out.write("<collection>")
            tot = 0
            task_sleep_now_if_required(can_stop_too=True)

    oai_out.write("</collection>")
    oai_out.close()
    write_message("Wrote to file %s" % filename)

    if tot > 0:
        # Upload the last (partial) chunk.
        if not no_upload:
            task_sleep_now_if_required(can_stop_too=True)
            if task_get_option("notimechange"):
                task_low_level_submission('bibupload', 'oairepository',
                                          '-c', filename, '-n')
            else:
                task_low_level_submission('bibupload', 'oairepository',
                                          '-c', filename)
    else:
        # Last chunk is empty: discard the file.
        os.remove(filename)

    return True
def repository_size():
    """Read repository size"""
    # Number of records carrying any OAI setSpec (exact-match search).
    exported_recids = search_unit_in_bibxxx(p="*", f=CFG_OAI_SET_FIELD,
                                            type="e")
    return len(exported_recids)
def print_repository_status(local_write_message=write_message,
                            verbose=0):
    """
    Prints the repository status to the standard output.

    Parameters:

     write_message - *function* the function used to write the output

     verbose - *int* the verbosity of the output
        - 0: print repository size
        - 1: print quick status of each set (numbers can be wrong if
          the repository is in some inconsistent state, i.e. a record
          is in an OAI setSpec but has not OAI ID)
        - 2: print detailed status of repository, with number of
          records that needs to be synchronized according to the sets
          definitions. Precise, but ~slow...
    """
    repository_size_s = "%d" % repository_size()
    repository_recids_after_update = intbitset()

    local_write_message(CFG_SITE_NAME)
    local_write_message(" OAI Repository Status")

    set_spec_max_length = 19 # How many max char do we display for
    set_name_max_length = 20 # setName and setSpec?

    if verbose == 0:
        # Just print repository size
        local_write_message(" Total(**)" + " " * 29 +
                            " " * (9 - len(repository_size_s)) +
                            repository_size_s)
        return
    elif verbose == 1:
        # We display few information: show longer set name and spec
        set_spec_max_length = 30
        set_name_max_length = 30

    local_write_message("=" * 80)
    header = " setSpec" + " " * (set_spec_max_length - 7) + \
             " setName" + " " * (set_name_max_length - 5) + " Volume"
    if verbose > 1:
        header += " " * 5 + "After update(*):"
    local_write_message(header)
    if verbose > 1:
        local_write_message(" " * 57 + "Additions Deletions")
    local_write_message("-" * 80)

    for set_spec in all_set_specs():
        if verbose <= 1:
            # Get the records that are in this set. This is an
            # incomplete check, as it can happen that some records are
            # in this set (according to the metadata) but have no OAI
            # ID (so they are not exported). This can happen if the
            # repository has some records coming from external
            # sources, or if it has never been synchronized with this
            # tool.
            current_recids = get_recids_for_set_spec(set_spec)
            nb_current_recids = len(current_recids)
        else:
            # Get the records that are *currently* exported for this
            # setSpec
            current_recids = search_unit_in_bibxxx(p=set_spec,
                                                   f=CFG_OAI_SET_FIELD,
                                                   type='e')
            nb_current_recids = len(current_recids)

        # Get the records that *should* be in this set according to
        # the admin defined settings, and compute how many should be
        # added or removed
        should_recids = get_recids_for_set_spec(set_spec)
        repository_recids_after_update |= should_recids

        nb_add_recids = len(should_recids - current_recids)
        nb_remove_recids = len(current_recids - should_recids)
        nb_should_recids = len(should_recids)

        # Adapt setName and setSpec strings lengths
        set_spec_str = set_spec
        if len(set_spec_str) > set_spec_max_length :
            set_spec_str = "%s.." % set_spec_str[:set_spec_max_length]
        set_name_str = get_set_name_for_set_spec(set_spec)
        if len(set_name_str) > set_name_max_length :
            set_name_str = "%s.." % set_name_str[:set_name_max_length]

        # Fixed-width columns built by hand-padding with spaces.
        row = " " + set_spec_str + \
              " " * ((set_spec_max_length + 2) - len(set_spec_str)) + set_name_str + \
              " " * ((set_name_max_length + 2) - len(set_name_str)) + \
              " " * (7 - len(str(nb_current_recids))) + str(nb_current_recids)
        if verbose > 1:
            row += \
                " " * max(9 - len(str(nb_add_recids)), 0) + '+' + str(nb_add_recids) + \
                " " * max(7 - len(str(nb_remove_recids)), 0) + '-' + str(nb_remove_recids) + " = " +\
                " " * max(7 - len(str(nb_should_recids)), 0) + str(nb_should_recids)
        local_write_message(row)

    local_write_message("=" * 80)
    footer = " Total(**)" + " " * (set_spec_max_length + set_name_max_length - 7) + \
             " " * (9 - len(repository_size_s)) + repository_size_s
    if verbose > 1:
        footer += ' ' * (28 - len(str(len(repository_recids_after_update)))) + str(len(repository_recids_after_update))
    local_write_message(footer)

    if verbose > 1:
        local_write_message(' *The "after update" columns show the repository after you run this tool.')
    else:
        local_write_message(' *"Volume" is indicative if repository is out of sync. Use --detailed-report.')
    local_write_message('**The "total" is not the sum of the above numbers, but the union of the records.')
def oairepositoryupdater_task():
    """Main business logic code of oai_archive"""
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    if report > 1:
        # Report-only mode: print repository status and stop.
        print_repository_status(verbose=report)
        return True

    # Avoid piling up work while previous submissions are still queued.
    if run_sql(
        "SELECT id FROM schTASK WHERE proc='bibupload:oairepository' AND status='WAITING'"
    ):
        write_message(
            "Previous requests of oairepository still being elaborated. Let's skip this execution."
        )
        return True

    initial_snapshot = {}
    for set_spec in all_set_specs():
        initial_snapshot[set_spec] = get_set_definitions(set_spec)
    write_message("Initial set snapshot: %s" % pformat(initial_snapshot),
                  verbose=2)

    task_update_progress("Fetching records to process")

    # Records that already carry an OAI identifier.
    recids_with_oaiid = search_unit_in_bibxxx(p='*', f=CFG_OAI_ID_FIELD,
                                              type='e')
    write_message("%s recids have an OAI ID" % len(recids_with_oaiid),
                  verbose=2)

    all_current_recids = search_unit_in_bibxxx(p='*', f=CFG_OAI_SET_FIELD,
                                               type='e')
    # Starts as "everything currently exported"; each set's wanted
    # records are subtracted below, leaving the records to withdraw.
    no_more_exported_recids = intbitset(all_current_recids)
    write_message("%s recids are currently exported" %
                  (len(all_current_recids)), verbose=2)

    all_affected_recids = intbitset()
    all_should_recids = intbitset()
    recids_for_set = {}
    for set_spec in all_set_specs():
        if not set_spec:
            set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        # Records that *should* be in this set per admin definitions.
        should_recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = should_recids
        no_more_exported_recids -= should_recids
        all_should_recids |= should_recids
        # Records currently tagged as belonging to this set.
        current_recids = search_unit_in_bibxxx(p=set_spec,
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
        write_message(
            "%s recids should be in %s. Currently %s are in %s" %
            (len(should_recids), set_spec, len(current_recids), set_spec),
            verbose=2)
        to_add = should_recids - current_recids
        write_message("%s recids should be added to %s" %
                      (len(to_add), set_spec), verbose=2)
        to_remove = current_recids - should_recids
        write_message("%s recids should be removed from %s" %
                      (len(to_remove), set_spec), verbose=2)
        affected_recids = to_add | to_remove
        write_message("%s recids should be hence updated for %s" %
                      (len(affected_recids), set_spec), verbose=2)
        all_affected_recids |= affected_recids

    missing_oaiid = all_should_recids - recids_with_oaiid
    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
    write_message("%s recids should no longer be exported" %
                  len(no_more_exported_recids))

    ## Let's add records with missing OAI ID
    all_affected_recids |= missing_oaiid | no_more_exported_recids
    write_message("%s recids should updated" % (len(all_affected_recids)),
                  verbose=2)

    if not all_affected_recids:
        write_message("Nothing to do!")
        return True

    # Prepare to save results in a tmp file
    (fd, filename) = mkstemp(dir=CFG_TMPSHAREDDIR,
                             prefix='oairepository_' + \
                             time.strftime("%Y%m%d_%H%M%S_",
                                           time.localtime()))
    oai_out = os.fdopen(fd, "w")
    oai_out.write("<collection>")
    tot = 0
    # Iterate over the recids
    for i, recid in enumerate(all_affected_recids):
        task_sleep_now_if_required(can_stop_too=True)
        task_update_progress("Done %s out of %s records." % \
                             (i, len(all_affected_recids)))
        write_message("Elaborating recid %s" % recid, verbose=3)
        record = get_record(recid)
        if not record:
            write_message("Record %s seems empty. Let's skip it." % recid,
                          verbose=3)
            continue
        new_record = {}

        # Check if an OAI identifier is already in the record or
        # not.
        assign_oai_id_entry = False
        oai_id_entry = record_get_field_value(record,
                                              tag=CFG_OAI_ID_FIELD[:3],
                                              ind1=CFG_OAI_ID_FIELD[3],
                                              ind2=CFG_OAI_ID_FIELD[4],
                                              code=CFG_OAI_ID_FIELD[5])
        if not oai_id_entry:
            assign_oai_id_entry = True
            oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid)
            write_message("Setting new oai_id %s for record %s" %
                          (oai_id_entry, recid), verbose=3)
        else:
            write_message("Already existing oai_id %s for record %s" %
                          (oai_id_entry, recid), verbose=3)

        # Get the sets to which this record already belongs according
        # to the metadata
        current_oai_sets = set(
            record_get_field_values(record,
                                    tag=CFG_OAI_SET_FIELD[:3],
                                    ind1=CFG_OAI_SET_FIELD[3],
                                    ind2=CFG_OAI_SET_FIELD[4],
                                    code=CFG_OAI_SET_FIELD[5]))
        write_message("Record %s currently belongs to these oai_sets: %s" %
                      (recid, ", ".join(current_oai_sets)), verbose=3)

        current_previous_oai_sets = set(
            record_get_field_values(record,
                                    tag=CFG_OAI_PREVIOUS_SET_FIELD[:3],
                                    ind1=CFG_OAI_PREVIOUS_SET_FIELD[3],
                                    ind2=CFG_OAI_PREVIOUS_SET_FIELD[4],
                                    code=CFG_OAI_PREVIOUS_SET_FIELD[5]))
        write_message(
            "Record %s currently doesn't belong anymore to these oai_sets: %s" %
            (recid, ", ".join(current_previous_oai_sets)), verbose=3)

        # Get the sets that should be in this record according to
        # settings
        updated_oai_sets = set(_set for _set, _recids in
                               iteritems(recids_for_set)
                               if recid in _recids)
        write_message("Record %s now belongs to these oai_sets: %s" %
                      (recid, ", ".join(updated_oai_sets)), verbose=3)

        updated_previous_oai_sets = set(
            _set for _set in (current_previous_oai_sets -
                              updated_oai_sets) |
            (current_oai_sets - updated_oai_sets))
        write_message(
            "Record %s now doesn't belong anymore to these oai_sets: %s" %
            (recid, ", ".join(updated_previous_oai_sets)), verbose=3)

        # Ok, we have the old sets and the new sets. If they are equal
        # and oai ID does not need to be added, then great, nothing to
        # change . Otherwise apply the new sets.
        if current_oai_sets == updated_oai_sets and not assign_oai_id_entry:
            write_message("Nothing has changed for record %s, let's move on!" %
                          recid, verbose=3)
            continue # Jump to next recid

        write_message("Something has changed for record %s, let's update it!" %
                      recid, verbose=3)
        subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)]
        for oai_set in updated_oai_sets:
            subfields.append((CFG_OAI_SET_FIELD[5], oai_set))
        for oai_set in updated_previous_oai_sets:
            subfields.append((CFG_OAI_PREVIOUS_SET_FIELD[5], oai_set))
        record_add_field(new_record, tag="001",
                         controlfield_value=str(recid))
        record_add_field(new_record, tag=CFG_OAI_ID_FIELD[:3],
                         ind1=CFG_OAI_ID_FIELD[3],
                         ind2=CFG_OAI_ID_FIELD[4],
                         subfields=subfields)
        oai_out.write(record_xml_output(new_record))
        tot += 1
        if tot == CFG_OAI_REPOSITORY_MARCXML_SIZE:
            # Chunk is full: flush it to bibupload and open a new file.
            oai_out.write("</collection>")
            oai_out.close()
            write_message("Wrote to file %s" % filename)
            if not no_upload:
                if task_get_option("notimechange"):
                    task_low_level_submission('bibupload', 'oairepository',
                                              '-c', filename, '-n',
                                              '-Noairepository', '-P', '-1')
                else:
                    task_low_level_submission('bibupload', 'oairepository',
                                              '-c', filename,
                                              '-Noairepository', '-P', '-1')
            # Prepare to save results in a tmp file
            (fd, filename) = mkstemp(dir=CFG_TMPSHAREDDIR,
                                     prefix='oairepository_' + \
                                     time.strftime("%Y%m%d_%H%M%S_",
                                                   time.localtime()))
            oai_out = os.fdopen(fd, "w")
            oai_out.write("<collection>")
            tot = 0
            task_sleep_now_if_required(can_stop_too=True)

    oai_out.write("</collection>")
    oai_out.close()
    write_message("Wrote to file %s" % filename)

    if tot > 0:
        # Upload the last (partial) chunk.
        if not no_upload:
            task_sleep_now_if_required(can_stop_too=True)
            if task_get_option("notimechange"):
                task_low_level_submission('bibupload', 'oairepository',
                                          '-c', filename, '-n')
            else:
                task_low_level_submission('bibupload', 'oairepository',
                                          '-c', filename)
    else:
        # Last chunk is empty: discard the file.
        os.remove(filename)

    return True
def print_repository_status(local_write_message=write_message,
                            verbose=0):
    """
    Prints the repository status to the standard output.

    Parameters:

     write_message - *function* the function used to write the output

     verbose - *int* the verbosity of the output
        - 0: print repository size
        - 1: print quick status of each set (numbers can be wrong if
          the repository is in some inconsistent state, i.e. a record
          is in an OAI setSpec but has not OAI ID)
        - 2: print detailed status of repository, with number of
          records that needs to be synchronized according to the sets
          definitions. Precise, but ~slow...
    """
    repository_size_s = "%d" % repository_size()
    repository_recids_after_update = intbitset()

    local_write_message(CFG_SITE_NAME)
    local_write_message(" OAI Repository Status")

    set_spec_max_length = 19 # How many max char do we display for
    set_name_max_length = 20 # setName and setSpec?

    if verbose == 0:
        # Just print repository size
        local_write_message(" Total(**)" + " " * 29 +
                            " " * (9 - len(repository_size_s)) +
                            repository_size_s)
        return
    elif verbose == 1:
        # We display few information: show longer set name and spec
        set_spec_max_length = 30
        set_name_max_length = 30

    local_write_message("=" * 80)
    header = " setSpec" + " " * (set_spec_max_length - 7) + \
             " setName" + " " * (set_name_max_length - 5) + " Volume"
    if verbose > 1:
        header += " " * 5 + "After update(*):"
    local_write_message(header)
    if verbose > 1:
        local_write_message(" " * 57 + "Additions Deletions")
    local_write_message("-" * 80)

    for set_spec in all_set_specs():
        if verbose <= 1:
            # Get the records that are in this set. This is an
            # incomplete check, as it can happen that some records are
            # in this set (according to the metadata) but have no OAI
            # ID (so they are not exported). This can happen if the
            # repository has some records coming from external
            # sources, or if it has never been synchronized with this
            # tool.
            current_recids = get_recids_for_set_spec(set_spec)
            nb_current_recids = len(current_recids)
        else:
            # Get the records that are *currently* exported for this
            # setSpec
            current_recids = search_unit_in_bibxxx(p=set_spec,
                                                   f=CFG_OAI_SET_FIELD,
                                                   type='e')
            nb_current_recids = len(current_recids)

        # Get the records that *should* be in this set according to
        # the admin defined settings, and compute how many should be
        # added or removed
        should_recids = get_recids_for_set_spec(set_spec)
        repository_recids_after_update |= should_recids

        nb_add_recids = len(should_recids - current_recids)
        nb_remove_recids = len(current_recids - should_recids)
        nb_should_recids = len(should_recids)

        # Adapt setName and setSpec strings lengths
        set_spec_str = set_spec
        if len(set_spec_str) > set_spec_max_length:
            set_spec_str = "%s.." % set_spec_str[:set_spec_max_length]
        set_name_str = get_set_name_for_set_spec(set_spec)
        if len(set_name_str) > set_name_max_length:
            set_name_str = "%s.." % set_name_str[:set_name_max_length]

        # Fixed-width columns built by hand-padding with spaces.
        row = " " + set_spec_str + \
              " " * ((set_spec_max_length + 2) - len(set_spec_str)) + set_name_str + \
              " " * ((set_name_max_length + 2) - len(set_name_str)) + \
              " " * (7 - len(str(nb_current_recids))) + str(nb_current_recids)
        if verbose > 1:
            row += \
                " " * max(9 - len(str(nb_add_recids)), 0) + '+' + str(nb_add_recids) + \
                " " * max(7 - len(str(nb_remove_recids)), 0) + '-' + str(nb_remove_recids) + " = " +\
                " " * max(7 - len(str(nb_should_recids)), 0) + str(nb_should_recids)
        local_write_message(row)

    local_write_message("=" * 80)
    footer = " Total(**)" + " " * (set_spec_max_length + set_name_max_length - 7) + \
             " " * (9 - len(repository_size_s)) + repository_size_s
    if verbose > 1:
        footer += ' ' * (28 - len(str(len(repository_recids_after_update)))) + str(
            len(repository_recids_after_update))
    local_write_message(footer)

    if verbose > 1:
        local_write_message(
            ' *The "after update" columns show the repository after you run this tool.'
        )
    else:
        local_write_message(
            ' *"Volume" is indicative if repository is out of sync. Use --detailed-report.'
        )
    local_write_message(
        '**The "total" is not the sum of the above numbers, but the union of the records.'
    )