def check_record(record, source_field, new_field, subfield_filter):
    """ Changes the code of a field to new_field """
    from collections import namedtuple
    from invenio.bibrecord import (record_add_field,
                                   record_delete_field,
                                   record_get_field_instances)
    assert len(source_field) == 5
    assert len(new_field) == 5
    source_field = source_field.replace("_", " ")
    new_field = new_field.replace("_", " ")
    assert len(subfield_filter) == 2
    SubfieldFilter = namedtuple('SubfieldFilter', ['code', 'value'])
    subfield_filter = SubfieldFilter(*subfield_filter)

    def filter_passes(subfield_code, result):
        return subfield_filter.code is None or (
            subfield_filter.code in ('%', subfield_code) and
            subfield_filter.value == result)

    subfields_list = []
    for subfields, ind1, ind2, _, pos in record_get_field_instances(
            record, source_field[:3], source_field[3], source_field[4]):
        if any(filter_passes(*s) for s in subfields):
            subfields_list.append(subfields)
            record_delete_field(record, source_field[:3], ind1, ind2, pos)

    for subfields in subfields_list:
        record_add_field(record, new_field[:3], new_field[3], new_field[4],
                         subfields=subfields)
        record.set_amended('move from %s to %s: %s' %
                           (source_field.replace(" ", "_"),
                            new_field.replace(" ", "_"), subfields))
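# A hedged invocation sketch for check_record() above. Assumptions: `record`
# is an amendable record object exposing set_amended(), as the checker uses,
# and fields are named in the 5-character '693__'-style MARC notation. This
# would move every 693__ field whose $e subfield equals 'CMS' into 595__;
# a filter of ('%', 'CMS') would match the value under any subfield code,
# and (None, None) moves every 693__ field unconditionally.
check_record(record, '693__', '595__', ('e', 'CMS'))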
def delete_field(rec, fnum, findex):
    """Delete a specific field.

    @param rec: a record dictionary structure
    @param fnum: a 3 characters long string indicating field tag number
    @param findex: the rec field position in the group of fields it belongs
    """
    record_delete_field(rec, fnum, field_position_local=findex)
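# A minimal usage sketch for the delete_field() wrapper above, assuming the
# Invenio 1.x bibrecord structure ({tag: [(subfields, ind1, ind2,
# controlfield_value, global_position), ...]}); the record is hand-built
# purely for illustration:
from invenio.bibrecord import record_add_field

rec = {}
record_add_field(rec, '500', ' ', ' ', subfields=[('a', 'Temporary entry')])
record_add_field(rec, '500', ' ', ' ', subfields=[('a', 'Final note')])
delete_field(rec, '500', 0)   # drops the first 500 field (local position 0)
assert len(rec['500']) == 1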
def check_record(record):
    """ Split fields """
    from invenio.bibrecord import record_delete_field
    from invenio.bibrecord import record_add_field

    message = ""
    marc = '693__e'
    tag = marc[:3]
    if tag not in record:
        return
    ind1 = marc[3].replace('_', ' ')
    ind2 = marc[4].replace('_', ' ')
    sfcode = marc[5]

    to_split = fields_to_split(record, tag, ind1, ind2, sfcode)
    if not to_split:
        return
    # work from the back to try to preserve order
    positions = to_split.keys()
    positions.sort(reverse=True)
    for global_pos in positions:
        (parts, rest_before, rest_after) = to_split[global_pos]
        message += " - split %s %s" % (tag, parts)
        record_delete_field(record, tag, ind1, ind2,
                            field_position_global=global_pos)
        parts.reverse()
        for subfield in parts:
            field = rest_before + [subfield, ] + rest_after
            record_add_field(record, tag, ind1, ind2, '', field,
                             field_position_global=global_pos)
    if message:
        record.set_amended(message)
def _delete_field_condition(self, record):
    """Checks if a subfield meets the condition for the field
    to be deleted
    """
    try:
        for field in record[self._tag]:
            for subfield in field[0]:
                if subfield[0] == self._conditionSubfield:
                    if self._condition_exact_match:
                        if self._condition == subfield[1]:
                            bibrecord.record_delete_field(
                                record, self._tag, self._ind1, self._ind2,
                                field_position_global=field[4])
                            self._modifications += 1
                            break
                    else:
                        if self._condition in subfield[1]:
                            bibrecord.record_delete_field(
                                record, self._tag, self._ind1, self._ind2,
                                field_position_global=field[4])
                            self._modifications += 1
                            break
    except KeyError:
        pass
def process_record(self, record):
    """@see: BaseFieldCommand.process_record"""
    if self._condition:
        self._delete_field_condition(record)
    else:
        bibrecord.record_delete_field(record, self._tag,
                                      self._ind1, self._ind2)
        self._modifications += 1
def check_existing_pdg_fields(recids, pdg_data, current_records):
    _print_out("Comparing new and old PDG data for " + str(len(recids)) +
               " records...")
    records = {}
    for recid in recids:
        record_mod = {}
        record_mod['001'] = deepcopy(current_records[recid]['001'])
        record_mod['084'] = deepcopy(current_records[recid]['084'])
        fields = record_get_field_instances(record_mod, '084')
        current_pdg_data = []
        for field in fields:
            if is_pdg_field(field):
                current_pdg_data.append(
                    field_get_subfield_values(field, 'a')[0])
        current_set = set(current_pdg_data)
        new_set = set(pdg_data[recid])

        deletions = list(current_set - new_set)
        additions = list(new_set - current_set)

        if len(deletions) > 0 or len(additions) > 0:
            if len(deletions) > 0:
                for field in fields:
                    if is_pdg_field(field):
                        if field_get_subfield_values(field, 'a')[0] in deletions:
                            record_delete_field(record_mod, '084',
                                                ind1=' ', ind2=' ',
                                                field_position_global=field[4])
            for pdg_field in additions:
                position = record_add_field(record_mod, '084', ' ', ' ')
                record_add_subfield_into(record_mod, '084', '2', 'PDG',
                                         field_position_global=position)
                record_add_subfield_into(record_mod, '084', '9', 'PDG',
                                         field_position_global=position)
                record_add_subfield_into(record_mod, '084', 'a', pdg_field,
                                         field_position_global=position)
            records[recid] = record_mod
            _print_verbose("Record #" + str(recid) + ": " +
                           str(len(deletions)) + " deletions and " +
                           str(len(additions)) + " additions.")
        else:
            _print_verbose("Nothing to change for record #" + str(recid))
    _print_out(str(len(records)) + " records to be corrected.")
    return records
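# is_pdg_field() is not shown in these excerpts; a plausible minimal sketch,
# assuming a PDG 084 field is one whose $2 (and/or $9) subfield is 'PDG',
# mirroring the subfields the code above adds:
def is_pdg_field(field):
    """Guess whether an 084 field instance carries a PDG code."""
    subfield_codes = dict(field[0])  # collapses repeated codes; fine for a sketch
    return 'PDG' in (subfield_codes.get('2'), subfield_codes.get('9'))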
def create_xml(recid, fname=None, oaff=None):
    affs = [a for a in oaff]
    record = get_record(recid)
    auth_location = record_get_field_instances(record, '100', '', '')[0][4]
    record_delete_field(record, '700', '', '')
    for x in affs:
        record_add_subfield_into(record, '100', 'u', x,
                                 field_position_global=auth_location)
    return print_rec(record)
def compare_references(test, a, b):
    ## Let's normalize records to remove the Invenio refextract signature
    a = create_record(a)[0]
    b = create_record(b)[0]
    record_delete_field(a, '999', 'C', '6')
    a = record_xml_output(a)
    b = record_xml_output(b)
    test.assertXmlEqual(a, b)
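# A hedged round-trip sketch for the normalization above: create_record()
# parses MARCXML into the bibrecord structure and record_xml_output()
# serialises it back, so stripping 999C6 before serialising makes two
# reference lists comparable. The XML literal is illustrative only.
from invenio.bibrecord import (create_record, record_delete_field,
                               record_xml_output)

xml = """<record>
  <datafield tag="999" ind1="C" ind2="6">
    <subfield code="a">refextract-version-signature</subfield>
  </datafield>
</record>"""
rec = create_record(xml)[0]
record_delete_field(rec, '999', 'C', '6')  # drop the signature field
print record_xml_output(rec)               # no 999C6 left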
def record_drop_fields_matching_pattern(record, pattern, fields, tag):
    """Remove fields matching given pattern from record."""
    field_positions = []
    for field in fields:
        subfields = field_get_subfield_instances(field)
        for subfield in subfields:
            if re.match(pattern, subfield[1].lower(), re.IGNORECASE):
                field_positions.append((field[1], field[2], field[4]))
                break
    for ind1, ind2, pos in field_positions:
        record_delete_field(record, tag, ind1=ind1, ind2=ind2,
                            field_position_global=pos)
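# A minimal usage sketch for record_drop_fields_matching_pattern() above,
# with a hand-built record (assumes Invenio 1.x bibrecord structures; note
# the function matches against lower-cased subfield values):
from invenio.bibrecord import record_add_field, record_get_field_instances

rec = {}
record_add_field(rec, '500', ' ', ' ', subfields=[('a', 'TEMPORARY ENTRY')])
record_add_field(rec, '500', ' ', ' ', subfields=[('a', 'Genuine abstract')])
fields = record_get_field_instances(rec, '500')
record_drop_fields_matching_pattern(rec, r'^.?(temporary entry).?$',
                                    fields, '500')
assert len(rec['500']) == 1   # only the genuine note remains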
def _filter_fields(self, record, output_fields):
    """Removes from the record all the fields that are not output_fields.

    @param record: record structure (@see: bibrecord.py for details)
    @param output_fields: list of fields that should remain in the record

    @return: record containing only fields among output_fields
    """
    # Tibor's new implementation:
    for tag in record.keys():
        if tag not in output_fields:
            bibrecord.record_delete_fields(record, tag)
    return record

    # Rado's old implementation that leads to bibrecord-related
    # bug, see <https://savannah.cern.ch/task/?10267>:
    record_keys = record.keys()
    # Check if any of the tags, fields or subfields match
    # any value in output_fields. In case of match we leave
    # the element and its children in the record.
    #
    # If the element and all its children are not among the
    # output fields, it is deleted
    for tag in record_keys:
        tag = tag.lower()
        if tag not in output_fields:
            for (subfields, ind1, ind2, value, field_number) in record[tag]:
                current_field = tag + ind1.strip() + ind2.strip()
                current_field = current_field.lower()
                if current_field not in output_fields:
                    delete_parents = True
                    for (code, value) in subfields:
                        current_subfield = current_field + code
                        current_subfield = current_subfield.lower()
                        if current_subfield not in output_fields:
                            bibrecord.record_delete_subfield(
                                record, tag, code, ind1, ind2)
                        else:
                            delete_parents = False
                    if delete_parents:
                        bibrecord.record_delete_field(
                            record, tag, ind1, ind2)
    return record
def perform_request_update_record(request_type, recid, uid, cacheMTime, data,
                                  hpChanges, undoRedoOp, isBulk=False):
    """Handle record update requests like adding, modifying, moving or
    deleting of fields or subfields. Possible common error situations:
    - Missing cache file
    - Cache file modified in other editor

    Explanation of some parameters:
    undoRedoOp - Indicates if an "undo"/"redo" operation (or an undo
                 descriptor) is performed by the current request.
    """
    response = {}
    if not cache_exists(recid, uid):
        response['resultCode'] = 106
    elif not get_cache_mtime(recid, uid) == cacheMTime and isBulk == False:
        # In case of a bulk request, the changes are deliberately performed
        # immediately one after another
        response['resultCode'] = 107
    else:
        try:
            record_revision, record, pending_changes, deactivated_hp_changes, \
                undo_list, redo_list = get_cache_file_contents(recid, uid)[1:]
        except:
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                'wrong_cache_file_format']
            return response

        # process all the Holding Pen changes operations ... regardless of
        # the request type
        if hpChanges.has_key("toDisable"):
            for changeId in hpChanges["toDisable"]:
                pending_changes[changeId]["applied_change"] = True

        if hpChanges.has_key("toEnable"):
            for changeId in hpChanges["toEnable"]:
                pending_changes[changeId]["applied_change"] = False

        if hpChanges.has_key("toOverride"):
            pending_changes = hpChanges["toOverride"]

        if hpChanges.has_key("changesetsToDeactivate"):
            for changesetId in hpChanges["changesetsToDeactivate"]:
                deactivated_hp_changes[changesetId] = True

        if hpChanges.has_key("changesetsToActivate"):
            for changesetId in hpChanges["changesetsToActivate"]:
                deactivated_hp_changes[changesetId] = False

        # processing the undo/redo entries
        if undoRedoOp == "undo":
            try:
                redo_list = [undo_list[-1]] + redo_list
                undo_list = undo_list[:-1]
            except:
                raise Exception("An exception occurred when undoing previous"
                                " operation. Undo list: " + str(undo_list) +
                                " Redo list " + str(redo_list))
        elif undoRedoOp == "redo":
            try:
                undo_list = undo_list + [redo_list[0]]
                redo_list = redo_list[1:]
            except:
                raise Exception("An exception occurred when redoing previous"
                                " operation. Undo list: " + str(undo_list) +
                                " Redo list " + str(redo_list))
        else:
            # This is a genuine operation - we have to add a new descriptor
            # to the undo list and cancel the redo unless the operation is
            # a bulk operation
            if undoRedoOp != None:
                undo_list = undo_list + [undoRedoOp]
                redo_list = []
            else:
                assert isBulk == True

        field_position_local = data.get('fieldPosition')
        if field_position_local is not None:
            field_position_local = int(field_position_local)

        if request_type == 'otherUpdateRequest':
            # An empty request. Might be useful if we want to perform
            # operations that require only the actions performed globally,
            # like modifying the holdingPen changes list
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                'editor_modifications_changed']
        elif request_type == 'deactivateHoldingPenChangeset':
            # the changeset has been marked as processed (user applied it in
            # the editor). Marking as used in the cache file.
            # CAUTION: This function has been implemented here because
            #          logically it fits with the modifications made to the
            #          cache file. No changes are made to the Holding Pen
            #          physically. The changesets are related to the cache
            #          because we want to cancel the removal every time the
            #          cache disappears for any reason
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                'disabled_hp_changeset']
        elif request_type == 'addField':
            if data['controlfield']:
                record_add_field(record, data['tag'],
                                 controlfield_value=data['value'])
                response['resultCode'] = 20
            else:
                record_add_field(record, data['tag'], data['ind1'],
                                 data['ind2'], subfields=data['subfields'],
                                 field_position_local=field_position_local)
                response['resultCode'] = 21
        elif request_type == 'addSubfields':
            subfields = data['subfields']
            for subfield in subfields:
                record_add_subfield_into(record, data['tag'], subfield[0],
                                         subfield[1], subfield_position=None,
                                         field_position_local=field_position_local)
            if len(subfields) == 1:
                response['resultCode'] = 22
            else:
                response['resultCode'] = 23
        elif request_type == 'addFieldsSubfieldsOnPositions':
            # 1) Sorting the fields by their identifiers
            fieldsToAdd = data['fieldsToAdd']
            subfieldsToAdd = data['subfieldsToAdd']
            for tag in fieldsToAdd.keys():
                positions = fieldsToAdd[tag].keys()
                positions.sort()
                for position in positions:
                    # now adding fields at a position
                    isControlfield = (len(fieldsToAdd[tag][position][0]) == 0)
                    # if there are no subfields, this is a control field
                    if isControlfield:
                        controlfieldValue = fieldsToAdd[tag][position][3]
                        record_add_field(record, tag,
                                         field_position_local=int(position),
                                         controlfield_value=controlfieldValue)
                    else:
                        subfields = fieldsToAdd[tag][position][0]
                        ind1 = fieldsToAdd[tag][position][1]
                        ind2 = fieldsToAdd[tag][position][2]
                        record_add_field(record, tag, ind1, ind2,
                                         subfields=subfields,
                                         field_position_local=int(position))
            # now adding the subfields
            for tag in subfieldsToAdd.keys():
                for fieldPosition in subfieldsToAdd[tag].keys():
                    # now the fields
                    # order not important !
                    subfieldsPositions = subfieldsToAdd[tag][fieldPosition].keys()
                    subfieldsPositions.sort()
                    for subfieldPosition in subfieldsPositions:
                        subfield = subfieldsToAdd[tag][fieldPosition][subfieldPosition]
                        record_add_subfield_into(
                            record, tag, subfield[0], subfield[1],
                            subfield_position=int(subfieldPosition),
                            field_position_local=int(fieldPosition))
            response['resultCode'] = \
                CFG_BIBEDIT_AJAX_RESULT_CODES_REV['added_positioned_subfields']
        elif request_type == 'modifyField':
            # changing the field structure: first remove subfields and then
            # add new ones... change the indices
            subfields = data['subFields']  # parse the JSON representation
                                           # of the subfields here
            new_field = create_field(subfields, data['ind1'], data['ind2'])
            record_replace_field(record, data['tag'], new_field,
                                 field_position_local=data['fieldPosition'])
            response['resultCode'] = 26
        elif request_type == 'modifyContent':
            if data['subfieldIndex'] != None:
                record_modify_subfield(record, data['tag'],
                                       data['subfieldCode'], data['value'],
                                       int(data['subfieldIndex']),
                                       field_position_local=field_position_local)
            else:
                record_modify_controlfield(record, data['tag'], data["value"],
                                           field_position_local=field_position_local)
            response['resultCode'] = 24
        elif request_type == 'moveSubfield':
            record_move_subfield(record, data['tag'],
                                 int(data['subfieldIndex']),
                                 int(data['newSubfieldIndex']),
                                 field_position_local=field_position_local)
            response['resultCode'] = 25
        elif request_type == 'moveField':
            if data['direction'] == 'up':
                final_position_local = field_position_local - 1
            else:  # direction is 'down'
                final_position_local = field_position_local + 1
            record_move_fields(record, data['tag'], [field_position_local],
                               final_position_local)
            response['resultCode'] = 32
        elif request_type == 'deleteFields':
            to_delete = data['toDelete']
            deleted_fields = 0
            deleted_subfields = 0
            for tag in to_delete:
                # Sorting the fields in a decreasing order by the local
                # position!
                fieldsOrder = to_delete[tag].keys()
                fieldsOrder.sort(lambda a, b: int(b) - int(a))
                for field_position_local in fieldsOrder:
                    if not to_delete[tag][field_position_local]:
                        # No subfields specified - delete entire field.
                        record_delete_field(record, tag,
                                            field_position_local=int(field_position_local))
                        deleted_fields += 1
                    else:
                        for subfield_position in \
                                to_delete[tag][field_position_local][::-1]:
                            # Delete subfields in reverse order (to keep the
                            # indexing correct).
                            record_delete_subfield_from(record, tag,
                                                        int(subfield_position),
                                                        field_position_local=int(field_position_local))
                            deleted_subfields += 1
            if deleted_fields == 1 and deleted_subfields == 0:
                response['resultCode'] = 26
            elif deleted_fields and deleted_subfields == 0:
                response['resultCode'] = 27
            elif deleted_subfields == 1 and deleted_fields == 0:
                response['resultCode'] = 28
            elif deleted_subfields and deleted_fields == 0:
                response['resultCode'] = 29
            else:
                response['resultCode'] = 30

        response['cacheMTime'], response['cacheDirty'] = \
            update_cache_file_contents(recid, uid, record_revision, record,
                                       pending_changes,
                                       deactivated_hp_changes,
                                       undo_list, redo_list), \
            True
    return response
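# Illustrative shape of the 'deleteFields' payload consumed above -- an
# assumption inferred from how to_delete is traversed, not a documented API:
# tag -> {local field position: list of subfield positions, or [] to drop
# the whole field}.
data = {
    'toDelete': {
        '100': {'0': []},           # delete the entire first 100 field
        '700': {'1': ['0', '2']},   # delete subfields 0 and 2 of the second 700
    }
}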
def process_record(self, record):
    """@see: BaseFieldCommand.process_record"""
    bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2)
    self._modifications += 1
ind1="%", ind2="%") if fields_500 is not None: field_positions = [] for field in fields_500: subfields = field_get_subfield_instances(field) for subfield in subfields: if re.match("^.?((temporary|brief) entry).?$", subfield[1].lower(), re.IGNORECASE): field_positions.append( (field[1], field[2], field[4])) for ind1, ind2, pos in field_positions: record_delete_field(record, '500', ind1=ind1, ind2=ind2, field_position_global=pos) # Now compare new version with existing one, returning a diff[tag] = (diffcode, [..]) # None - if field is the same for both records # ('r',) - field missing from input record, ignored ATM # ('a',) - new field added, should be updated with append # ('c', difference_comparison) -> if field field_id exists in both records, but it's value has changed # -> uploaded with correct if accepted fields_to_add = [] fields_to_correct = [] holdingpen = False difference = record_diff(existing_record, record,
def apply_filter(rec):
    """ Filters the record to be compatible within Inspire

    Parameters:
     * rec - dictionary: BibRecord structure

    Returns: dictionary, BibRecord structure
    """
    # Move recid from 001 to 035 if not hidden
    cds_id = rec['001'][0][3]
    if not 'hidden' in [x.lower() for x in
                        record_get_field_values(rec, "980", code="a")]:
        record_add_field(rec, '035', subfields=[('9', 'CDS'), ('a', cds_id)])

    # Clear control fields
    record_strip_controlfields(rec)

    # Clear other uninteresting fields
    interesting_fields = ["024", "041", "035", "037", "088", "100",
                          "110", "111", "242", "245", "246", "260",
                          "269", "300", "502", "650", "653", "693",
                          "700", "710", "773", "856", "520", "500",
                          "980"]
    for tag in rec.keys():
        if tag not in interesting_fields:
            record_delete_fields(rec, tag)

    # 980 Determine Collections
    collections = set([])
    for value in record_get_field_values(rec, '980', code='a'):
        if 'NOTE' in value.upper():
            collections.add('NOTE')
        if 'THESIS' in value.upper():
            collections.add('THESIS')
        if 'CONFERENCEPAPER' in value.upper():
            collections.add('ConferencePaper')

    if is_published(rec):
        collections.add("PUBLISHED")
        collections.add("CITEABLE")

    if not 'NOTE' in collections:
        # TODO: Move this to a KB
        kb = ['ATLAS-CONF-', 'CMS-PAS-', 'ATL-', 'CMS-DP-',
              'ALICE-INT-', 'LHCb-PUB-']
        values = record_get_field_values(rec, "088", code='a')
        for val, rep in product(values, kb):
            if val.startswith(rep):
                collections.add('NOTE')
                break

    # 980 Arxiv tag
    if record_get_field_values(rec, '035', filter_subfield_code="a",
                               filter_subfield_value="arXiv"):
        collections.add("arXiv")

    # 980 HEP && CORE
    collections.add('HEP')
    collections.add('CORE')

    # 980 Conference Note
    if not 'ConferencePaper' in collections:
        for value in record_get_field_values(rec, '962', code='n'):
            if value[-2:].isdigit():
                collections.add('ConferencePaper')
                break

    record_delete_fields(rec, "980")

    intnote = record_get_field_values(rec, '690',
                                      filter_subfield_code="a",
                                      filter_subfield_value='INTNOTE')
    if intnote:
        val_088 = record_get_field_values(rec, '088',
                                          filter_subfield_code="a")
        for val in val_088:
            if 'CMS' in val:
                url = ('http://weblib.cern.ch/abstract?CERN-CMS' +
                       val.split('CMS', 1)[-1])
                record_add_field(rec, '856', ind1='4',
                                 subfields=[('u', url)])

    # 041 Language
    languages = get_languages()
    language_fields = record_get_field_instances(rec, '041')
    record_delete_fields(rec, "041")
    for field in language_fields:
        subs = field_get_subfields(field)
        if 'a' in subs:
            if "eng" in subs['a']:
                continue
            new_value = translate_config(subs['a'][0], languages)
            new_subs = [('a', new_value)]
            record_add_field(rec, "041", subfields=new_subs)

    # 035 Externals
    scn_035_fields = record_get_field_instances(rec, '035')
    forbidden_values = ["cercer", "inspire", "xx",
                        "cern annual report", "cmscms", "wai01"]
    for field in scn_035_fields:
        subs = field_get_subfields(field)
        if '9' in subs:
            if not 'a' in subs:
                continue
            for sub in subs['9']:
                if sub.lower() in forbidden_values:
                    break
            else:
                # No forbidden values (We did not "break")
                suffixes = [s.lower() for s in subs['9']]
                if 'spires' in suffixes:
                    new_subs = [('a', 'SPIRES-%s' % subs['a'][0])]
                    record_add_field(rec, '970', subfields=new_subs)
                    continue
        if 'a' in subs:
            for sub in subs['a']:
                if sub.lower() in forbidden_values:
                    record_delete_field(rec, tag="035",
                                        field_position_global=field[4])

    rep_088_fields = record_get_field_instances(rec, '088')
    for field in rep_088_fields:
        subs = field_get_subfields(field)
        if '9' in subs:
            for val in subs['9']:
                if val.startswith('P0') or val.startswith('CM-P0'):
                    sf = [('9', 'CERN'), ('b', val)]
                    record_add_field(rec, '595', subfields=sf)
        for key, val in field[0]:
            if key in ['a', '9'] and not val.startswith('SIS-'):
                record_add_field(rec, '037', subfields=[('a', val)])
    record_delete_fields(rec, "088")

    # 037 Externals also...
    rep_037_fields = record_get_field_instances(rec, '037')
    for field in rep_037_fields:
        subs = field_get_subfields(field)
        if 'a' in subs:
            for value in subs['a']:
                if 'arXiv' in value:
                    new_subs = [('a', value), ('9', 'arXiv')]
                    for fld in record_get_field_instances(rec, '695'):
                        for key, val in field_get_subfield_instances(fld):
                            if key == 'a':
                                new_subs.append(('c', val))
                                break
                    nf = create_field(subfields=new_subs)
                    record_replace_field(rec, '037', nf, field[4])
        for key, val in field[0]:
            if key in ['a', '9'] and val.startswith('SIS-'):
                record_delete_field(rec, '037',
                                    field_position_global=field[4])

    for field in record_get_field_instances(rec, '242'):
        record_add_field(rec, '246', subfields=field[0])
    record_delete_fields(rec, '242')

    # 269 Date normalization
    for field in record_get_field_instances(rec, '269'):
        for idx, (key, value) in enumerate(field[0]):
            if key == "c":
                field[0][idx] = ("c", convert_date_to_iso(value))
                record_delete_fields(rec, "260")

    if not 'THESIS' in collections:
        for field in record_get_field_instances(rec, '260'):
            record_add_field(rec, '269', subfields=field[0])
        record_delete_fields(rec, '260')

    # 300 page number
    for field in record_get_field_instances(rec, '300'):
        for idx, (key, value) in enumerate(field[0]):
            if key == 'a':
                if "mult." not in value and value != " p":
                    field[0][idx] = ('a', re.sub(r'[^\d-]+', '', value))
                else:
                    record_delete_field(rec, '300',
                                        field_position_global=field[4])
                    break

    # 100 & 700 punctuate author names
    author_names = record_get_field_instances(rec, '100')
    author_names.extend(record_get_field_instances(rec, '700'))
    for field in author_names:
        subs = field_get_subfields(field)
        if not 'i' in subs or 'XX' in subs['i']:
            if not 'j' in subs or 'YY' in subs['j']:
                for idx, (key, value) in enumerate(field[0]):
                    if key == 'a':
                        field[0][idx] = ('a', punctuate_authorname(value))

    # 700 -> 701 Thesis supervisors
    if 'THESIS' in collections:
        for field in record_get_field_instances(rec, '700'):
            record_add_field(rec, '701', subfields=field[0])
        record_delete_fields(rec, '700')

    # 501 move subfields
    fields_501 = record_get_field_instances(rec, '502')
    for idx, field in enumerate(fields_501):
        new_subs = []
        for key, value in field[0]:
            if key == 'a':
                new_subs.append(('b', value))
            elif key == 'b':
                new_subs.append(('c', value))
            elif key == 'c':
                new_subs.append(('d', value))
            else:
                new_subs.append((key, value))
        fields_501[idx] = field_swap_subfields(field, new_subs)

    # 650 Translate Categories
    categories = get_categories()
    category_fields = record_get_field_instances(rec, '650',
                                                 ind1='1', ind2='7')
    record_delete_fields(rec, "650")
    for field in category_fields:
        for idx, (key, value) in enumerate(field[0]):
            if key == 'a':
                new_value = translate_config(value, categories)
                if new_value != value:
                    new_subs = [('2', 'INSPIRE'), ('a', new_value)]
                else:
                    new_subs = [('2', 'SzGeCERN'), ('a', value)]
                record_add_field(rec, "650", ind1="1", ind2="7",
                                 subfields=new_subs)
                break

    # 653 Free Keywords
    for field in record_get_field_instances(rec, '653', ind1='1'):
        subs = field_get_subfields(field)
        new_subs = []
        if 'a' in subs:
            for val in subs['a']:
                new_subs.extend([('9', 'author'), ('a', val)])
        new_field = create_field(subfields=new_subs, ind1='1')
        record_replace_field(rec, '653', new_field,
                             field_position_global=field[4])

    experiments = get_experiments()
    # 693 Remove if 'not applicable'
    for field in record_get_field_instances(rec, '693'):
        subs = field_get_subfields(field)
        all_subs = subs.get('a', []) + subs.get('e', [])
        if 'not applicable' in [x.lower() for x in all_subs]:
            record_delete_field(rec, '693',
                                field_position_global=field[4])
        new_subs = []
        experiment_a = ""
        experiment_e = ""
        for (key, value) in subs.iteritems():
            if key == 'a':
                experiment_a = value[0]
                new_subs.append((key, value[0]))
            elif key == 'e':
                experiment_e = value[0]
        experiment = "%s---%s" % (experiment_a.replace(" ", "-"),
                                  experiment_e)
        translated_experiments = translate_config(experiment, experiments)
        new_subs.append(("e", translated_experiments))
        record_delete_field(rec, tag="693", field_position_global=field[4])
        record_add_field(rec, "693", subfields=new_subs)

    # 710 Collaboration
    for field in record_get_field_instances(rec, '710'):
        subs = field_get_subfield_instances(field)
        for idx, (key, value) in enumerate(subs[:]):
            if key == '5':
                subs.pop(idx)
            elif value.startswith('CERN. Geneva'):
                subs.pop(idx)
        if len(subs) == 0:
            record_delete_field(rec, '710',
                                field_position_global=field[4])

    # 773 journal translations
    journals = get_journals()
    for field in record_get_field_instances(rec, '773'):
        subs = field_get_subfield_instances(field)
        new_subs = []
        for idx, (key, value) in enumerate(subs):
            if key == 'p':
                new_subs.append((key, translate_config(value, journals)))
            else:
                new_subs.append((key, value))
        record_delete_field(rec, tag="773", field_position_global=field[4])
        record_add_field(rec, "773", subfields=new_subs)

    # FFT (856) Dealing with graphs
    figure_counter = 0
    for field in record_get_field_instances(rec, '856', ind1='4'):
        subs = field_get_subfields(field)

        newsubs = []
        remove = False

        if 'z' in subs:
            is_figure = [s for s in subs['z'] if "figure" in s.lower()]
            if is_figure and 'u' in subs:
                is_subformat = [s for s in subs['u']
                                if "subformat" in s.lower()]
                if not is_subformat:
                    url = subs['u'][0]
                    if url.endswith(".pdf"):
                        # We try to convert
                        fd, local_url = mkstemp(suffix=os.path.basename(url),
                                                dir=CFG_TMPSHAREDDIR)
                        os.close(fd)
                        _print("Downloading %s into %s" % (url, local_url),
                               verbose=5)
                        plotfile = ""
                        try:
                            plotfile = download_url(url=url,
                                                    download_to_file=local_url,
                                                    timeout=30.0)
                        except InvenioFileDownloadError:
                            _print("Download failed while attempting to reach"
                                   " %s. Skipping.." % (url,))
                            remove = True
                        if plotfile:
                            converted = convert_images([plotfile])
                            if converted:
                                url = converted.pop()
                                _print("Successfully converted %s to %s" %
                                       (local_url, url), verbose=5)
                            else:
                                _print("Conversion failed on %s" %
                                       (local_url,))
                                url = None
                                remove = True
                    if url:
                        newsubs.append(('a', url))
                        newsubs.append(('t', 'Plot'))
                        figure_counter += 1
                        if 'y' in subs:
                            newsubs.append(('d', "%05d %s" %
                                            (figure_counter, subs['y'][0])))
                            newsubs.append(('n', subs['y'][0]))
                        else:
                            # Get basename without extension.
                            name = os.path.basename(
                                os.path.splitext(subs['u'][0])[0])
                            newsubs.append(('d', "%05d %s" %
                                            (figure_counter, name)))
                            newsubs.append(('n', name))

        if not newsubs and 'u' in subs:
            is_fulltext = [s for s in subs['u'] if ".pdf" in s]
            if is_fulltext:
                newsubs = [('t', 'INSPIRE-PUBLIC'), ('a', subs['u'][0])]

        if not newsubs and 'u' in subs:
            remove = True
            is_zipfile = [s for s in subs['u'] if ".zip" in s]
            if is_zipfile:
                url = is_zipfile[0]
                local_url = os.path.join(CFG_TMPSHAREDDIR,
                                         os.path.basename(url))
                _print("Downloading %s into %s" % (url, local_url),
                       verbose=5)
                zipped_archive = ""
                try:
                    zipped_archive = download_url(url=is_zipfile[0],
                                                  download_to_file=local_url,
                                                  timeout=30.0)
                except InvenioFileDownloadError:
                    _print("Download failed while attempting to reach %s."
                           " Skipping.." % (is_zipfile[0],))
                    remove = True
                if zipped_archive:
                    unzipped_archive = unzip(zipped_archive)
                    list_of_pngs = locate("*.png", unzipped_archive)
                    for png in list_of_pngs:
                        if "_vti_" in png or "__MACOSX" in png:
                            continue
                        figure_counter += 1
                        plotsubs = []
                        plotsubs.append(('a', png))
                        caption = '%05d %s' % (figure_counter,
                                               os.path.basename(png))
                        plotsubs.append(('d', caption))
                        plotsubs.append(('t', 'Plot'))
                        record_add_field(rec, 'FFT', subfields=plotsubs)

        if not remove and not newsubs and 'u' in subs:
            urls = ('http://cdsweb.cern.ch', 'http://cms.cern.ch',
                    'http://cmsdoc.cern.ch', 'http://documents.cern.ch',
                    'http://preprints.cern.ch', 'http://cds.cern.ch')
            for val in subs['u']:
                if any(url in val for url in urls):
                    remove = True
                    break
                if val.endswith('ps.gz'):
                    remove = True

        if newsubs:
            record_add_field(rec, 'FFT', subfields=newsubs)
            remove = True

        if remove:
            record_delete_field(rec, '856', ind1='4',
                                field_position_global=field[4])

    # 500 - Preliminary results
    if "THESIS" not in collections:
        subs = [('a', "Preliminary results")]
        record_add_field(rec, "500", subfields=subs)

    for collection in collections:
        record_add_field(rec, '980', subfields=[('a', collection)])

    return rec
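# translate_config() is used throughout apply_filter() but not shown in these
# excerpts; a plausible minimal sketch, assuming it is a simple knowledge-base
# lookup that falls back to the original value when no mapping exists:
def translate_config(value, kb):
    """Return the configured mapping for value, or value itself if unmapped."""
    return kb.get(value, value)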
def check_variants(record, sort_variants):
    """ are variants unique?
    list variants only once, delete more fields if no other subfield
    (e.g. $$b), set_invalid otherwise
    normalize variants
    option to sort variants
    """
    from invenio.bibrecord import record_delete_field, record_add_field
    from operator import itemgetter
    import copy

    all_variants = []
    recid = record.record_id
    for field in record['730']:
        # get info for field
        name = ''
        othersubfields = False
        position_name = None
        letter = ' '
        for num_sf, (code, value) in enumerate(field[0]):
            if code == 'a':
                name = value
                position_name = num_sf
            else:
                othersubfields = True
            if code == 'b':
                letter = value
        if not name:
            record.set_invalid('field 730 without $$a subfield: %s. '
                               % (field, ))
            sort_variants = False
            continue

        norm_name = normalize_name(name)
        # is it normalized?
        if not name == norm_name:
            record.set_amended('normalized name variant: "%s" ' % name)
            field[0][position_name] = ('a', norm_name)
        if norm_name in all_variants:
            # avoid adding variants multiple times
            if othersubfields:
                # let a human do this
                record.set_invalid('variant is listed twice: "%s" ' % name)
            else:
                record.set_amended('deleted already existing variant: "%s" '
                                   % name)
                record_delete_field(record, '730',
                                    field_position_global=field[4])
        else:
            all_variants.append(norm_name)
            result = searchforothervariant(norm_name, recid)
            if result:
                record.set_invalid('Name variant "%s" exists in other '
                                   'record %s. ' % (norm_name, result))

    if sort_variants:
        # sort by letter ($$b), length (longest first), name
        sort_index = {}
        for num_f, field in enumerate(record['730']):
            # get info for field
            name = ''
            letter = ' '
            for code, value in field[0]:
                if code == 'a':
                    name = value
                elif code == 'b':
                    letter = value
            sort_index[num_f] = (letter, len(name) * -1, name)
        m730 = copy.deepcopy(record['730'])
        sorted_keys = [k for k, _ in sorted(sort_index.items(),
                                            key=itemgetter(1))]
        m730_sort = [m730[num_f] for num_f in sorted_keys]
        if m730_sort != m730:
            # we have to get rid of global positions to really sort it
            record.set_amended('Name variants sorted. ')
            record_delete_field(record, '730')
            for field in m730_sort:
                record_add_field(record, '730', ind1=' ', ind2=' ',
                                 subfields=field[0], controlfield_value='')

    return all_variants
def compare_records(self, record1, record2, opt_mode=None):
    """
    Compares two records to identify added/modified/deleted tags.
    The records are either the upload record or existing record or
    record archived.

    Returns a Tuple of Dictionaries (for modified/added/deleted tags).
    """
    def remove_control_tag(tag_list):
        """
        Returns the list of keys without any control tags
        """
        cleaned_list = [item for item in tag_list
                        if item not in CFG_BIBUPLOAD_CONTROLFIELD_TAGS]
        return cleaned_list

    def group_record_tags():
        """
        Groups all the tags in a Record as Common/Added/Deleted tags.
        Returns a Tuple of 3 lists for each category mentioned above.
        """
        rec1_keys = record1.keys()
        rec2_keys = record2.keys()

        com_tag_lst = [key for key in rec1_keys if key in rec2_keys]
        # tags in record2 not present in record1
        del_tag_lst = [key for key in rec2_keys if key not in rec1_keys]
        # additional tags in record1
        add_tag_lst = [key for key in rec1_keys if key not in rec2_keys]

        return (com_tag_lst, add_tag_lst, del_tag_lst)

    # declaring dictionaries to hold the identified patch
    mod_patch = {}
    add_patch = {}
    del_patch = {}
    result = {}

    (common_tags, added_tags, deleted_tags) = group_record_tags()

    if common_tags:
        mod_patch = self.find_modified_tags(common_tags, record1, record2)

    if added_tags:
        for tag in added_tags:
            add_patch[tag] = record1[tag]

    # if record comes with correct, it should already have fields
    # marked with '0' code. If not, the deleted tags are handled here.
    if deleted_tags and (opt_mode == 'replace' or opt_mode == 'delete'):
        for tag in deleted_tags:
            del_patch[tag] = record2[tag]

    # returning back a result dictionary with all available patches
    if mod_patch:
        result['MOD'] = mod_patch

    if add_patch:
        result['ADD'] = add_patch

    if del_patch:
        # for a tag that has been deleted in the upload record in replace
        # mode, loop through all the fields of the tag and add additional
        # subfield with code '0' and value '__DELETE_FIELDS__'
        # NOTE: Indicators taken into consideration while deleting fields
        for tag in del_patch:
            for data_tuple in del_patch[tag]:
                ind1 = data_tuple[1]
                ind2 = data_tuple[2]
                record_delete_field(del_patch, tag, ind1, ind2)
                record_add_field(del_patch, tag, ind1, ind2, "",
                                 [(CFG_BIBUPLOAD_DELETE_CODE,
                                   CFG_BIBUPLOAD_DELETE_VALUE)])
        result['DEL'] = del_patch

    return result
def apply_filter(rec):
    """ Filters the record to be compatible with Inspire
    Parameters:
     * rec - dictionary: BibRecord structure
    Returns: dictionary, BibRecord structure
    """
    # Move recid from 001 to 035 if not hidden
    cds_id = rec["001"][0][3]
    if "hidden" not in [x.lower() for x in
                        record_get_field_values(rec, "980", code="a")]:
        record_add_field(rec, "035", subfields=[("9", "CDS"), ("a", cds_id)])

    # Clear control fields
    record_strip_controlfields(rec)

    # Clear other uninteresting fields
    interesting_fields = ["024", "041", "035", "037", "088", "100",
                          "110", "111", "242", "245", "246", "260",
                          "269", "300", "502", "650", "653", "693",
                          "700", "710", "773", "856", "520", "500",
                          "980"]
    for tag in rec.keys():
        if tag not in interesting_fields:
            record_delete_fields(rec, tag)

    # 980 Determine Collections
    collections = set([])
    for value in record_get_field_values(rec, "980", code="a"):
        if "NOTE" in value.upper():
            collections.add("NOTE")
        if "THESIS" in value.upper():
            collections.add("THESIS")
        if "CONFERENCEPAPER" in value.upper():
            collections.add("ConferencePaper")

    if is_published(rec):
        collections.add("PUBLISHED")
        collections.add("CITEABLE")

    if "NOTE" not in collections:
        # TODO: Move this to a KB
        kb = ["ATLAS-CONF-", "CMS-PAS-", "ATL-", "CMS-DP-",
              "ALICE-INT-", "LHCb-PUB-"]
        values = record_get_field_values(rec, "088", code="a")
        for val, rep in product(values, kb):
            if val.startswith(rep):
                collections.add("NOTE")
                break

    # 980 Arxiv tag
    if record_get_field_values(rec, "035", filter_subfield_code="a",
                               filter_subfield_value="arXiv"):
        collections.add("arXiv")

    # 980 HEP && CORE
    collections.add("HEP")
    collections.add("CORE")

    # 980 Conference Note
    if "ConferencePaper" not in collections:
        for value in record_get_field_values(rec, "962", code="n"):
            if value[-2:].isdigit():
                collections.add("ConferencePaper")
                break

    record_delete_fields(rec, "980")

    intnote = record_get_field_values(rec, "690",
                                      filter_subfield_code="a",
                                      filter_subfield_value="INTNOTE")
    if intnote:
        val_088 = record_get_field_values(rec, "088",
                                          filter_subfield_code="a")
        for val in val_088:
            if "CMS" in val:
                url = ("http://weblib.cern.ch/abstract?CERN-CMS" +
                       val.split("CMS", 1)[-1])
                record_add_field(rec, "856", ind1="4",
                                 subfields=[("u", url)])

    # 041 Language
    languages = get_languages()
    language_fields = record_get_field_instances(rec, "041")
    record_delete_fields(rec, "041")
    for field in language_fields:
        subs = field_get_subfields(field)
        if "a" in subs:
            if "eng" in subs["a"]:
                continue
            new_value = translate_config(subs["a"][0], languages)
            new_subs = [("a", new_value)]
            record_add_field(rec, "041", subfields=new_subs)

    # 035 Externals
    scn_035_fields = record_get_field_instances(rec, "035")
    forbidden_values = ["cercer", "inspire", "xx", "cern annual report",
                        "cmscms", "wai01"]
    for field in scn_035_fields:
        subs = field_get_subfields(field)
        if "9" in subs:
            if "a" not in subs:
                continue
            for sub in subs["9"]:
                if sub.lower() in forbidden_values:
                    break
            else:
                # No forbidden values (We did not "break")
                suffixes = [s.lower() for s in subs["9"]]
                if "spires" in suffixes:
                    new_subs = [("a", "SPIRES-%s" % subs["a"][0])]
                    record_add_field(rec, "970", subfields=new_subs)
                    continue
        if "a" in subs:
            for sub in subs["a"]:
                if sub.lower() in forbidden_values:
                    record_delete_field(rec, tag="035",
                                        field_position_global=field[4])

    rep_088_fields = record_get_field_instances(rec, "088")
    for field in rep_088_fields:
        subs = field_get_subfields(field)
        if "9" in subs:
            for val in subs["9"]:
                if val.startswith("P0") or val.startswith("CM-P0"):
                    sf = [("9", "CERN"), ("b", val)]
                    record_add_field(rec, "595", subfields=sf)
        for key, val in field[0]:
            if key in ["a", "9"] and not val.startswith("SIS-"):
                record_add_field(rec, "037", subfields=[("a", val)])
    record_delete_fields(rec, "088")

    # 037 Externals also...
    rep_037_fields = record_get_field_instances(rec, "037")
    for field in rep_037_fields:
        subs = field_get_subfields(field)
        if "a" in subs:
            for value in subs["a"]:
                if "arXiv" in value:
                    new_subs = [("a", value), ("9", "arXiv")]
                    for fld in record_get_field_instances(rec, "695"):
                        for key, val in field_get_subfield_instances(fld):
                            if key == "a":
                                new_subs.append(("c", val))
                                break
                    nf = create_field(subfields=new_subs)
                    record_replace_field(rec, "037", nf, field[4])
        for key, val in field[0]:
            if key in ["a", "9"] and val.startswith("SIS-"):
                record_delete_field(rec, "037",
                                    field_position_global=field[4])

    for field in record_get_field_instances(rec, "242"):
        record_add_field(rec, "246", subfields=field[0])
    record_delete_fields(rec, "242")

    # 269 Date normalization
    for field in record_get_field_instances(rec, "269"):
        for idx, (key, value) in enumerate(field[0]):
            if key == "c":
                field[0][idx] = ("c", convert_date_to_iso(value))
                record_delete_fields(rec, "260")

    if "THESIS" not in collections:
        for field in record_get_field_instances(rec, "260"):
            record_add_field(rec, "269", subfields=field[0])
        record_delete_fields(rec, "260")

    # 300 page number
    for field in record_get_field_instances(rec, "300"):
        for idx, (key, value) in enumerate(field[0]):
            if key == "a":
                if "mult." not in value and value != " p":
                    field[0][idx] = ("a", re.sub(r"[^\d-]+", "", value))
                else:
                    record_delete_field(rec, "300",
                                        field_position_global=field[4])
                break

    # 100 & 700 punctuate author names
    author_names = record_get_field_instances(rec, "100")
    author_names.extend(record_get_field_instances(rec, "700"))
    for field in author_names:
        subs = field_get_subfields(field)
        if "i" not in subs or "XX" in subs["i"]:
            if "j" not in subs or "YY" in subs["j"]:
                for idx, (key, value) in enumerate(field[0]):
                    if key == "a":
                        field[0][idx] = ("a", punctuate_authorname(value))

    # 700 -> 701 Thesis supervisors
    if "THESIS" in collections:
        for field in record_get_field_instances(rec, "700"):
            record_add_field(rec, "701", subfields=field[0])
        record_delete_fields(rec, "700")

    # 502 move subfields
    fields_502 = record_get_field_instances(rec, "502")
    for idx, field in enumerate(fields_502):
        new_subs = []
        for key, value in field[0]:
            if key == "a":
                new_subs.append(("b", value))
            elif key == "b":
                new_subs.append(("c", value))
            elif key == "c":
                new_subs.append(("d", value))
            else:
                new_subs.append((key, value))
        fields_502[idx] = field_swap_subfields(field, new_subs)

    # 650 Translate Categories
    categories = get_categories()
    category_fields = record_get_field_instances(rec, "650",
                                                 ind1="1", ind2="7")
    record_delete_fields(rec, "650")
    for field in category_fields:
        for idx, (key, value) in enumerate(field[0]):
            if key == "a":
                new_value = translate_config(value, categories)
                if new_value != value:
                    new_subs = [("2", "INSPIRE"), ("a", new_value)]
                else:
                    new_subs = [("2", "SzGeCERN"), ("a", value)]
                record_add_field(rec, "650", ind1="1", ind2="7",
                                 subfields=new_subs)
                break

    # 653 Free Keywords
    for field in record_get_field_instances(rec, "653", ind1="1"):
        subs = field_get_subfields(field)
        new_subs = []
        if "a" in subs:
            for val in subs["a"]:
                new_subs.extend([("9", "author"), ("a", val)])
        new_field = create_field(subfields=new_subs, ind1="1")
        record_replace_field(rec, "653", new_field,
                             field_position_global=field[4])

    experiments = get_experiments()
    # 693 Remove if 'not applicable'
    for field in record_get_field_instances(rec, "693"):
        subs = field_get_subfields(field)
        all_subs = subs.get("a", []) + subs.get("e", [])
        if "not applicable" in [x.lower() for x in all_subs]:
            record_delete_field(rec, "693",
                                field_position_global=field[4])
        new_subs = []
        experiment_a = ""
        experiment_e = ""
        for (key, value) in subs.iteritems():
            if key == "a":
                experiment_a = value[0]
                new_subs.append((key, value[0]))
            elif key == "e":
                experiment_e = value[0]
        experiment = "%s---%s" % (experiment_a.replace(" ", "-"),
                                  experiment_e)
        translated_experiments = translate_config(experiment, experiments)
        new_subs.append(("e", translated_experiments))
        record_delete_field(rec, tag="693",
                            field_position_global=field[4])
        record_add_field(rec, "693", subfields=new_subs)

    # 710 Collaboration
    for field in record_get_field_instances(rec, "710"):
        subs = field_get_subfield_instances(field)
        for idx, (key, value) in enumerate(subs[:]):
            if key == "5":
                subs.pop(idx)
            elif value.startswith("CERN. Geneva"):
                subs.pop(idx)
        if len(subs) == 0:
            record_delete_field(rec, "710",
                                field_position_global=field[4])

    # 773 journal translations
    journals = get_journals()
    for field in record_get_field_instances(rec, "773"):
        subs = field_get_subfield_instances(field)
        new_subs = []
        for idx, (key, value) in enumerate(subs):
            if key == "p":
                new_subs.append((key, translate_config(value, journals)))
            else:
                new_subs.append((key, value))
        record_delete_field(rec, tag="773",
                            field_position_global=field[4])
        record_add_field(rec, "773", subfields=new_subs)

    # FFT (856) Dealing with graphs
    figure_counter = 0
    for field in record_get_field_instances(rec, "856", ind1="4"):
        subs = field_get_subfields(field)

        newsubs = []
        remove = False

        if "z" in subs:
            is_figure = [s for s in subs["z"] if "figure" in s.lower()]
            if is_figure and "u" in subs:
                is_subformat = [s for s in subs["u"]
                                if "subformat" in s.lower()]
                if not is_subformat:
                    url = subs["u"][0]
                    if url.endswith(".pdf"):
                        # We try to convert
                        fd, local_url = mkstemp(suffix=os.path.basename(url),
                                                dir=CFG_TMPSHAREDDIR)
                        os.close(fd)
                        _print("Downloading %s into %s" % (url, local_url),
                               verbose=5)
                        plotfile = ""
                        try:
                            plotfile = download_url(
                                url=url,
                                download_to_file=local_url,
                                timeout=30.0)
                        except InvenioFileDownloadError:
                            _print("Download failed while attempting to"
                                   " reach %s. Skipping.." % (url,))
                            remove = True
                        if plotfile:
                            converted = convert_images([plotfile])
                            if converted:
                                url = converted.pop()
                                _print("Successfully converted %s to %s"
                                       % (local_url, url), verbose=5)
                            else:
                                _print("Conversion failed on %s"
                                       % (local_url,))
                                url = None
                                remove = True
                    if url:
                        newsubs.append(("a", url))
                        newsubs.append(("t", "Plot"))
                        figure_counter += 1
                        if "y" in subs:
                            newsubs.append(("d", "%05d %s"
                                            % (figure_counter,
                                               subs["y"][0])))
                            newsubs.append(("n", subs["y"][0]))
                        else:
                            # Get basename without extension.
                            name = os.path.basename(
                                os.path.splitext(subs["u"][0])[0])
                            newsubs.append(("d", "%05d %s"
                                            % (figure_counter, name)))
                            newsubs.append(("n", name))

        if not newsubs and "u" in subs:
            is_fulltext = [s for s in subs["u"] if ".pdf" in s]
            if is_fulltext:
                newsubs = [("t", "INSPIRE-PUBLIC"), ("a", subs["u"][0])]

        if not newsubs and "u" in subs:
            remove = True
            is_zipfile = [s for s in subs["u"] if ".zip" in s]
            if is_zipfile:
                url = is_zipfile[0]
                local_url = os.path.join(CFG_TMPSHAREDDIR,
                                         os.path.basename(url))
                _print("Downloading %s into %s" % (url, local_url),
                       verbose=5)
                zipped_archive = ""
                try:
                    zipped_archive = download_url(
                        url=is_zipfile[0],
                        download_to_file=local_url,
                        timeout=30.0)
                except InvenioFileDownloadError:
                    _print("Download failed while attempting to reach %s."
                           " Skipping.." % (is_zipfile[0],))
                    remove = True
                if zipped_archive:
                    unzipped_archive = unzip(zipped_archive)
                    list_of_pngs = locate("*.png", unzipped_archive)
                    for png in list_of_pngs:
                        if "_vti_" in png or "__MACOSX" in png:
                            continue
                        figure_counter += 1
                        plotsubs = []
                        plotsubs.append(("a", png))
                        caption = "%05d %s" % (figure_counter,
                                               os.path.basename(png))
                        plotsubs.append(("d", caption))
                        plotsubs.append(("t", "Plot"))
                        record_add_field(rec, "FFT", subfields=plotsubs)

        if not remove and not newsubs and "u" in subs:
            urls = ("http://cdsweb.cern.ch", "http://cms.cern.ch",
                    "http://cmsdoc.cern.ch", "http://documents.cern.ch",
                    "http://preprints.cern.ch", "http://cds.cern.ch")
            for val in subs["u"]:
                if any(url in val for url in urls):
                    remove = True
                    break
                if val.endswith("ps.gz"):
                    remove = True

        if newsubs:
            record_add_field(rec, "FFT", subfields=newsubs)
            remove = True

        if remove:
            record_delete_field(rec, "856", ind1="4",
                                field_position_global=field[4])

    # 500 - Preliminary results
    if "THESIS" not in collections:
        subs = [("a", "Preliminary results")]
        record_add_field(rec, "500", subfields=subs)

    for collection in collections:
        record_add_field(rec, "980", subfields=[("a", collection)])

    return rec
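# Minimal usage sketch for apply_filter above. It assumes an Invenio 1.x
# environment where invenio.bibrecord is importable; the MARCXML snippet and
# the record id are invented for illustration, not taken from a real record.
from invenio.bibrecord import create_record, print_rec

def _demo_apply_filter():
    marcxml = """<record>
      <controlfield tag="001">1234</controlfield>
      <datafield tag="980" ind1=" " ind2=" ">
        <subfield code="a">THESIS</subfield>
      </datafield>
    </record>"""
    rec = create_record(marcxml)[0]
    # recid is moved from 001 to 035, collections are rebuilt in 980
    rec = apply_filter(rec)
    return print_rec(rec)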
def perform_request_update_record(request_type, recid, uid, cacheMTime, data,
                                  changeApplied, isBulk=False):
    """Handle record update requests like adding, modifying, moving or
    deleting of fields or subfields. Possible common error situations:
    - Missing cache file
    - Cache file modified in other editor
    """
    response = {}

    if not cache_exists(recid, uid):
        response['resultCode'] = 106
    elif not get_cache_mtime(recid, uid) == cacheMTime and isBulk == False:
        # In case of a bulk request, the changes are deliberately performed
        # immediately one after another
        response['resultCode'] = 107
    else:
        try:
            record_revision, record, pending_changes, \
                desactivated_hp_changes = \
                get_cache_file_contents(recid, uid)[1:]
        except:
            response['resultCode'] = \
                CFG_BIBEDIT_AJAX_RESULT_CODES_REV['wrong_cache_file_format']
            return response

        if changeApplied != -1:
            pending_changes = pending_changes[:changeApplied] + \
                pending_changes[changeApplied + 1:]

        field_position_local = data.get('fieldPosition')
        if field_position_local is not None:
            field_position_local = int(field_position_local)

        if request_type == 'overrideChangesList':
            pending_changes = data['newChanges']
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                'editor_modifications_changed']
        elif request_type == 'removeChange':
            # the change is removed automatically by passing the
            # changeApplied parameter
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                'editor_modifications_changed']
        elif request_type == 'desactivateHoldingPenChangeset':
            # the changeset has been marked as processed (user applied it in
            # the editor); mark it as used in the cache file
            # CAUTION: This function has been implemented here because
            # logically it fits with the modifications made to the cache
            # file. No changes are made to the Holding Pen physically. The
            # changesets are related to the cache because we want to cancel
            # the removal every time the cache disappears for any reason
            desactivated_hp_changes[data.get('desactivatedChangeset')] = True
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                'disabled_hp_changeset']
        elif request_type == 'addField':
            if data['controlfield']:
                record_add_field(record, data['tag'],
                                 controlfield_value=data['value'])
                response['resultCode'] = 20
            else:
                record_add_field(record, data['tag'], data['ind1'],
                                 data['ind2'], subfields=data['subfields'],
                                 field_position_local=field_position_local)
                response['resultCode'] = 21
        elif request_type == 'addSubfields':
            subfields = data['subfields']
            for subfield in subfields:
                record_add_subfield_into(
                    record, data['tag'], subfield[0], subfield[1],
                    subfield_position=None,
                    field_position_local=field_position_local)
            if len(subfields) == 1:
                response['resultCode'] = 22
            else:
                response['resultCode'] = 23
        elif request_type == 'modifyField':
            # changing the field structure: first remove the subfields and
            # then add the new ones, changing the indices
            # parse the JSON representation of the subfields here
            subfields = data['subFields']
            new_field = create_field(subfields, data['ind1'], data['ind2'])
            record_replace_field(record, data['tag'], new_field,
                                 field_position_local=data['fieldPosition'])
            response['resultCode'] = 26
            # response['debuggingValue'] = data['subFields']
        elif request_type == 'modifyContent':
            if data['subfieldIndex'] is not None:
                record_modify_subfield(
                    record, data['tag'], data['subfieldCode'], data['value'],
                    int(data['subfieldIndex']),
                    field_position_local=field_position_local)
            else:
                record_modify_controlfield(
                    record, data['tag'], data["value"],
                    field_position_local=field_position_local)
            response['resultCode'] = 24
        elif request_type == 'moveSubfield':
            record_move_subfield(
                record, data['tag'], int(data['subfieldIndex']),
                int(data['newSubfieldIndex']),
                field_position_local=field_position_local)
            response['resultCode'] = 25
        elif request_type == 'moveField':
            if data['direction'] == 'up':
                final_position_local = field_position_local - 1
            else:  # direction is 'down'
                final_position_local = field_position_local + 1
            record_move_fields(record, data['tag'], [field_position_local],
                               final_position_local)
            response['resultCode'] = 32
        elif request_type == 'deleteFields':
            to_delete = data['toDelete']
            deleted_fields = 0
            deleted_subfields = 0
            for tag in to_delete:
                # Sort the fields in decreasing order by local position!
                fieldsOrder = to_delete[tag].keys()
                fieldsOrder.sort(lambda a, b: int(b) - int(a))
                for field_position_local in fieldsOrder:
                    if not to_delete[tag][field_position_local]:
                        # No subfields specified - delete entire field.
                        record_delete_field(
                            record, tag,
                            field_position_local=int(field_position_local))
                        deleted_fields += 1
                    else:
                        for subfield_position in \
                                to_delete[tag][field_position_local][::-1]:
                            # Delete subfields in reverse order (to keep the
                            # indexing correct).
                            record_delete_subfield_from(
                                record, tag, int(subfield_position),
                                field_position_local=int(field_position_local))
                            deleted_subfields += 1
            if deleted_fields == 1 and deleted_subfields == 0:
                response['resultCode'] = 26
            elif deleted_fields and deleted_subfields == 0:
                response['resultCode'] = 27
            elif deleted_subfields == 1 and deleted_fields == 0:
                response['resultCode'] = 28
            elif deleted_subfields and deleted_fields == 0:
                response['resultCode'] = 29
            else:
                response['resultCode'] = 30

        response['cacheMTime'], response['cacheDirty'] = \
            update_cache_file_contents(recid, uid, record_revision, record,
                                       pending_changes,
                                       desactivated_hp_changes), \
            True

    return response
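# Illustrative payload for perform_request_update_record. The exact JSON the
# BibEdit UI sends is not shown in this file, so the values below are a
# hedged sketch based only on the keys the handler above actually reads
# ('tag', 'ind1', 'ind2', 'controlfield', 'subfields', 'fieldPosition').
EXAMPLE_ADD_FIELD_DATA = {
    'tag': '700',
    'ind1': ' ',
    'ind2': ' ',
    'controlfield': False,
    'subfields': [('a', 'Doe, John'), ('u', 'CERN')],
    'fieldPosition': '0',
}
# response = perform_request_update_record('addField', recid, uid,
#                                          cache_mtime,
#                                          EXAMPLE_ADD_FIELD_DATA,
#                                          changeApplied=-1)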
def compare_records(self, record1, record2, opt_mode=None):
    """
    Compares two records to identify added/modified/deleted tags.
    The records are either the upload record, the existing record or the
    archived record.

    Returns a Tuple of Dictionaries (for modified/added/deleted tags).
    """
    def remove_control_tag(tag_list):
        """
        Returns the list of keys without any control tags
        """
        cleaned_list = [item for item in tag_list
                        if item not in CFG_BIBUPLOAD_CONTROLFIELD_TAGS]
        return cleaned_list

    def group_record_tags():
        """
        Groups all the tags in a Record as Common/Added/Deleted tags.
        Returns a Tuple of 3 lists for each category mentioned above.
        """
        rec1_keys = record1.keys()
        rec2_keys = record2.keys()

        com_tag_lst = [key for key in rec1_keys if key in rec2_keys]
        # tags in record2 not present in record1
        del_tag_lst = [key for key in rec2_keys if key not in rec1_keys]
        # additional tags in record1
        add_tag_lst = [key for key in rec1_keys if key not in rec2_keys]

        return (com_tag_lst, add_tag_lst, del_tag_lst)

    # declaring dictionaries to hold the identified patch
    mod_patch = {}
    add_patch = {}
    del_patch = {}
    result = {}

    (common_tags, added_tags, deleted_tags) = group_record_tags()

    if common_tags:
        mod_patch = self.find_modified_tags(common_tags, record1, record2)

    if added_tags:
        for tag in added_tags:
            add_patch[tag] = record1[tag]

    # if the record comes with 'correct', it should already have fields
    # marked with the '0' code; if not, the deleted tag list is used below
    # to mark those fields for deletion
    if deleted_tags and opt_mode in ('replace', 'delete'):
        for tag in deleted_tags:
            del_patch[tag] = record2[tag]

    # returning back a result dictionary with all available patches
    if mod_patch:
        result['MOD'] = mod_patch

    if add_patch:
        result['ADD'] = add_patch

    if del_patch:
        # for a tag that has been deleted in the upload record in replace
        # mode, loop through all the fields of the tag and add an additional
        # subfield with code '0' and value '__DELETE_FIELDS__'
        # NOTE: Indicators are taken into consideration while deleting fields
        for tag in del_patch:
            for data_tuple in del_patch[tag]:
                ind1 = data_tuple[1]
                ind2 = data_tuple[2]
                record_delete_field(del_patch, tag, ind1, ind2)
                record_add_field(del_patch, tag, ind1, ind2, "",
                                 [(CFG_BIBUPLOAD_DELETE_CODE,
                                   CFG_BIBUPLOAD_DELETE_VALUE)])
        result['DEL'] = del_patch

    return result
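# Hedged sketch of the patch dictionary shape compare_records returns, using
# the bibrecord field tuple layout (subfields, ind1, ind2,
# controlfield_value, global_position). The concrete field values are
# invented; the '0'/'__DELETE_FIELDS__' marker follows the comment in the
# code above (the real values come from CFG_BIBUPLOAD_DELETE_CODE/VALUE).
EXAMPLE_PATCH = {
    'ADD': {'700': [([('a', 'New, Author')], ' ', ' ', '', 4)]},
    'MOD': {'245': [([('a', 'Corrected title')], ' ', ' ', '', 2)]},
    'DEL': {'856': [([('u', 'http://example.org'),
                      ('0', '__DELETE_FIELDS__')], '4', ' ', '', 7)]},
}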
def perform_request_record(req, request_type, recid, uid, data,
                           ln=CFG_SITE_LANG):
    """Handle 'major' record related requests like fetching, submitting or
    deleting a record, cancel editing or preparing a record for merging.
    """
    response = {}

    if request_type == "newRecord":
        # Create a new record.
        new_recid = reserve_record_id()
        new_type = data["newType"]
        if new_type == "empty":
            # Create a new empty record.
            create_cache_file(recid, uid)
            response["resultCode"], response["newRecID"] = 6, new_recid
        elif new_type == "template":
            # Create a new record from XML record template.
            template_filename = data["templateFilename"]
            template = get_record_template(template_filename)
            if not template:
                response["resultCode"] = 108
            else:
                record = create_record(template)[0]
                if not record:
                    response["resultCode"] = 109
                else:
                    record_add_field(record, "001",
                                     controlfield_value=str(new_recid))
                    create_cache_file(new_recid, uid, record, True)
                    response["resultCode"], response["newRecID"] = \
                        7, new_recid
        elif new_type == "clone":
            # Clone an existing record (from the user's cache).
            existing_cache = cache_exists(recid, uid)
            if existing_cache:
                try:
                    record = get_cache_file_contents(recid, uid)[2]
                except:
                    # if, for example, the cache format was wrong (outdated)
                    record = get_bibrecord(recid)
            else:
                # Cache missing. Fall back to using original version.
                record = get_bibrecord(recid)
            record_delete_field(record, "001")
            record_add_field(record, "001",
                             controlfield_value=str(new_recid))
            create_cache_file(new_recid, uid, record, True)
            response["resultCode"], response["newRecID"] = 8, new_recid

    elif request_type == "getRecord":
        # Fetch the record. Possible error situations:
        # - Non-existing record
        # - Deleted record
        # - Record locked by other user
        # - Record locked by queue
        # A cache file will be created if it does not exist.
        # If the cache is outdated (i.e., not based on the latest DB
        # revision), cacheOutdated will be set to True in the response.
        record_status = record_exists(recid)
        existing_cache = cache_exists(recid, uid)
        read_only_mode = False
        if data.has_key("inReadOnlyMode"):
            read_only_mode = data["inReadOnlyMode"]

        if record_status == 0:
            response["resultCode"] = 102
        elif record_status == -1:
            response["resultCode"] = 103
        elif not read_only_mode and not existing_cache and \
                record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif not read_only_mode and existing_cache and \
                cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif not read_only_mode and record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            if data.get("deleteRecordCache"):
                delete_cache_file(recid, uid)
                existing_cache = False
                pending_changes = []
                disabled_hp_changes = {}
            if read_only_mode:
                if data.has_key("recordRevision"):
                    record_revision_ts = data["recordRevision"]
                    record_xml = get_marcxml_of_revision(recid,
                                                         record_revision_ts)
                    record = create_record(record_xml)[0]
                    record_revision = \
                        timestamp_to_revision(record_revision_ts)
                    pending_changes = []
                    disabled_hp_changes = {}
                else:
                    # a normal cacheless retrieval of a record
                    record = get_bibrecord(recid)
                    record_revision = \
                        get_record_last_modification_date(recid)
                    pending_changes = []
                    disabled_hp_changes = {}
                cache_dirty = False
                mtime = 0
                undo_list = []
                redo_list = []
            elif not existing_cache:
                record_revision, record = create_cache_file(recid, uid)
                mtime = get_cache_mtime(recid, uid)
                pending_changes = []
                disabled_hp_changes = {}
                undo_list = []
                redo_list = []
                cache_dirty = False
            else:
                # TODO: This try except should be replaced with something
                #       nicer, like an argument indicating if a new cache
                #       file is to be created
                try:
                    cache_dirty, record_revision, record, pending_changes, \
                        disabled_hp_changes, undo_list, redo_list = \
                        get_cache_file_contents(recid, uid)
                    touch_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    if not latest_record_revision(recid, record_revision) \
                            and get_record_revisions(recid) != ():
                        # This should prevent us from using an old cache
                        # when viewing an old version. If there are no
                        # revisions, it means we should skip this step
                        # because this is a new record
                        response["cacheOutdated"] = True
                except:
                    record_revision, record = create_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    pending_changes = []
                    disabled_hp_changes = {}
                    cache_dirty = False
                    undo_list = []
                    redo_list = []

            if data["clonedRecord"]:
                response["resultCode"] = 9
            else:
                response["resultCode"] = 3

            revision_author = get_record_revision_author(recid,
                                                         record_revision)
            last_revision_ts = revision_to_timestamp(
                get_record_last_modification_date(recid))
            revisions_history = get_record_revision_timestamps(recid)
            number_of_physical_copies = get_number_copies(recid)
            bibcirc_details_URL = create_item_details_url(recid, ln)
            can_have_copies = can_record_have_physical_copies(recid)

            response["cacheDirty"] = cache_dirty
            response["record"] = record
            response["cacheMTime"] = mtime
            response["recordRevision"] = \
                revision_to_timestamp(record_revision)
            response["revisionAuthor"] = revision_author
            response["lastRevision"] = last_revision_ts
            response["revisionsHistory"] = revisions_history
            response["inReadOnlyMode"] = read_only_mode
            response["pendingHpChanges"] = pending_changes
            response["disabledHpChanges"] = disabled_hp_changes
            response["undoList"] = undo_list
            response["redoList"] = redo_list
            response["numberOfCopies"] = number_of_physical_copies
            response["bibCirculationUrl"] = bibcirc_details_URL
            response["canRecordHavePhysicalCopies"] = can_have_copies

            # Set tag format from user's session settings.
            try:
                tagformat_settings = session_param_get(req,
                                                       "bibedit_tagformat")
                tagformat = tagformat_settings[recid]
            except KeyError:
                tagformat = CFG_BIBEDIT_TAG_FORMAT
            response["tagFormat"] = tagformat

    elif request_type == "submit":
        # Submit the record. Possible error situations:
        # - Missing cache file
        # - Cache file modified in other editor
        # - Record locked by other user
        # - Record locked by queue
        # - Invalid XML characters
        # If the cache is outdated cacheOutdated will be set to True in the
        # response.
        if not cache_exists(recid, uid):
            response["resultCode"] = 106
        elif not get_cache_mtime(recid, uid) == data["cacheMTime"]:
            response["resultCode"] = 107
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            try:
                tmp_result = get_cache_file_contents(recid, uid)
                record_revision = tmp_result[1]
                record = tmp_result[2]
                pending_changes = tmp_result[3]
                # disabled_changes = tmp_result[4]
                xml_record = print_rec(record)
                record, status_code, list_of_errors = \
                    create_record(xml_record)
                if status_code == 0:
                    response["resultCode"], response["errors"] = \
                        110, list_of_errors
                elif not data["force"] and \
                        not latest_record_revision(recid, record_revision):
                    response["cacheOutdated"] = True
                else:
                    save_xml_record(recid, uid)
                    response["resultCode"] = 4
            except:
                response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                    "error_wrong_cache_file_format"]

    elif request_type == "revert":
        revId = data["revId"]
        job_date = "%s-%s-%s %s:%s:%s" % \
            re_revdate_split.search(revId).groups()
        revision_xml = get_marcxml_of_revision(recid, job_date)
        save_xml_record(recid, uid, revision_xml)
        if cache_exists(recid, uid):
            delete_cache_file(recid, uid)
        response["resultCode"] = 4

    elif request_type == "cancel":
        # Cancel editing by deleting the cache file. Possible error
        # situations:
        # - Cache file modified in other editor
        if cache_exists(recid, uid):
            if get_cache_mtime(recid, uid) == data["cacheMTime"]:
                delete_cache_file(recid, uid)
                response["resultCode"] = 5
            else:
                response["resultCode"] = 107
        else:
            response["resultCode"] = 5

    elif request_type == "deleteRecord":
        # Delete the record. Possible error situations:
        # - Record locked by other user
        # - Record locked by queue
        # As the user is requesting deletion we proceed even if the cache
        # file is missing and we don't check if the cache is outdated or
        # has been modified in another editor.
        existing_cache = cache_exists(recid, uid)
        pending_changes = []

        if has_copies(recid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                "error_physical_copies_exist"]
        elif existing_cache and cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                "error_rec_locked_by_user"]
        elif record_locked_by_queue(recid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                "error_rec_locked_by_queue"]
        else:
            if not existing_cache:
                record_revision, record, pending_changes, \
                    deactivated_hp_changes, undo_list, redo_list = \
                    create_cache_file(recid, uid)
            else:
                try:
                    record_revision, record, pending_changes, \
                        deactivated_hp_changes, undo_list, redo_list = \
                        get_cache_file_contents(recid, uid)[1:]
                except:
                    record_revision, record, pending_changes, \
                        deactivated_hp_changes = \
                        create_cache_file(recid, uid)
            record_add_field(record, "980", " ", " ", "",
                             [("c", "DELETED")])
            undo_list = []
            redo_list = []
            update_cache_file_contents(recid, uid, record_revision, record,
                                       pending_changes,
                                       deactivated_hp_changes,
                                       undo_list, redo_list)
            save_xml_record(recid, uid)
            # we don't need any changes related to a deleted record
            delete_related_holdingpen_changes(recid)
            response["resultCode"] = 10

    elif request_type == "deleteRecordCache":
        # Delete the cache file. Ignore the request if the cache has been
        # modified in another editor.
        if cache_exists(recid, uid) and \
                get_cache_mtime(recid, uid) == data["cacheMTime"]:
            delete_cache_file(recid, uid)
        response["resultCode"] = 11

    elif request_type == "prepareRecordMerge":
        # We want to merge the cache with the current DB version of the
        # record, so prepare an XML file from the file cache, to be used
        # by BibMerge.
        # Possible error situations:
        # - Missing cache file
        # - Record locked by other user
        # - Record locked by queue
        # We don't check if cache is outdated (a likely scenario for this
        # request) or if it has been modified in another editor.
        if not cache_exists(recid, uid):
            response["resultCode"] = 106
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            save_xml_record(recid, uid, to_upload=False, to_merge=True)
            response["resultCode"] = 12

    return response
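# Hedged usage sketch: dispatching a 'getRecord' request through
# perform_request_record above. req, recid and uid would come from the
# BibEdit AJAX layer; the data keys shown are the ones the handler reads,
# and the result-code meanings are taken from the branch comments above
# (3 = record sent, 102/103 = missing/deleted, 104/105 = locked).
def _demo_get_record(req, recid, uid):
    data = {'clonedRecord': False, 'inReadOnlyMode': False}
    response = perform_request_record(req, 'getRecord', recid, uid, data)
    return response.get('resultCode')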
def process_record(self, record):
    """@see: BaseFieldCommand.process_record"""
    bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2)
def merge_field_group(rec1, rec2, fnum, ind1='', ind2='',
                      merge_conflicting_fields=False):
    """Merges non-conflicting fields from 'rec2' to 'rec1' for a specific
    tag.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields
        or not
    """
    ### Check if merging goes for all indicators and set a boolean
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, '%', '%')
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if len(fields2) == 0:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                 match_subfields, ind1, ind2)

    ### check if fields are the same
    if alldiffs is None:
        return  # nothing to merge

    ### find the diffing for the fields of the given indicators
    # keep only the list of diffs by indicators (without the 'c')
    alldiffs = alldiffs[1]

    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(alldiffs)
    else:
        # diffing for one indicator
        for diff in alldiffs:  # look for indicator pair in diff result
            if diff[0] == (ind1, ind2):
                break
        else:
            raise Exception, "Indicators not in diff result."
        # keep only the list of diffs (without the indicator tuple)
        diff = diff[1]

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    if merge_conflicting_fields == False:  # merge non-conflicting fields
        for m in diff:  # for every match of fields in the diff
            if m[0] is not None:  # if rec1 has a field in the diff, keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:  # else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:  # merge all fields
        for m in diff:  # for every match of fields in the diff
            if m[1] is not None:  # if rec2 has a field, add it
                new_fields.append(deepcopy(fields2[m[1]]))
                if m[0] is not None and \
                        fields1[m[0]][0] != fields2[m[1]][0]:
                    # if the fields are not the same then add the field
                    # of rec1
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ## find where the new_fields should be inserted in rec1 (insert_index)
    if merging_all_indicators:
        insert_index = 0
    else:
        insert_index = None
        ind_pair = (ind1, ind2)
        first_last_dict = \
            _first_and_last_index_for_each_indicator(rec1.get(fnum, []))
        # find the indicator pair which is just before the one which will
        # be inserted
        indicators = first_last_dict.keys()
        indicators.sort()
        ind_pair_before = None
        for pair in indicators:
            if pair > ind_pair:
                break
            else:
                ind_pair_before = pair
        if ind_pair_before is None:  # if no smaller indicator pair exists
            insert_index = 0  # insertion will take place at the beginning
        else:
            # else insert after the last field index of the previous
            # indicator pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)
    return
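# Hedged sketch of calling merge_field_group above: rec1/rec2 are bibrecord
# dictionaries; here they are built from caller-supplied MARCXML just to
# show the calling convention. Leaving ind1/ind2 empty means every
# indicator pair is merged (see merging_all_indicators above).
from invenio.bibrecord import create_record

def _demo_merge(xml1, xml2):
    rec1 = create_record(xml1)[0]
    rec2 = create_record(xml2)[0]
    # merge the 700 fields of rec2 into rec1, keeping rec1 on conflicts
    merge_field_group(rec1, rec2, '700', ind1='', ind2='',
                      merge_conflicting_fields=False)
    return rec1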
def Update_Approval_DB(parameters, curdir, form, user_info=None):
    """
    This function updates the approval database when a document has just
    been approved or rejected. It uses the [categformatDAM] parameter to
    compute the category of the document. Must be called after the
    Get_Report_Number function.

    Parameters:

       * categformatDAM: It contains the regular expression which allows
         the retrieval of the category from the reference number.
         Eg: if [categformatDAM]="TEST-<CATEG>-.*" and the reference is
         "TEST-CATEG1-2001-001" then the category will be recognized as
         "CATEG1".
    """
    global rn, sysno
    doctype = form['doctype']
    act = form['act']
    categformat = parameters['categformatDAM']

    ## Get the name of the decision file:
    try:
        decision_filename = parameters['decision_file']
    except KeyError:
        decision_filename = ""

    pid = os.getpid()
    now = time.time()
    access = "%i_%s" % (now, pid)
    if act not in ["APP", "APS", "APM", "APO"]:
        # retrieve category
        if re.search("<FILE:", categformat):
            filename = categformat.replace("<FILE:", "")
            filename = filename.replace(">", "")
            if os.path.exists("%s/%s" % (curdir, filename)):
                fp = open("%s/%s" % (curdir, filename))
                category = fp.read()
                fp.close()
            else:
                category = ""
            category = category.replace("\n", "")
        else:
            categformat = categformat.replace("<CATEG>", "([^-]*)")
            m_categ_search = re.match(categformat, rn)
            if m_categ_search is not None:
                if len(m_categ_search.groups()) > 0:
                    ## Found a match for the category of this document.
                    ## Get it:
                    category = m_categ_search.group(1)
                else:
                    ## This document has no category.
                    category = ""
            else:
                category = ""

        if category == "":
            category = "unknown"

        sth = run_sql("SELECT status,dFirstReq,dLastReq,dAction FROM sbmAPPROVAL WHERE doctype=%s and categ=%s and rn=%s", (doctype, category, rn,))
        if len(sth) == 0:
            run_sql("INSERT INTO sbmAPPROVAL (doctype, categ, rn, status, dFirstReq, dLastReq, dAction, access) VALUES (%s,%s,%s,'waiting',NOW(),NOW(),'',%s)", (doctype, category, rn, access,))
        else:
            run_sql("UPDATE sbmAPPROVAL SET dLastReq=NOW(), status='waiting' WHERE doctype=%s and categ=%s and rn=%s", (doctype, category, rn,))
    else:
        ## Since this is the "APP" action, this call of the function must be
        ## on behalf of the referee - in order to approve or reject an item.
        ## We need to get the decision from the decision file:
        if decision_filename in (None, "", "NULL"):
            ## We don't have a name for the decision file.
            ## For backward compatibility reasons, try to read the decision
            ## from a file called 'decision' in curdir:
            if os.path.exists("%s/decision" % curdir):
                fh_decision = open("%s/decision" % curdir, "r")
                decision = fh_decision.read()
                fh_decision.close()
            else:
                decision = ""
        else:
            ## Try to read the decision from the decision file:
            try:
                fh_decision = open("%s/%s" % (curdir, decision_filename),
                                   "r")
                decision = fh_decision.read().strip()
                fh_decision.close()
            except IOError:
                ## Oops, unable to open the decision file.
                decision = ""

        from invenio.bibrecord import record_delete_field, \
            record_add_field, record_xml_output
        from invenio.bibedit_utils import get_bibrecord
        from invenio.bibtask import task_low_level_submission

        record = get_bibrecord(sysno)

        ## Either approve or reject the item, based upon the contents
        ## of 'decision':
        if decision == "approve":
            run_sql("UPDATE sbmAPPROVAL SET dAction=NOW(),status='approved' WHERE rn=%s", (rn,))
        else:
            run_sql("UPDATE sbmAPPROVAL SET dAction=NOW(),status='rejected' WHERE rn=%s", (rn,))
            if act == "APS":
                record_delete_field(record, "980")
                record_add_field(record, '980', ' ', ' ', '',
                                 [('a', 'REJBLOG')])
                fd, name = tempfile.mkstemp(suffix='.xml', dir=CFG_TMPDIR)
                os.write(fd, """<collection>\n""")
                os.write(fd, record_xml_output(record))
                os.write(fd, """</collection>\n""")
                os.close(fd)
                task_low_level_submission('bibupload', 'admin', '-c', name)
                task_low_level_submission('bibindex', 'admin')
                task_low_level_submission('webcoll', 'admin', '-c',
                                          "Provisional Blogs")
                task_low_level_submission('webcoll', 'admin', '-c', "Blogs")
    return ""
        # Did not find existing record in database
        holdingpen_records.append(record)
        continue

    # We remove the 500 field temporary/brief entry from the revision if
    # the record already exists
    fields_500 = record_get_field_instances(record, '500',
                                            ind1="%", ind2="%")
    if fields_500 is not None:
        field_positions = []
        for field in fields_500:
            subfields = field_get_subfield_instances(field)
            for subfield in subfields:
                if re.match("^.?((temporary|brief) entry).?$",
                            subfield[1].lower(), re.IGNORECASE):
                    field_positions.append((field[1], field[2], field[4]))
        for ind1, ind2, pos in field_positions:
            record_delete_field(record, '500', ind1=ind1, ind2=ind2,
                                field_position_global=pos)

    # Now compare new version with existing one, returning a
    # diff[tag] = (diffcode, [..])
    # None - if field is the same for both records
    # ('r',) - field missing from input record, ignored ATM
    # ('a',) - new field added, should be updated with append
    # ('c', difference_comparison) -> if field field_id exists in both
    #     records, but its value has changed
    #     -> uploaded with correct if accepted
    fields_to_add = []
    fields_to_correct = []
    holdingpen = False

    difference = record_diff(existing_record, record,
                             compare_subfields=match_subfields)
    for tag, diff in difference.iteritems():
        if diff is None:
            # No difference in tag
def perform_request_record(req, request_type, recid, uid, data):
    """Handle 'major' record related requests like fetching, submitting or
    deleting a record, cancel editing or preparing a record for merging.
    """
    response = {}

    if request_type == 'newRecord':
        # Create a new record.
        new_recid = reserve_record_id()
        new_type = data['newType']
        if new_type == 'empty':
            # Create a new empty record.
            create_cache_file(recid, uid)
            response['resultCode'], response['newRecID'] = 6, new_recid
        elif new_type == 'template':
            # Create a new record from XML record template.
            template_filename = data['templateFilename']
            template = get_record_template(template_filename)
            if not template:
                response['resultCode'] = 108
            else:
                record = create_record(template)[0]
                if not record:
                    response['resultCode'] = 109
                else:
                    record_add_field(record, '001',
                                     controlfield_value=str(new_recid))
                    create_cache_file(new_recid, uid, record, True)
                    response['resultCode'], response['newRecID'] = \
                        7, new_recid
        elif new_type == 'clone':
            # Clone an existing record (from the user's cache).
            existing_cache = cache_exists(recid, uid)
            if existing_cache:
                try:
                    record = get_cache_file_contents(recid, uid)[2]
                except:
                    # if, for example, the cache format was wrong (outdated)
                    record = get_bibrecord(recid)
            else:
                # Cache missing. Fall back to using original version.
                record = get_bibrecord(recid)
            record_delete_field(record, '001')
            record_add_field(record, '001',
                             controlfield_value=str(new_recid))
            create_cache_file(new_recid, uid, record, True)
            response['resultCode'], response['newRecID'] = 8, new_recid

    elif request_type == 'getRecord':
        # Fetch the record. Possible error situations:
        # - Non-existing record
        # - Deleted record
        # - Record locked by other user
        # - Record locked by queue
        # A cache file will be created if it does not exist.
        # If the cache is outdated (i.e., not based on the latest DB
        # revision), cacheOutdated will be set to True in the response.
        record_status = record_exists(recid)
        existing_cache = cache_exists(recid, uid)
        read_only_mode = False
        if data.has_key("inReadOnlyMode"):
            read_only_mode = data['inReadOnlyMode']

        if record_status == 0:
            response['resultCode'] = 102
        elif record_status == -1:
            response['resultCode'] = 103
        elif not read_only_mode and not existing_cache and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif not read_only_mode and existing_cache and \
                cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif not read_only_mode and record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            if data.get('deleteRecordCache'):
                delete_cache_file(recid, uid)
                existing_cache = False
                pending_changes = []
                disabled_hp_changes = {}
            if read_only_mode:
                if data.has_key('recordRevision'):
                    record_revision_ts = data['recordRevision']
                    record_xml = get_marcxml_of_revision(recid,
                                                         record_revision_ts)
                    record = create_record(record_xml)[0]
                    record_revision = \
                        timestamp_to_revision(record_revision_ts)
                    pending_changes = []
                    disabled_hp_changes = {}
                else:
                    # a normal cacheless retrieval of a record
                    record = get_bibrecord(recid)
                    record_revision = \
                        get_record_last_modification_date(recid)
                    pending_changes = []
                    disabled_hp_changes = {}
                cache_dirty = False
                mtime = 0
            elif not existing_cache:
                record_revision, record = create_cache_file(recid, uid)
                mtime = get_cache_mtime(recid, uid)
                pending_changes = []
                disabled_hp_changes = {}
                cache_dirty = False
            else:
                try:
                    cache_dirty, record_revision, record, \
                        pending_changes, disabled_hp_changes = \
                        get_cache_file_contents(recid, uid)
                    touch_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    if not latest_record_revision(recid, record_revision):
                        response['cacheOutdated'] = True
                except:
                    record_revision, record = create_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    pending_changes = []
                    disabled_hp_changes = {}
                    cache_dirty = False

            if data['clonedRecord']:
                response['resultCode'] = 9
            else:
                response['resultCode'] = 3

            revision_author = get_record_revision_author(recid,
                                                         record_revision)
            last_revision_ts = revision_to_timestamp(
                get_record_last_modification_date(recid))
            revisions_history = get_record_revision_timestamps(recid)

            response['cacheDirty'] = cache_dirty
            response['record'] = record
            response['cacheMTime'] = mtime
            response['recordRevision'] = \
                revision_to_timestamp(record_revision)
            response['revisionAuthor'] = revision_author
            response['lastRevision'] = last_revision_ts
            response['revisionsHistory'] = revisions_history
            response['inReadOnlyMode'] = read_only_mode
            response['pendingHpChanges'] = pending_changes
            response['disabledHpChanges'] = disabled_hp_changes

            # Set tag format from user's session settings.
            try:
                tagformat_settings = session_param_get(req,
                                                       'bibedit_tagformat')
                tagformat = tagformat_settings[recid]
            except KeyError:
                tagformat = CFG_BIBEDIT_TAG_FORMAT
            response['tagFormat'] = tagformat

    elif request_type == 'submit':
        # Submit the record. Possible error situations:
        # - Missing cache file
        # - Cache file modified in other editor
        # - Record locked by other user
        # - Record locked by queue
        # - Invalid XML characters
        # If the cache is outdated cacheOutdated will be set to True in the
        # response.
        if not cache_exists(recid, uid):
            response['resultCode'] = 106
        elif not get_cache_mtime(recid, uid) == data['cacheMTime']:
            response['resultCode'] = 107
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            try:
                record_revision, record, pending_changes, \
                    disabled_changes = \
                    get_cache_file_contents(recid, uid)[1:]
                xml_record = print_rec(record)
                record, status_code, list_of_errors = \
                    create_record(xml_record)
                if status_code == 0:
                    response['resultCode'], response['errors'] = \
                        110, list_of_errors
                elif not data['force'] and \
                        not latest_record_revision(recid, record_revision):
                    response['cacheOutdated'] = True
                else:
                    save_xml_record(recid, uid)
                    response['resultCode'] = 4
            except:
                response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[
                    'wrong_cache_file_format']

    elif request_type == 'revert':
        revId = data['revId']
        job_date = "%s-%s-%s %s:%s:%s" % \
            re_revdate_split.search(revId).groups()
        revision_xml = get_marcxml_of_revision(recid, job_date)
        save_xml_record(recid, uid, revision_xml)
        if cache_exists(recid, uid):
            delete_cache_file(recid, uid)
        response['resultCode'] = 4

    elif request_type == 'cancel':
        # Cancel editing by deleting the cache file. Possible error
        # situations:
        # - Cache file modified in other editor
        if cache_exists(recid, uid):
            if get_cache_mtime(recid, uid) == data['cacheMTime']:
                delete_cache_file(recid, uid)
                response['resultCode'] = 5
            else:
                response['resultCode'] = 107
        else:
            response['resultCode'] = 5

    elif request_type == 'deleteRecord':
        # Delete the record. Possible error situations:
        # - Record locked by other user
        # - Record locked by queue
        # As the user is requesting deletion we proceed even if the cache
        # file is missing and we don't check if the cache is outdated or
        # has been modified in another editor.
        existing_cache = cache_exists(recid, uid)
        pending_changes = []
        if existing_cache and cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            if not existing_cache:
                record_revision, record, pending_changes, \
                    desactivated_hp_changes = create_cache_file(recid, uid)
            else:
                try:
                    record_revision, record, pending_changes, \
                        desactivated_hp_changes = \
                        get_cache_file_contents(recid, uid)[1:]
                except:
                    record_revision, record, pending_changes, \
                        desactivated_hp_changes = \
                        create_cache_file(recid, uid)
            record_add_field(record, '980', ' ', ' ', '',
                             [('c', 'DELETED')])
            update_cache_file_contents(recid, uid, record_revision, record,
                                       pending_changes,
                                       desactivated_hp_changes)
            save_xml_record(recid, uid)
            # we don't need any changes related to a deleted record
            delete_related_holdingpen_changes(recid)
            response['resultCode'] = 10

    elif request_type == 'deleteRecordCache':
        # Delete the cache file. Ignore the request if the cache has been
        # modified in another editor.
        if cache_exists(recid, uid) and \
                get_cache_mtime(recid, uid) == data['cacheMTime']:
            delete_cache_file(recid, uid)
        response['resultCode'] = 11

    elif request_type == 'prepareRecordMerge':
        # We want to merge the cache with the current DB version of the
        # record, so prepare an XML file from the file cache, to be used
        # by BibMerge.
        # Possible error situations:
        # - Missing cache file
        # - Record locked by other user
        # - Record locked by queue
        # We don't check if cache is outdated (a likely scenario for this
        # request) or if it has been modified in another editor.
        if not cache_exists(recid, uid):
            response['resultCode'] = 106
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            save_xml_record(recid, uid, to_upload=False, to_merge=True)
            response['resultCode'] = 12

    return response
def index(self, req, form):
    """ Display live bibz39 queue """
    referer = '/admin2/bibz39/'
    navtrail = ' <a class="navtrail" href="%s/help/admin">Admin Area</a> ' \
        % CFG_SITE_URL

    auth_code, auth_message = acc_authorize_action(req, 'runbibedit')
    if auth_code != 0:
        return page_not_authorized(req=req, referer=referer,
                                   text=auth_message, navtrail=navtrail)

    argd = wash_urlargd(form, {
        'search': (str, ''),
        'marcxml': (str, ''),
        'server': (list, []),
        'search_type': (str, ''),
    })

    if argd['marcxml']:
        uid = getUid(req)
        new_recid = reserve_record_id()
        record = create_record(argd["marcxml"])[0]
        record_delete_field(record, '001')
        record_delete_field(record, '005')
        record_add_field(record, '001', controlfield_value=str(new_recid))
        create_cache(new_recid, uid, record, True)
        redirect_to_url(req, '{0}/record/edit/#state=edit&recid={1}'.format(
            CFG_SITE_SECURE_URL, new_recid))

    body_content = ''
    body_content += self.generate_request_form(argd)

    if "search" in argd and argd["search"] and \
            'search_type' in argd and \
            argd["search_type"] in self._request_type_dict:
        conn = None
        list_of_record = []
        errors = {}
        res = []
        err = False
        for server in argd["server"]:
            try:
                errors[server] = {"internal": [], "remote": []}
                conn = zoom.Connection(
                    CFG_Z39_SERVER[server]["address"],
                    CFG_Z39_SERVER[server]["port"],
                    user=CFG_Z39_SERVER[server].get("user", None),
                    password=CFG_Z39_SERVER[server].get("password", None))
                conn.databaseName = CFG_Z39_SERVER[server]["databasename"]
                conn.preferredRecordSyntax = \
                    CFG_Z39_SERVER[server]["preferredRecordSyntax"]
                value = argd["search"].replace("-", "") \
                    if argd["search_type"] == "ISBN" else argd["search"]
                query = zoom.Query('CCL', u'{0}="{1}"'.format(
                    self._request_type_dict[argd["search_type"]], value))
                try:
                    server_answer = conn.search(query)
                    if len(server_answer) < 100:
                        nb_to_browse = len(server_answer)
                    else:
                        nb_to_browse = 100
                        errors[server]["remote"].append(
                            "The server {0} returned too many results."
                            " {1}/{2} are printed.".format(
                                server, nb_to_browse, len(server_answer)))
                    for result in server_answer[0:nb_to_browse]:
                        res.append({"value": result, "provider": server})
                except zoom.Bib1Err as e:
                    errors[server]["remote"].append("{0}".format(e))
                    err = True
                conn.close()
            except Exception as e:
                register_exception()
                errors[server]["internal"].append("{0}".format(e))
                if conn:
                    conn.close()

        p_err = False
        warnings = '<div class="error">'
        for server in errors:
            if errors[server]["internal"] or errors[server]["remote"]:
                warnings += "<b>{0}</b><ul>".format(server)
                for error in errors[server]["internal"]:
                    warnings += "<li>(internal) {0}</li>".format(error)
                for error in errors[server]["remote"]:
                    warnings += "<li>(remote) {0}</li>".format(error)
                warnings += "</ul>"
                p_err = True
        if p_err:
            body_content += "{0}</div>".format(warnings)

        if res:
            body_content += "<table id='result_area'" \
                            " class='fullwidth tablesorter'>"
            body_content += (
                "<tr><th class='bibz39_titles_th'>Title</th>"
                "<th class='bibz39_sources_th'>Authors</th>"
                "<th>Publisher</th>"
                "<th class='bibz39_sources_th'>Source</th>"
                "<th><div class='bibz39_button_td'>View XML</div></th>"
                "<th><div class='bibz39_button_td'>Import</div></th></tr>")
            for identifier, rec in enumerate(res):
                list_of_record.append(create_record(
                    self.interpret_string(zmarc.MARC(
                        rec["value"].data, strict=0).toMARCXML()))[0])
                title = ''
                authors = ''
                publishers = ''
                if "100" in list_of_record[identifier]:
                    for author in list_of_record[identifier]["100"]:
                        for tag in author[0]:
                            if tag[0] == 'a':
                                if authors != "":
                                    authors += " / " + tag[1].strip(",;.")
                                else:
                                    authors += tag[1].strip(",;.") + " "
                if "700" in list_of_record[identifier]:
                    for author in list_of_record[identifier]["700"]:
                        for tag in author[0]:
                            if tag[0] == 'a':
                                if authors != "":
                                    authors += " / " + tag[1].strip(",;.")
                                else:
                                    authors += tag[1].strip(",;.") + " "
                if "260" in list_of_record[identifier]:
                    for publisher in list_of_record[identifier]["260"][0][0]:
                        publishers += publisher[1] + " "
                if "245" in list_of_record[identifier]:
                    for title_constituant in \
                            list_of_record[identifier]["245"][0][0]:
                        title += title_constituant[1] + " "
                body_content += (
                    "<tr>"
                    "<td><div class='bibz39_titles'"
                    " onclick='showxml({6})'>{0}</div></td>"
                    "<td>{4}</td>"
                    "<td>{5}</td>"
                    "<td><div>{2}</div></td>"
                    "<td><div class='bibz39_button_td'>{3}</div></td>"
                    "<td><div class='bibz39_button_td'>{1}</div></td>"
                    "</tr>".format(
                        title,
                        '<form method="post" action="/admin2/bibz39/">'
                        '<input type="hidden" name="marcxml" value="{0}">'
                        '<input type="submit" value="Import" />'
                        '</form>'.format(
                            cgi.escape(record_xml_output(
                                list_of_record[identifier])).replace(
                                "\"", "&quot;").replace("\'", "&quot;")),
                        rec["provider"],
                        '<button onclick="showxml({0})">View</button>'.format(
                            identifier),
                        authors,
                        publishers,
                        identifier))
            body_content += "</table>"
            body_content += '<script type="text/javascript">'
            body_content += "var gAllMarcXml= {"
            for i, rec in enumerate(list_of_record):
                body_content += "{0}:{1},".format(
                    i, json.dumps(record_xml_output(rec)))
            body_content += "};"
            body_content += '</script>'
        else:
            if not err:
                body_content += "<p class='bibz39_button_td" \
                                " spinning_wheel'> No result</p>"

    body_content += \
        '<div id="dialog-message" title="XML Preview"></div></div>'
    return page(title="Z39.50 Search",
                body=body_content,
                errors=[],
                warnings=[],
                metaheaderadd=get_head(),
                req=req)
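# Hedged sketch of one CFG_Z39_SERVER entry, inferred only from the keys the
# handler above reads ("address", "port", "databasename",
# "preferredRecordSyntax", optional "user"/"password"). The host, port and
# database name below are illustrative placeholders, not a verified
# endpoint for this installation.
EXAMPLE_CFG_Z39_SERVER = {
    'Library of Congress': {
        'address': 'lx2.loc.gov',
        'port': 210,
        'databasename': 'LCDB',
        'preferredRecordSyntax': 'USMARC',
        # 'user' and 'password' may be omitted; the code falls back to None
    },
}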