def _filter_fields(self, record, output_fields):
    """Removes from the record all the fields that are not output_fields.

    Matching is done on whole tags only: a tag is kept when it is found
    in output_fields, otherwise it is handed to
    bibrecord.record_delete_fields().  The same record object that was
    passed in is returned.

    @param record: record structure (@see: bibrecord.py for details)
    @param output_fields: list of fields that should remain in the record

    @return: record containing only fields among output_fields
    """
    # Historical note: an earlier implementation also matched on
    # tag+indicators and tag+indicators+subfield code, deleting single
    # subfields/fields.  It was retired because it led to a
    # bibrecord-related bug, see <https://savannah.cern.ch/task/?10267>.

    # Snapshot the tags before deleting anything: removing entries from
    # the record while looping over a live view of its keys is unsafe.
    for tag in list(record.keys()):
        if tag not in output_fields:
            bibrecord.record_delete_fields(record, tag)
    return record
def check_records(records, empty=False):
    """Rebuild the 'w' (country) subfields of the 100 and 700 fields.

    For every record and for each of the tags 100 and 700:
      1. every existing <tag>__w value is reported via record.warn();
      2. the 'w' subfields are removed with record_delete_subfield() and
         the record is flagged with set_amended();
      3. for each instance of the tag still present in the record, the
         values returned by find_nations() (called with the subfield
         codes 'u' and 'v') are added as new 'w' subfields, unless they
         equal what get_current_countries() reports for that instance.

    @param records: iterable of record objects providing iterfield(),
        warn(), set_amended(), add_subfield(), item access by tag and
        the 'in' operator
    @param empty: not used by this function
    """
    fields = ['100', '700']
    for record in records:
        for field in fields:
            # Log what is about to be dropped.
            for pos, val in record.iterfield(field + "__w"):
                record.warn("%s %s" % (pos, val))
            # NOTE(review): the original formatting did not make clear
            # whether the two calls below belong inside the loop above;
            # they are kept at field level here -- confirm.
            record_delete_subfield(record, field, "w")
            record.set_amended('Removing old countries')
            if field not in record:
                continue
            for i, x in enumerate(record[field]):
                new_countries = find_nations(x[0], ['u', 'v'], record)
                current_countries = get_current_countries(x[0])
                # Compare by value: an identity test ("is not") between
                # two separately obtained results is practically always
                # true and would never detect "nothing changed".
                if new_countries != current_countries:
                    # set() collapses duplicates so each value is added once.
                    for val in set(new_countries):
                        record.add_subfield((field + '__w', i, 0), 'w', val)
def _filter_fields(self, record, output_fields):
    """Removes from the record all the fields that are not output_fields.

    Only whole tags are compared: a tag present in output_fields is
    kept, any other tag is passed to bibrecord.record_delete_fields().
    The record object given as argument is the one returned.

    @param record: record structure (@see: bibrecord.py for details)
    @param output_fields: list of fields that should remain in the record

    @return: record containing only fields among output_fields
    """
    # Historical note: a previous, finer-grained implementation (matching
    # tag+indicators and tag+indicators+subfield code) was abandoned
    # because it led to a bibrecord-related bug, see
    # <https://savannah.cern.ch/task/?10267>.

    # Take a copy of the tags first; deleting from the record while
    # iterating directly over its keys is not safe.
    for tag in list(record.keys()):
        if tag not in output_fields:
            bibrecord.record_delete_fields(record, tag)
    return record
def marcxml_filter_out_tags(recid, fields):
    """
    Returns, as XML text, the datafields of record 'recid' that carry
    the same tag and indicators as the entries of 'fields', after the
    subfields named in 'fields' have been deleted from the record.
    Datafields whose first element is empty at that point are left out.

    This is nice to emulate a bibupload -c that corrects only specific
    subfields.

    Parameters:
           recid - *int* the id of the record to process

          fields - *list(str)* the list of fields that we want to filter
                   out. Eg ['909COp', '909COo']
    """
    record = get_record(recid)

    # Step 1: remove the subfields that are going to be replaced.
    for spec in fields:
        record_delete_subfield(record,
                               tag=spec[0:3],
                               ind1=spec[3:4],
                               ind2=spec[4:5],
                               subfield_code=spec[5:6])

    # Step 2: emit the datafields matching tag + indicators, handling
    # each tag/indicator combination only once.
    chunks = []
    seen = set()
    for spec in fields:
        tag_and_ind = spec[0:5]
        if tag_and_ind in seen:
            continue
        seen.add(tag_and_ind)

        tag = spec[0:3]
        # '_' in the specification stands for a blank indicator.
        wanted_ind1 = spec[3:4].replace('_', ' ')
        wanted_ind2 = spec[4:5].replace('_', ' ')
        for datafield in record.get(tag, []):
            if datafield[1] != wanted_ind1 or datafield[2] != wanted_ind2:
                continue
            if not datafield[0]:
                continue
            chunks.append(field_xml_output(datafield, tag) + '\n')

    return ''.join(chunks)