Exemple #1
0
    def _filter_fields(self, record, output_fields):
        """Removes from the record all the fields
        that are not output_fields.

        @param record: record structure (@see: bibrecord.py for details)
        @param output_fields: list of fields that should remain in the record

        @return: record containing only fields among output_fields
        """
        # Tibor's new implementation:
        for tag in record.keys():
            if tag not in output_fields:
                bibrecord.record_delete_fields(record, tag)
        return record

        # Rado's old implementation that leads to bibrecord-related
        # bug, see <https://savannah.cern.ch/task/?10267>:
        record_keys = record.keys()

        # Check if any of the tags, fields or subfields match
        # any value in output_fields. In case of match we leave
        # the element and its children in the record.
        #
        # If the element and all its children are not among the
        # output fields, it is deleted
        for tag in record_keys:
            tag = tag.lower()
            if tag not in output_fields:
                for (subfields, ind1, ind2, value,
                     field_number) in record[tag]:
                    current_field = tag + ind1.strip() + ind2.strip()
                    current_field = current_field.lower()
                    if current_field not in output_fields:
                        delete_parents = True

                        for (code, value) in subfields:
                            current_subfield = current_field + code
                            current_subfield = current_subfield.lower()
                            if current_subfield not in output_fields:
                                bibrecord.record_delete_subfield(
                                    record, tag, code, ind1, ind2)
                            else:
                                delete_parents = False

                        if delete_parents:
                            bibrecord.record_delete_field(
                                record, tag, ind1, ind2)
        return record
Exemple #2
0
def check_records(records, empty=False):
    """Refresh the 'w' (country) subfields of the 100 and 700 fields.

    For each record and each of the tags '100' and '700':

    1. every existing ``<tag>__w`` subfield is reported with
       record.warn(), removed with record_delete_subfield() and the
       record is flagged as amended ('Removing old countries');
    2. for every remaining instance of the tag, the countries found by
       find_nations() in subfields 'u' and 'v' are compared with the
       ones returned by get_current_countries(); when they differ, one
       'w' subfield is added per distinct new country.

    @param records: iterable of record objects offering iterfield(),
        warn(), set_amended(), add_subfield(), ``in`` and item access
    @param empty: unused by this function; kept so that the signature
        stays compatible with existing callers
    """
    fields = ['100', '700']

    for record in records:
        for field in fields:
            # NOTE(review): the record is modified while iterfield() is
            # being consumed; whether that is safe depends on how
            # iterfield() is implemented -- confirm.
            for pos, val in record.iterfield(field + "__w"):
                record.warn("%s %s" % (pos, val))
                record_delete_subfield(record, field, "w")
                record.set_amended('Removing old countries')
            if field in record:
                for i, x in enumerate(record[field]):
                    # x[0] is handed to both helpers; presumably it is
                    # the subfield list of this field instance.
                    new_countries = find_nations(x[0], ['u', 'v'], record)
                    current_countries = get_current_countries(x[0])

                    # Compare by value: an identity test ("is not") on
                    # two freshly computed results is practically always
                    # true and would re-add countries that are already
                    # present.
                    if new_countries != current_countries:
                        for val in set(new_countries):
                            record.add_subfield((field + '__w', i, 0), 'w', val)
Exemple #3
0
def check_records(records, empty=False):
    """Refresh the 'w' (country) subfields of the 100 and 700 fields.

    For each record and each of the tags '100' and '700':

    1. every existing ``<tag>__w`` subfield is reported with
       record.warn(), removed with record_delete_subfield() and the
       record is flagged as amended ('Removing old countries');
    2. for every remaining instance of the tag, the countries found by
       find_nations() in subfields 'u' and 'v' are compared with the
       ones returned by get_current_countries(); when they differ, one
       'w' subfield is added per distinct new country.

    @param records: iterable of record objects offering iterfield(),
        warn(), set_amended(), add_subfield(), ``in`` and item access
    @param empty: unused by this function; kept so that the signature
        stays compatible with existing callers
    """
    fields = ['100', '700']

    for record in records:
        for field in fields:
            # NOTE(review): the record is modified while iterfield() is
            # being consumed; whether that is safe depends on how
            # iterfield() is implemented -- confirm.
            for pos, val in record.iterfield(field + "__w"):
                record.warn("%s %s" % (pos, val))
                record_delete_subfield(record, field, "w")
                record.set_amended('Removing old countries')
            if field in record:
                for i, x in enumerate(record[field]):
                    # x[0] is handed to both helpers; presumably it is
                    # the subfield list of this field instance.
                    new_countries = find_nations(x[0], ['u', 'v'], record)
                    current_countries = get_current_countries(x[0])

                    # Compare by value: an identity test ("is not") on
                    # two freshly computed results is practically always
                    # true and would re-add countries that are already
                    # present.
                    if new_countries != current_countries:
                        for val in set(new_countries):
                            record.add_subfield((field + '__w', i, 0), 'w', val)
    def _filter_fields(self, record, output_fields):
        """Removes from the record all the fields
        that are not output_fields.

        @param record: record structure (@see: bibrecord.py for details)
        @param output_fields: list of fields that should remain in the record

        @return: record containing only fields among output_fields
        """
        # Tibor's new implementation:
        for tag in record.keys():
            if tag not in output_fields:
                bibrecord.record_delete_fields(record, tag)
        return record

        # Rado's old implementation that leads to bibrecord-related
        # bug, see <https://savannah.cern.ch/task/?10267>:
        record_keys = record.keys()

        # Check if any of the tags, fields or subfields match
        # any value in output_fields. In case of match we leave
        # the element and its children in the record.
        #
        # If the element and all its children are not among the
        # output fields, it is deleted
        for tag in record_keys:
            tag = tag.lower()
            if tag not in output_fields:
                for (subfields, ind1, ind2, value, field_number) in record[tag]:
                    current_field = tag + ind1.strip() + ind2.strip()
                    current_field = current_field.lower()
                    if current_field not in output_fields:
                        delete_parents = True

                        for (code, value) in subfields:
                            current_subfield = current_field + code
                            current_subfield = current_subfield.lower()
                            if current_subfield not in output_fields:
                                bibrecord.record_delete_subfield(record, tag, code, ind1, ind2)
                            else:
                                delete_parents = False

                        if delete_parents:
                            bibrecord.record_delete_field(record, tag, ind1, ind2)
        return record
Exemple #5
0
def marcxml_filter_out_tags(recid, fields):
    """
    Serialize the datafields of record 'recid' whose tag and
    indicators match an entry of 'fields', after the subfields named
    in 'fields' have been stripped from them. Handy to emulate a
    bibupload -c that corrects only specific subfields.

    Each entry of 'fields' is read positionally: characters 0-2 are
    the tag, character 3 is ind1, character 4 is ind2 and character 5
    is the subfield code. An underscore in an indicator position
    stands for a blank indicator.

    Parameters:
           recid - *int* the id of the record to process

          fields - *list(str)* the list of fields that we want to filter
                   out. Eg ['909COp', '909COo']

    Returns the concatenated output of field_xml_output() for every
    selected datafield, each followed by a newline ('' when nothing
    matches).
    """
    record = get_record(recid)

    # First pass: drop the subfields that are going to be replaced
    for spec in fields:
        record_delete_subfield(record,
                               tag=spec[0:3],
                               ind1=spec[3:4],
                               ind2=spec[4:5],
                               subfield_code=spec[5:6])

    # Second pass: collect the datafields sharing tag + indicators,
    # handling each tag/indicator combination only once
    chunks = []
    seen = set()
    for spec in fields:
        tag_and_ind = spec[0:5]
        if tag_and_ind in seen:
            continue
        seen.add(tag_and_ind)

        tag = spec[0:3]
        wanted_ind1 = spec[3:4].replace('_', ' ')
        wanted_ind2 = spec[4:5].replace('_', ' ')
        for datafield in record.get(tag, []):
            if datafield[1] != wanted_ind1:
                continue
            if datafield[2] != wanted_ind2:
                continue
            if not datafield[0]:
                # Nothing left in the first element of the datafield
                # (presumably its subfield list): skip it
                continue
            chunks.append(field_xml_output(datafield, tag) + '\n')

    return ''.join(chunks)
def marcxml_filter_out_tags(recid, fields):
    """
    Serialize the datafields of record 'recid' whose tag and
    indicators match an entry of 'fields', after the subfields named
    in 'fields' have been stripped from them. Handy to emulate a
    bibupload -c that corrects only specific subfields.

    Each entry of 'fields' is read positionally: characters 0-2 are
    the tag, character 3 is ind1, character 4 is ind2 and character 5
    is the subfield code. An underscore in an indicator position
    stands for a blank indicator.

    Parameters:
           recid - *int* the id of the record to process

          fields - *list(str)* the list of fields that we want to filter
                   out. Eg ['909COp', '909COo']

    Returns the concatenated output of field_xml_output() for every
    selected datafield, each followed by a newline ('' when nothing
    matches).
    """
    record = get_record(recid)

    # First pass: drop the subfields that are going to be replaced
    for spec in fields:
        record_delete_subfield(record,
                               tag=spec[0:3],
                               ind1=spec[3:4],
                               ind2=spec[4:5],
                               subfield_code=spec[5:6])

    # Second pass: collect the datafields sharing tag + indicators,
    # handling each tag/indicator combination only once
    chunks = []
    seen = set()
    for spec in fields:
        tag_and_ind = spec[0:5]
        if tag_and_ind in seen:
            continue
        seen.add(tag_and_ind)

        tag = spec[0:3]
        wanted_ind1 = spec[3:4].replace('_', ' ')
        wanted_ind2 = spec[4:5].replace('_', ' ')
        for datafield in record.get(tag, []):
            if datafield[1] != wanted_ind1:
                continue
            if datafield[2] != wanted_ind2:
                continue
            if not datafield[0]:
                # Nothing left in the first element of the datafield
                # (presumably its subfield list): skip it
                continue
            chunks.append(field_xml_output(datafield, tag) + '\n')

    return ''.join(chunks)