Example #1
def check_record(record, source_field, new_field, subfield_filter):
    """ Changes the code of a field to new_field """
    from collections import namedtuple
    from invenio.bibrecord import (record_add_field, record_delete_field,
                                   record_get_field_instances)

    assert len(source_field) == 5
    assert len(new_field) == 5
    source_field = source_field.replace("_", " ")
    new_field = new_field.replace("_", " ")

    assert len(subfield_filter) == 2
    SubfieldFilter = namedtuple('SubfieldFilter', ['code', 'value'])
    subfield_filter = SubfieldFilter(*subfield_filter)

    def filter_passes(subfield_code, result):
        return subfield_filter.code is None or (
            subfield_filter.code in ('%', subfield_code) and
            subfield_filter.value == result)

    subfields_list = []
    for subfields, ind1, ind2, _, pos in record_get_field_instances(
            record, source_field[:3], source_field[3], source_field[4]):
        if any(filter_passes(*s) for s in subfields):
            subfields_list.append(subfields)
            record_delete_field(record, source_field[:3], ind1, ind2, pos)

    for subfields in subfields_list:
        record_add_field(record, new_field[:3], new_field[3], new_field[4],
                         subfields=subfields)
        record.set_amended('move from %s to %s: %s' %
                           (source_field.replace(" ", "_"),
                            new_field.replace(" ", "_"), subfields))
Example #2
def delete_field(rec, fnum, findex):
    """Delete a specific field.
    @param rec: a record dictionary structure
    @param fnum: a 3 characters long string indicating field tag number
    @param findex: the rec field position in the group of fields it belongs
    """
    record_delete_field(rec, fnum, field_position_local=findex)
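
For context, bibrecord records are plain dicts mapping a tag to a list of field tuples (subfields, ind1, ind2, controlfield_value, global_position). A rough pure-Python stand-in for the local-position delete, on made-up data:

rec = {'100': [([('a', 'Doe, John')], ' ', ' ', '', 1),
               ([('a', 'Roe, Jane')], ' ', ' ', '', 2)]}

def delete_field_local(rec, fnum, findex):
    # Stand-in for record_delete_field(rec, fnum, field_position_local=findex).
    del rec[fnum][findex]
    if not rec[fnum]:
        del rec[fnum]

delete_field_local(rec, '100', 0)
print(rec)  # {'100': [([('a', 'Roe, Jane')], ' ', ' ', '', 2)]}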
def check_record(record):
    """ Split fields """
    from invenio.bibrecord import record_delete_field
    from invenio.bibrecord import record_add_field

    message = ""
    marc = '693__e'
    tag = marc[:3]
    if tag not in record:
        return
    ind1 = marc[3].replace('_', ' ')
    ind2 = marc[4].replace('_', ' ')
    sfcode = marc[5]
    to_split = fields_to_split(record, tag, ind1, ind2, sfcode)

    if not to_split:
        return
    # work from the back to try to preserve order
    positions = sorted(to_split, reverse=True)
    for global_pos in positions:
        (parts, rest_before, rest_after) = to_split[global_pos]
        message += " - split %s %s" % (tag, parts)
        record_delete_field(record, tag, ind1, ind2,
                            field_position_global=global_pos)
        parts.reverse()
        for subfield in parts:
            field = rest_before + [subfield, ] + rest_after
            record_add_field(record, tag, ind1, ind2, '', field,
                             field_position_global=global_pos)
    if message:
        record.set_amended(message)
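
The core of the split step is plain list arithmetic; a sketch with hypothetical subfields (fields_to_split is assumed to map a global position to (parts, rest_before, rest_after)):

rest_before = [('a', 'CERN LHC')]
parts = [('e', 'ATLAS'), ('e', 'CMS')]
rest_after = []

new_fields = [rest_before + [subfield] + rest_after for subfield in parts]
print(new_fields)
# [[('a', 'CERN LHC'), ('e', 'ATLAS')], [('a', 'CERN LHC'), ('e', 'CMS')]]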
 def _delete_field_condition(self, record):
     """Checks if a subfield meets the condition for the
     field to be deleted
     """
     try:
         for field in record[self._tag]:
             for subfield in field[0]:
                 if subfield[0] == self._conditionSubfield:
                     if self._condition_exact_match:
                         if self._condition == subfield[1]:
                             bibrecord.record_delete_field(
                                 record,
                                 self._tag,
                                 self._ind1,
                                 self._ind2,
                                 field_position_global=field[4])
                             self._modifications += 1
                             break
                     else:
                         if self._condition in subfield[1]:
                             bibrecord.record_delete_field(
                                 record,
                                 self._tag,
                                 self._ind1,
                                 self._ind2,
                                 field_position_global=field[4])
                             self._modifications += 1
                             break
     except KeyError:
         pass
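
The two branches above differ only in how the condition is compared against the subfield value; in isolation (the values are made up):

condition, subfield_value = 'CERN', 'CERN Geneva'
print(condition == subfield_value)  # exact match: False
print(condition in subfield_value)  # substring match: True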
Example #5
def delete_field(rec, fnum, findex):
    """Delete a specific field.
    @param rec: a record dictionary structure
    @param fnum: a 3 characters long string indicating field tag number
    @param findex: the rec field position in the group of fields it belongs
    """
    record_delete_field(rec, fnum, field_position_local=findex)
Example #6
def check_record(record, source_field, new_field, subfield_filter):
    """ Changes the code of a field to new_field """
    from collections import namedtuple
    from invenio.bibrecord import (record_add_field, record_delete_field,
                                   record_get_field_instances)

    assert len(source_field) == 5
    assert len(new_field) == 5
    source_field = source_field.replace("_", " ")
    new_field = new_field.replace("_", " ")

    assert len(subfield_filter) == 2
    SubfieldFilter = namedtuple('SubfieldFilter', ['code', 'value'])
    subfield_filter = SubfieldFilter(*subfield_filter)

    def filter_passes(subfield_code, result):
        return subfield_filter.code is None or (
            subfield_filter.code in ('%', subfield_code)
            and subfield_filter.value == result)

    subfields_list = []
    for subfields, ind1, ind2, _, pos in record_get_field_instances(
            record, source_field[:3], source_field[3], source_field[4]):
        if any(filter_passes(*s) for s in subfields):
            subfields_list.append(subfields)
            record_delete_field(record, source_field[:3], ind1, ind2, pos)

    for subfields in subfields_list:
        record_add_field(record,
                         new_field[:3],
                         new_field[3],
                         new_field[4],
                         subfields=subfields)
        record.set_amended('move from %s to %s: %s' % (source_field.replace(
            " ", "_"), new_field.replace(" ", "_"), subfields))
 def process_record(self, record):
     """@see: BaseFieldCommand.process_record"""
     if self._condition:
         self._delete_field_condition(record)
     else:
         bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2)
         self._modifications += 1
def check_existing_pdg_fields(recids, pdg_data, current_records):
    _print_out("Comparing new and old PDG data for " + str(len(recids)) +
               " records...")
    records = {}
    for recid in recids:
        record_mod = {}
        record_mod['001'] = deepcopy(current_records[recid]['001'])
        record_mod['084'] = deepcopy(current_records[recid]['084'])
        fields = record_get_field_instances(record_mod, '084')
        current_pdg_data = []
        for field in fields:
            if is_pdg_field(field):
                current_pdg_data.append(
                    field_get_subfield_values(field, 'a')[0])

        current_set = set(current_pdg_data)
        new_set = set(pdg_data[recid])
        deletions = list(current_set - new_set)
        additions = list(new_set - current_set)

        if len(deletions) > 0 or len(additions) > 0:
            if len(deletions) > 0:
                for field in fields:
                    if is_pdg_field(field):
                        if field_get_subfield_values(field,
                                                     'a')[0] in deletions:
                            record_delete_field(record_mod,
                                                '084',
                                                ind1=' ',
                                                ind2=' ',
                                                field_position_global=field[4])

            for pdg_field in additions:
                position = record_add_field(record_mod, '084', ' ', ' ')
                record_add_subfield_into(record_mod,
                                         '084',
                                         '2',
                                         'PDG',
                                         field_position_global=position)
                record_add_subfield_into(record_mod,
                                         '084',
                                         '9',
                                         'PDG',
                                         field_position_global=position)
                record_add_subfield_into(record_mod,
                                         '084',
                                         'a',
                                         pdg_field,
                                         field_position_global=position)

            records[recid] = record_mod
            _print_verbose("Record #" + str(recid) + ": " +
                           str(len(deletions)) + " deletions and " +
                           str(len(additions)) + " additons.")
        else:
            _print_verbose("Nothing to change for record #" + str(recid))

    _print_out(str(len(records)) + " records to be corrected.")
    return records
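
The comparison reduces to two set differences; a sketch with hypothetical PDG codes:

current_set = set(['S027', 'S041'])   # codes already on the record (084__a)
new_set = set(['S041', 'S044'])       # codes from the new PDG data
print(sorted(current_set - new_set))  # deletions: ['S027']
print(sorted(new_set - current_set))  # additions: ['S044']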
Example #9
def create_xml(recid, fname=None, oaff=None):
    affs = [a for a in oaff]
    record = get_record(recid)
    auth_location = record_get_field_instances(record, '100', '', '')[0][4]
    record_delete_field(record, '700', '', '')
    for x in affs:
        record_add_subfield_into(record, '100', 'u', x, field_position_global=auth_location)
    return print_rec(record)
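
A rough pure-Python stand-in for the loop above, on made-up data: locate the field carrying the stored global position and append a ('u', affiliation) subfield to it:

record = {'100': [([('a', 'Doe, John')], ' ', ' ', '', 3)]}
auth_location = record['100'][0][4]  # global position of the author field

def add_subfield_at(record, tag, code, value, global_pos):
    # Stand-in for record_add_subfield_into(..., field_position_global=...).
    for field in record[tag]:
        if field[4] == global_pos:
            field[0].append((code, value))

for aff in ['CERN', 'DESY']:
    add_subfield_at(record, '100', 'u', aff, auth_location)
print(record['100'][0][0])  # [('a', 'Doe, John'), ('u', 'CERN'), ('u', 'DESY')]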
Example #10
def compare_references(test, a, b):
    ## Let's normalize records to remove the Invenio refextract signature
    a = create_record(a)[0]
    b = create_record(b)[0]
    record_delete_field(a, '999', 'C', '6')
    a = record_xml_output(a)
    b = record_xml_output(b)
    test.assertXmlEqual(a, b)
 def process_record(self, record):
     """@see: BaseFieldCommand.process_record"""
     if self._condition:
         self._delete_field_condition(record)
     else:
         bibrecord.record_delete_field(record, self._tag, self._ind1,
                                       self._ind2)
         self._modifications += 1
Example #13
def create_xml(recid, fname=None, oaff=None):
    affs = [a for a in oaff]
    record = get_record(recid)
    auth_location = record_get_field_instances(record, '100', '', '')[0][4]
    record_delete_field(record, '700', '', '')
    for x in affs:
        record_add_subfield_into(record,
                                 '100',
                                 'u',
                                 x,
                                 field_position_global=auth_location)
    return print_rec(record)
def record_drop_fields_matching_pattern(record, pattern, fields, tag):
    """Remove fields matching given pattern from record."""
    field_positions = []
    for field in fields:
        subfields = field_get_subfield_instances(field)
        for subfield in subfields:
            if re.match(pattern, subfield[1].lower(), re.IGNORECASE):
                field_positions.append((field[1], field[2], field[4]))
                break

    for ind1, ind2, pos in field_positions:
        record_delete_field(record, tag, ind1=ind1, ind2=ind2, field_position_global=pos)
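
A quick check of how such a pattern behaves, using the "temporary/brief entry" pattern from the 500-note cleanup shown later on this page (sample values are made up). Note that lowercasing the value and passing re.IGNORECASE are redundant together; either alone would do:

import re

pattern = r"^.?((temporary|brief) entry).?$"
for value in ("*Temporary entry*", "Brief entry", "Some other note"):
    print("%s -> %s" % (value, bool(re.match(pattern, value.lower(), re.IGNORECASE))))
# *Temporary entry* -> True
# Brief entry -> True
# Some other note -> False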
Example #15
    def _filter_fields(self, record, output_fields):
        """Removes from the record all the fields
        that are not output_fields.

        @param record: record structure (@see: bibrecord.py for details)
        @param output_fields: list of fields that should remain in the record

        @return: record containing only fields among output_fields
        """
        # Tibor's new implementation:
        for tag in record.keys():
            if tag not in output_fields:
                bibrecord.record_delete_fields(record, tag)
        return record

        # Rado's old implementation that leads to bibrecord-related
        # bug, see <https://savannah.cern.ch/task/?10267>:
        record_keys = record.keys()

        # Check if any of the tags, fields or subfields match
        # any value in output_fields. In case of match we leave
        # the element and its children in the record.
        #
        # If the element and all its children are not among the
        # output fields, it is deleted
        for tag in record_keys:
            tag = tag.lower()
            if tag not in output_fields:
                for (subfields, ind1, ind2, value,
                     field_number) in record[tag]:
                    current_field = tag + ind1.strip() + ind2.strip()
                    current_field = current_field.lower()
                    if current_field not in output_fields:
                        delete_parents = True

                        for (code, value) in subfields:
                            current_subfield = current_field + code
                            current_subfield = current_subfield.lower()
                            if current_subfield not in output_fields:
                                bibrecord.record_delete_subfield(
                                    record, tag, code, ind1, ind2)
                            else:
                                delete_parents = False

                        if delete_parents:
                            bibrecord.record_delete_field(
                                record, tag, ind1, ind2)
        return record
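
The surviving whitelist pass has a subtlety worth noting: record.keys() returns a list under Python 2, which is what makes deleting while iterating safe there. A version-agnostic sketch on a toy dict:

record = {'001': ['...'], '100': ['...'], '999': ['...']}
output_fields = ['001', '100']
for tag in list(record):  # copy the keys so deletion during iteration is safe
    if tag not in output_fields:
        del record[tag]
print(sorted(record))  # ['001', '100']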
Example #16
def record_drop_fields_matching_pattern(record, pattern, fields, tag):
    """Remove fields matching given pattern from record."""
    field_positions = []
    for field in fields:
        subfields = field_get_subfield_instances(field)
        for subfield in subfields:
            if re.match(pattern, subfield[1].lower(), re.IGNORECASE):
                field_positions.append((field[1], field[2], field[4]))
                break

    for ind1, ind2, pos in field_positions:
        record_delete_field(record,
                            tag,
                            ind1=ind1,
                            ind2=ind2,
                            field_position_global=pos)
    def _filter_fields(self, record, output_fields):
        """Removes from the record all the fields
        that are not output_fields.

        @param record: record structure (@see: bibrecord.py for details)
        @param output_fields: list of fields that should remain in the record

        @return: record containing only fields among output_fields
        """
        # Tibor's new implementation:
        for tag in record.keys():
            if tag not in output_fields:
                bibrecord.record_delete_fields(record, tag)
        return record

        # Rado's old implementation that leads to bibrecord-related
        # bug, see <https://savannah.cern.ch/task/?10267>:
        record_keys = record.keys()

        # Check if any of the tags, fields or subfields match
        # any value in output_fields. In case of match we leave
        # the element and its children in the record.
        #
        # If the element and all its children are not among the
        # output fields, it is deleted
        for tag in record_keys:
            tag = tag.lower()
            if tag not in output_fields:
                for (subfields, ind1, ind2, value, field_number) in record[tag]:
                    current_field = tag + ind1.strip() + ind2.strip()
                    current_field = current_field.lower()
                    if current_field not in output_fields:
                        delete_parents = True

                        for (code, value) in subfields:
                            current_subfield = current_field + code
                            current_subfield = current_subfield.lower()
                            if current_subfield not in output_fields:
                                bibrecord.record_delete_subfield(record, tag, code, ind1, ind2)
                            else:
                                delete_parents = False

                        if delete_parents:
                            bibrecord.record_delete_field(record, tag, ind1, ind2)
        return record
def check_record(record):
    """ Split fields """
    from invenio.bibrecord import record_delete_field
    from invenio.bibrecord import record_add_field

    message = ""
    marc = '693__e'
    tag = marc[:3]
    if tag not in record:
        return
    ind1 = marc[3].replace('_', ' ')
    ind2 = marc[4].replace('_', ' ')
    sfcode = marc[5]
    to_split = fields_to_split(record, tag, ind1, ind2, sfcode)

    if not to_split:
        return

    # work from the back to try to preserve order
    positions = sorted(to_split, reverse=True)
    for global_pos in positions:
        (parts, rest_before, rest_after) = to_split[global_pos]
        message += " - split %s %s" % (tag, parts)
        record_delete_field(record,
                            tag,
                            ind1,
                            ind2,
                            field_position_global=global_pos)
        parts.reverse()
        for subfield in parts:
            field = rest_before + [
                subfield,
            ] + rest_after
            record_add_field(record,
                             tag,
                             ind1,
                             ind2,
                             '',
                             field,
                             field_position_global=global_pos)
    if message:
        record.set_amended(message)
 def _delete_field_condition(self, record):
     """Checks if a subfield meets the condition for the
     field to be deleted
     """
     try:
         for field in record[self._tag]:
             for subfield in field[0]:
                 if subfield[0] == self._conditionSubfield:
                     if self._condition_exact_match:
                         if self._condition == subfield[1]:
                             bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2, field_position_global=field[4])
                             self._modifications += 1
                             break
                     else:
                         if self._condition in subfield[1]:
                             bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2, field_position_global=field[4])
                             self._modifications += 1
                             break
     except KeyError:
         pass
Example #20
def check_existing_pdg_fields(recids, pdg_data, current_records):
    _print_out("Comparing new and old PDG data for " + str(len(recids)) + " records...")
    records = {}
    for recid in recids:
        record_mod = {}
        record_mod['001'] = deepcopy(current_records[recid]['001'])
        record_mod['084'] = deepcopy(current_records[recid]['084'])
        fields = record_get_field_instances(record_mod, '084')
        current_pdg_data = []
        for field in fields:
            if is_pdg_field(field):
                current_pdg_data.append(field_get_subfield_values(field, 'a')[0])

        current_set = set(current_pdg_data)
        new_set = set(pdg_data[recid])
        deletions = list(current_set - new_set)
        additions = list(new_set - current_set)

        if len(deletions) > 0 or len(additions) > 0:
            if len(deletions) > 0:
                for field in fields:
                    if is_pdg_field(field):
                        if field_get_subfield_values(field, 'a')[0] in deletions:
                            record_delete_field(record_mod, '084', ind1=' ', ind2=' ',
                                                field_position_global=field[4])

            for pdg_field in additions:
                position = record_add_field(record_mod, '084', ' ', ' ')
                record_add_subfield_into(record_mod, '084', '2', 'PDG', field_position_global=position)
                record_add_subfield_into(record_mod, '084', '9', 'PDG', field_position_global=position)
                record_add_subfield_into(record_mod, '084', 'a', pdg_field, field_position_global=position)

            records[recid] = record_mod
            _print_verbose("Record #" + str(recid) + ": " + str(len(deletions)) +
                           " deletions and " + str(len(additions)) + " additons.")
        else:
            _print_verbose("Nothing to change for record #" + str(recid))

    _print_out(str(len(records)) + " records to be corrected.")
    return records
Example #21
def perform_request_update_record(request_type, recid, uid, cacheMTime, data, \
                                  hpChanges, undoRedoOp, isBulk=False):
    """Handle record update requests like adding, modifying, moving or deleting
    of fields or subfields. Possible common error situations:
    - Missing cache file
    - Cache file modified in other editor
    Explanation of some parameters:
       undoRedoOp - Indicates whether an "undo"/"redo" operation or an undo
                    descriptor is carried by the current request.
    """

    response = {}

    if not cache_exists(recid, uid):
        response['resultCode'] = 106
    elif not get_cache_mtime(recid, uid) == cacheMTime and isBulk == False:
        # In case of a bulk request, the changes are deliberately performed
        # immediately one after another
        response['resultCode'] = 107
    else:
        try:
            record_revision, record, pending_changes, deactivated_hp_changes, \
                undo_list, redo_list = get_cache_file_contents(recid, uid)[1:]
        except:
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \
                'wrong_cache_file_format']
            return response

        # process all the Holding Pen changes operations ... regardless of the
        # request type
#        import rpdb2;
#        rpdb2.start_embedded_debugger('password', fAllowRemote=True)
        if hpChanges.has_key("toDisable"):
            for changeId in hpChanges["toDisable"]:
                pending_changes[changeId]["applied_change"] = True

        if hpChanges.has_key("toEnable"):
            for changeId in hpChanges["toEnable"]:
                pending_changes[changeId]["applied_change"] = False

        if hpChanges.has_key("toOverride"):
            pending_changes = hpChanges["toOverride"]

        if hpChanges.has_key("changesetsToDeactivate"):
            for changesetId in hpChanges["changesetsToDeactivate"]:
                deactivated_hp_changes[changesetId] = True

        if hpChanges.has_key("changesetsToActivate"):
            for changesetId in hpChanges["changesetsToActivate"]:
                deactivated_hp_changes[changesetId] = False

        # processing the undo/redo entries
        if undoRedoOp == "undo":
            try:
                redo_list = [undo_list[-1]] + redo_list
                undo_list = undo_list[:-1]
            except:
                raise Exception("An exception occured when undoing previous" + \
                                " operation. Undo list: " + str(undo_list) + \
                                " Redo list " + str(redo_list))
        elif undoRedoOp == "redo":
            try:
                undo_list = undo_list + [redo_list[0]]
                redo_list = redo_list[1:]
            except:
                raise Exception("An exception occured when redoing previous" + \
                                " operation. Undo list: " + str(undo_list) + \
                                " Redo list " + str(redo_list))
        else:
            # This is a genuine operation - we have to add a new descriptor
            # to the undo list and cancel the redo unless the operation is
            # a bulk operation
            if undoRedoOp != None:
                undo_list = undo_list + [undoRedoOp]
                redo_list = []
            else:
                assert isBulk == True

        field_position_local = data.get('fieldPosition')
        if field_position_local is not None:
            field_position_local = int(field_position_local)
        if request_type == 'otherUpdateRequest':
            # An empty request. Might be useful if we want to perform
            # operations that require only the actions performed globally,
            # like modifying the holdingPen changes list
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \
                'editor_modifications_changed']
        elif request_type == 'deactivateHoldingPenChangeset':
            # the changeset has been marked as processed ( user applied it in
            # the editor). Marking as used in the cache file.
            # CAUTION: This function has been implemented here because logically
            #          it fits with the modifications made to the cache file.
            #          No changes are made to the Holding Pen physically. The
            #          changesets are related to the cache because we want to
            #          cancel the removal every time the cache disappears for
            #          any reason
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \
                'disabled_hp_changeset']
        elif request_type == 'addField':
            if data['controlfield']:
                record_add_field(record, data['tag'],
                                 controlfield_value=data['value'])
                response['resultCode'] = 20
            else:
                record_add_field(record, data['tag'], data['ind1'],
                                 data['ind2'], subfields=data['subfields'],
                                 field_position_local=field_position_local)
                response['resultCode'] = 21

        elif request_type == 'addSubfields':
            subfields = data['subfields']
            for subfield in subfields:
                record_add_subfield_into(record, data['tag'], subfield[0],
                    subfield[1], subfield_position=None,
                    field_position_local=field_position_local)
            if len(subfields) == 1:
                response['resultCode'] = 22
            else:
                response['resultCode'] = 23
        elif request_type == 'addFieldsSubfieldsOnPositions':
            #1) Sorting the fields by their identifiers
            fieldsToAdd = data['fieldsToAdd']
            subfieldsToAdd = data['subfieldsToAdd']
            for tag in fieldsToAdd.keys():
                positions = fieldsToAdd[tag].keys()
                positions.sort()
                for position in positions:
                    # now adding fields at a position

                    isControlfield = (len(fieldsToAdd[tag][position][0]) == 0)
                    # if there are no subfields, this is a control field
                    if isControlfield:
                        controlfieldValue = fieldsToAdd[tag][position][3]
                        record_add_field(record, tag, field_position_local = \
                                             int(position), \
                                             controlfield_value = \
                                                 controlfieldValue)
                    else:
                        subfields = fieldsToAdd[tag][position][0]
                        ind1 = fieldsToAdd[tag][position][1]
                        ind2 = fieldsToAdd[tag][position][2]
                        record_add_field(record, tag, ind1, ind2, subfields = \
                                             subfields, field_position_local = \
                                                int(position))
            # now adding the subfields
            for tag in subfieldsToAdd.keys():
                for fieldPosition in subfieldsToAdd[tag].keys():
                    # the order of the fields does not matter here
                    subfieldsPositions = subfieldsToAdd[tag][fieldPosition]. \
                                           keys()
                    subfieldsPositions.sort()
                    for subfieldPosition in subfieldsPositions:
                        subfield = subfieldsToAdd[tag][fieldPosition]\
                            [subfieldPosition]
                        record_add_subfield_into(record, tag, subfield[0], \
                                                 subfield[1], \
                                                 subfield_position = \
                                                     int(subfieldPosition), \
                                                 field_position_local = \
                                                     int(fieldPosition))

            response['resultCode'] = \
                CFG_BIBEDIT_AJAX_RESULT_CODES_REV['added_positioned_subfields']

        elif request_type == 'modifyField': # changing the field structure
            # first remove subfields and then add new... change the indices
            subfields = data['subFields'] # parse the JSON representation of
                                          # the subfields here

            new_field = create_field(subfields, data['ind1'], data['ind2'])
            record_replace_field(record, data['tag'], new_field, \
                field_position_local = data['fieldPosition'])
            response['resultCode'] = 26

        elif request_type == 'modifyContent':
            if data['subfieldIndex'] != None:
                record_modify_subfield(record, data['tag'],
                    data['subfieldCode'], data['value'],
                    int(data['subfieldIndex']),
                    field_position_local=field_position_local)
            else:
                record_modify_controlfield(record, data['tag'], data["value"],
                  field_position_local=field_position_local)
            response['resultCode'] = 24

        elif request_type == 'moveSubfield':
            record_move_subfield(record, data['tag'],
                int(data['subfieldIndex']), int(data['newSubfieldIndex']),
                field_position_local=field_position_local)
            response['resultCode'] = 25

        elif request_type == 'moveField':
            if data['direction'] == 'up':
                final_position_local = field_position_local-1
            else: # direction is 'down'
                final_position_local = field_position_local+1
            record_move_fields(record, data['tag'], [field_position_local],
                final_position_local)
            response['resultCode'] = 32

        elif request_type == 'deleteFields':
            to_delete = data['toDelete']
            deleted_fields = 0
            deleted_subfields = 0
            for tag in to_delete:
                # Sort the fields in decreasing order of local position!
                fieldsOrder = to_delete[tag].keys()
                fieldsOrder.sort(lambda a, b: int(b) - int(a))
                for field_position_local in fieldsOrder:
                    if not to_delete[tag][field_position_local]:
                        # No subfields specified - delete entire field.
                        record_delete_field(record, tag,
                            field_position_local=int(field_position_local))
                        deleted_fields += 1
                    else:
                        for subfield_position in \
                                to_delete[tag][field_position_local][::-1]:
                            # Delete subfields in reverse order (to keep the
                            # indexing correct).
                            record_delete_subfield_from(record, tag,
                                int(subfield_position),
                                field_position_local=int(field_position_local))
                            deleted_subfields += 1
            if deleted_fields == 1 and deleted_subfields == 0:
                response['resultCode'] = 26
            elif deleted_fields and deleted_subfields == 0:
                response['resultCode'] = 27
            elif deleted_subfields == 1 and deleted_fields == 0:
                response['resultCode'] = 28
            elif deleted_subfields and deleted_fields == 0:
                response['resultCode'] = 29
            else:
                response['resultCode'] = 30
        response['cacheMTime'], response['cacheDirty'] = \
            update_cache_file_contents(recid, uid, record_revision, record, \
                                       pending_changes, \
                                       deactivated_hp_changes, \
                                       undo_list, redo_list), \
            True

    return response
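
The undo/redo bookkeeping above amounts to moving one descriptor between two lists; in isolation (the descriptors here are hypothetical strings):

undo_list, redo_list = ['op1', 'op2'], []

# undo: move the newest descriptor to the front of the redo list
redo_list = [undo_list[-1]] + redo_list
undo_list = undo_list[:-1]
print(undo_list)  # ['op1']
print(redo_list)  # ['op2']

# redo: move it back
undo_list = undo_list + [redo_list[0]]
redo_list = redo_list[1:]
print(undo_list)  # ['op1', 'op2']
print(redo_list)  # []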
Example #22
 def process_record(self, record):
     """@see: BaseFieldCommand.process_record"""
     bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2)
     self._modifications += 1
Example #23
                                                    ind1="%",
                                                    ind2="%")
            if fields_500 is not None:
                field_positions = []
                for field in fields_500:
                    subfields = field_get_subfield_instances(field)
                    for subfield in subfields:
                        if re.match("^.?((temporary|brief) entry).?$",
                                    subfield[1].lower(), re.IGNORECASE):
                            field_positions.append(
                                (field[1], field[2], field[4]))

                for ind1, ind2, pos in field_positions:
                    record_delete_field(record,
                                        '500',
                                        ind1=ind1,
                                        ind2=ind2,
                                        field_position_global=pos)

            # Now compare new version with existing one, returning a diff[tag] = (diffcode, [..])
            # None - if field is the same for both records
            # ('r',) - field missing from input record, ignored ATM
            # ('a',) - new field added, should be updated with append
            # ('c', difference_comparison) -> if field field_id exists in both records, but its value has changed
            #                              -> uploaded with correct if accepted
            fields_to_add = []
            fields_to_correct = []
            holdingpen = False

            difference = record_diff(existing_record,
                                     record,
Example #24
def apply_filter(rec):
    """ Filters the record to be compatible within Inspire
    Parameters:
     * rec - dictionary: BibRecord structure
    Returns: dictionary, BibRecord structure
    """
    # Move recid from 001 to 035 if not hidden
    cds_id = rec['001'][0][3]
    if not 'hidden' in [
            x.lower() for x in record_get_field_values(rec, "980", code="a")
    ]:
        record_add_field(rec, '035', subfields=[('9', 'CDS'), ('a', cds_id)])
    # Clear control fields
    record_strip_controlfields(rec)

    # Clear other uninteresting fields
    interesting_fields = [
        "024", "041", "035", "037", "088", "100", "110", "111", "242", "245",
        "246", "260", "269", "300", "502", "650", "653", "693", "700", "710",
        "773", "856", "520", "500", "980"
    ]
    for tag in rec.keys():
        if tag not in interesting_fields:
            record_delete_fields(rec, tag)

    # 980 Determine Collections
    collections = set([])
    for value in record_get_field_values(rec, '980', code='a'):
        if 'NOTE' in value.upper():
            collections.add('NOTE')
        if 'THESIS' in value.upper():
            collections.add('THESIS')
        if 'CONFERENCEPAPER' in value.upper():
            collections.add('ConferencePaper')

    if is_published(rec):
        collections.add("PUBLISHED")
        collections.add("CITEABLE")

    if not 'NOTE' in collections:
        # TODO: Move this to a KB
        kb = [
            'ATLAS-CONF-', 'CMS-PAS-', 'ATL-', 'CMS-DP-', 'ALICE-INT-',
            'LHCb-PUB-'
        ]
        values = record_get_field_values(rec, "088", code='a')
        for val, rep in product(values, kb):
            if val.startswith(rep):
                collections.add('NOTE')
                break

    # 980 Arxiv tag
    if record_get_field_values(rec,
                               '035',
                               filter_subfield_code="a",
                               filter_subfield_value="arXiv"):
        collections.add("arXiv")

    # 980 HEP && CORE
    collections.add('HEP')
    collections.add('CORE')

    # 980 Conference Note
    if not 'ConferencePaper' in collections:
        for value in record_get_field_values(rec, '962', code='n'):
            if value[-2:].isdigit():
                collections.add('ConferencePaper')
                break

    record_delete_fields(rec, "980")

    intnote = record_get_field_values(rec,
                                      '690',
                                      filter_subfield_code="a",
                                      filter_subfield_value='INTNOTE')
    if intnote:
        val_088 = record_get_field_values(rec, '088', filter_subfield_code="a")
        for val in val_088:
            if 'CMS' in val:
                url = ('http://weblib.cern.ch/abstract?CERN-CMS' +
                       val.split('CMS', 1)[-1])
                record_add_field(rec, '856', ind1='4', subfields=[('u', url)])

    # 041 Language
    languages = get_languages()
    language_fields = record_get_field_instances(rec, '041')
    record_delete_fields(rec, "041")
    for field in language_fields:
        subs = field_get_subfields(field)
        if 'a' in subs:
            if "eng" in subs['a']:
                continue
            new_value = translate_config(subs['a'][0], languages)
            new_subs = [('a', new_value)]
            record_add_field(rec, "041", subfields=new_subs)

    # 035 Externals
    scn_035_fields = record_get_field_instances(rec, '035')
    forbidden_values = [
        "cercer", "inspire", "xx", "cern annual report", "cmscms", "wai01"
    ]
    for field in scn_035_fields:
        subs = field_get_subfields(field)
        if '9' in subs:
            if not 'a' in subs:
                continue
            for sub in subs['9']:
                if sub.lower() in forbidden_values:
                    break
            else:
                # No forbidden values (We did not "break")
                suffixes = [s.lower() for s in subs['9']]
                if 'spires' in suffixes:
                    new_subs = [('a', 'SPIRES-%s' % subs['a'][0])]
                    record_add_field(rec, '970', subfields=new_subs)
                    continue
        if 'a' in subs:
            for sub in subs['a']:
                if sub.lower() in forbidden_values:
                    record_delete_field(rec,
                                        tag="035",
                                        field_position_global=field[4])

    rep_088_fields = record_get_field_instances(rec, '088')
    for field in rep_088_fields:
        subs = field_get_subfields(field)
        if '9' in subs:
            for val in subs['9']:
                if val.startswith('P0') or val.startswith('CM-P0'):
                    sf = [('9', 'CERN'), ('b', val)]
                    record_add_field(rec, '595', subfields=sf)
        for key, val in field[0]:
            if key in ['a', '9'] and not val.startswith('SIS-'):
                record_add_field(rec, '037', subfields=[('a', val)])
    record_delete_fields(rec, "088")

    # 037 Externals also...
    rep_037_fields = record_get_field_instances(rec, '037')
    for field in rep_037_fields:
        subs = field_get_subfields(field)
        if 'a' in subs:
            for value in subs['a']:
                if 'arXiv' in value:
                    new_subs = [('a', value), ('9', 'arXiv')]
                    for fld in record_get_field_instances(rec, '695'):
                        for key, val in field_get_subfield_instances(fld):
                            if key == 'a':
                                new_subs.append(('c', val))
                                break
                    nf = create_field(subfields=new_subs)
                    record_replace_field(rec, '037', nf, field[4])
        for key, val in field[0]:
            if key in ['a', '9'] and val.startswith('SIS-'):
                record_delete_field(rec, '037', field_position_global=field[4])

    for field in record_get_field_instances(rec, '242'):
        record_add_field(rec, '246', subfields=field[0])
    record_delete_fields(rec, '242')

    # 269 Date normalization
    for field in record_get_field_instances(rec, '269'):
        for idx, (key, value) in enumerate(field[0]):
            if key == "c":
                field[0][idx] = ("c", convert_date_to_iso(value))
                record_delete_fields(rec, "260")

    if not 'THESIS' in collections:
        for field in record_get_field_instances(rec, '260'):
            record_add_field(rec, '269', subfields=field[0])
        record_delete_fields(rec, '260')

    # 300 page number
    for field in record_get_field_instances(rec, '300'):
        for idx, (key, value) in enumerate(field[0]):
            if key == 'a':
                if "mult." not in value and value != " p":
                    field[0][idx] = ('a', re.sub(r'[^\d-]+', '', value))
                else:
                    record_delete_field(rec,
                                        '300',
                                        field_position_global=field[4])
                    break

    # 100 & 700 punctuate author names
    author_names = record_get_field_instances(rec, '100')
    author_names.extend(record_get_field_instances(rec, '700'))
    for field in author_names:
        subs = field_get_subfields(field)
        if not 'i' in subs or 'XX' in subs['i']:
            if not 'j' in subs or 'YY' in subs['j']:
                for idx, (key, value) in enumerate(field[0]):
                    if key == 'a':
                        field[0][idx] = ('a', punctuate_authorname(value))

    # 700 -> 701 Thesis supervisors
    if 'THESIS' in collections:
        for field in record_get_field_instances(rec, '700'):
            record_add_field(rec, '701', subfields=field[0])
        record_delete_fields(rec, '700')

    # 501 move subfields
    fields_501 = record_get_field_instances(rec, '502')
    for idx, field in enumerate(fields_501):
        new_subs = []
        for key, value in field[0]:
            if key == 'a':
                new_subs.append(('b', value))
            elif key == 'b':
                new_subs.append(('c', value))
            elif key == 'c':
                new_subs.append(('d', value))
            else:
                new_subs.append((key, value))
        fields_501[idx] = field_swap_subfields(field, new_subs)

    # 650 Translate Categories
    categories = get_categories()
    category_fields = record_get_field_instances(rec,
                                                 '650',
                                                 ind1='1',
                                                 ind2='7')
    record_delete_fields(rec, "650")
    for field in category_fields:
        for idx, (key, value) in enumerate(field[0]):
            if key == 'a':
                new_value = translate_config(value, categories)
                if new_value != value:
                    new_subs = [('2', 'INSPIRE'), ('a', new_value)]
                else:
                    new_subs = [('2', 'SzGeCERN'), ('a', value)]
                record_add_field(rec,
                                 "650",
                                 ind1="1",
                                 ind2="7",
                                 subfields=new_subs)
                break

    # 653 Free Keywords
    for field in record_get_field_instances(rec, '653', ind1='1'):
        subs = field_get_subfields(field)
        new_subs = []
        if 'a' in subs:
            for val in subs['a']:
                new_subs.extend([('9', 'author'), ('a', val)])
        new_field = create_field(subfields=new_subs, ind1='1')
        record_replace_field(rec,
                             '653',
                             new_field,
                             field_position_global=field[4])

    experiments = get_experiments()
    # 693 Remove if 'not applicable'
    for field in record_get_field_instances(rec, '693'):
        subs = field_get_subfields(field)
        all_subs = subs.get('a', []) + subs.get('e', [])
        if 'not applicable' in [x.lower() for x in all_subs]:
            record_delete_field(rec, '693', field_position_global=field[4])
        new_subs = []
        experiment_a = ""
        experiment_e = ""
        for (key, value) in subs.iteritems():
            if key == 'a':
                experiment_a = value[0]
                new_subs.append((key, value[0]))
            elif key == 'e':
                experiment_e = value[0]
        experiment = "%s---%s" % (experiment_a.replace(" ", "-"), experiment_e)
        translated_experiments = translate_config(experiment, experiments)
        new_subs.append(("e", translated_experiments))
        record_delete_field(rec, tag="693", field_position_global=field[4])
        record_add_field(rec, "693", subfields=new_subs)

    # 710 Collaboration
    for field in record_get_field_instances(rec, '710'):
        subs = field_get_subfield_instances(field)
        for idx, (key, value) in enumerate(subs[:]):
            if key == '5':
                subs.pop(idx)
            elif value.startswith('CERN. Geneva'):
                subs.pop(idx)
        if len(subs) == 0:
            record_delete_field(rec, '710', field_position_global=field[4])

    # 773 journal translations
    journals = get_journals()
    for field in record_get_field_instances(rec, '773'):
        subs = field_get_subfield_instances(field)
        new_subs = []
        for idx, (key, value) in enumerate(subs):
            if key == 'p':
                new_subs.append((key, translate_config(value, journals)))
            else:
                new_subs.append((key, value))
        record_delete_field(rec, tag="773", field_position_global=field[4])
        record_add_field(rec, "773", subfields=new_subs)

    # FFT (856) Dealing with graphs
    figure_counter = 0
    for field in record_get_field_instances(rec, '856', ind1='4'):
        subs = field_get_subfields(field)

        newsubs = []
        remove = False

        if 'z' in subs:
            is_figure = [s for s in subs['z'] if "figure" in s.lower()]
            if is_figure and 'u' in subs:
                is_subformat = [
                    s for s in subs['u'] if "subformat" in s.lower()
                ]
                if not is_subformat:
                    url = subs['u'][0]
                    if url.endswith(".pdf"):
                        # We try to convert
                        fd, local_url = mkstemp(suffix=os.path.basename(url),
                                                dir=CFG_TMPSHAREDDIR)
                        os.close(fd)
                        _print("Downloading %s into %s" % (url, local_url),
                               verbose=5)
                        plotfile = ""
                        try:
                            plotfile = download_url(url=url,
                                                    download_to_file=local_url,
                                                    timeout=30.0)
                        except InvenioFileDownloadError:
                            _print(
                                "Download failed while attempting to reach %s. Skipping.."
                                % (url, ))
                            remove = True
                        if plotfile:
                            converted = convert_images([plotfile])
                            if converted:
                                url = converted.pop()
                                _print("Successfully converted %s to %s" %
                                       (local_url, url),
                                       verbose=5)
                            else:
                                _print("Conversion failed on %s" %
                                       (local_url, ))
                                url = None
                                remove = True
                    if url:
                        newsubs.append(('a', url))
                        newsubs.append(('t', 'Plot'))
                        figure_counter += 1
                        if 'y' in subs:
                            newsubs.append(
                                ('d',
                                 "%05d %s" % (figure_counter, subs['y'][0])))
                            newsubs.append(('n', subs['y'][0]))
                        else:
                            # Get basename without extension.
                            name = os.path.basename(
                                os.path.splitext(subs['u'][0])[0])
                            newsubs.append(
                                ('d', "%05d %s" % (figure_counter, name)))
                            newsubs.append(('n', name))

        if not newsubs and 'u' in subs:
            is_fulltext = [s for s in subs['u'] if ".pdf" in s]
            if is_fulltext:
                newsubs = [('t', 'INSPIRE-PUBLIC'), ('a', subs['u'][0])]

        if not newsubs and 'u' in subs:
            remove = True
            is_zipfile = [s for s in subs['u'] if ".zip" in s]
            if is_zipfile:
                url = is_zipfile[0]
                local_url = os.path.join(CFG_TMPSHAREDDIR,
                                         os.path.basename(url))
                _print("Downloading %s into %s" % (url, local_url), verbose=5)
                zipped_archive = ""
                try:
                    zipped_archive = download_url(url=is_zipfile[0],
                                                  download_to_file=local_url,
                                                  timeout=30.0)
                except InvenioFileDownloadError:
                    _print(
                        "Download failed while attempting to reach %s. Skipping.."
                        % (is_zipfile[0], ))
                    remove = True
                if zipped_archive:
                    unzipped_archive = unzip(zipped_archive)
                    list_of_pngs = locate("*.png", unzipped_archive)
                    for png in list_of_pngs:
                        if "_vti_" in png or "__MACOSX" in png:
                            continue
                        figure_counter += 1
                        plotsubs = []
                        plotsubs.append(('a', png))
                        caption = '%05d %s' % (figure_counter,
                                               os.path.basename(png))
                        plotsubs.append(('d', caption))
                        plotsubs.append(('t', 'Plot'))
                        record_add_field(rec, 'FFT', subfields=plotsubs)

        if not remove and not newsubs and 'u' in subs:
            urls = ('http://cdsweb.cern.ch', 'http://cms.cern.ch',
                    'http://cmsdoc.cern.ch', 'http://documents.cern.ch',
                    'http://preprints.cern.ch', 'http://cds.cern.ch')
            for val in subs['u']:
                if any(url in val for url in urls):
                    remove = True
                    break
                if val.endswith('ps.gz'):
                    remove = True

        if newsubs:
            record_add_field(rec, 'FFT', subfields=newsubs)
            remove = True

        if remove:
            record_delete_field(rec,
                                '856',
                                ind1='4',
                                field_position_global=field[4])

    # 500 - Preliminary results
    if "THESIS" not in collections:
        subs = [('a', "Preliminary results")]
        record_add_field(rec, "500", subfields=subs)

    for collection in collections:
        record_add_field(rec, '980', subfields=[('a', collection)])

    return rec
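
The NOTE detection above pairs every 088__a value with every known prefix; in isolation (the report number is hypothetical):

from itertools import product

kb = ['ATLAS-CONF-', 'CMS-PAS-', 'ATL-', 'CMS-DP-', 'ALICE-INT-', 'LHCb-PUB-']
values = ['CMS-PAS-HIG-12-028']
print(any(val.startswith(rep) for val, rep in product(values, kb)))  # True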
Example #25
def check_variants(record, sort_variants):
    """
    are variants unique?
    list variants only once, delete more fields if no other subfield (e.g. $$b), set_invalid otherwise
    normalize variants
    option to sort variants
    """
    from invenio.bibrecord import record_delete_field, record_add_field
    from operator import itemgetter
    import copy

    all_variants = []
    recid = record.record_id

    for field in record['730']:
        # get info for field
        name = ''
        othersubfields = False
        position_name = None
        letter = ' '
        for num_sf, (code, value) in enumerate(field[0]):
            if code == 'a':
                name = value
                position_name = num_sf
            else:
                othersubfields = True
                if code == 'b':
                    letter = value
        if not name:
            record.set_invalid('field 730 without $$a subfield: %s. ' % (field, ))
            sort_variants = False
            continue

        norm_name = normalize_name(name)
        # is it normalized?
        if not name == norm_name:
            record.set_amended('normalized name variant: "%s" ' % name)
            field[0][position_name] = ('a', norm_name)
        if norm_name in all_variants:
            # avoid adding variants multiple times
            if othersubfields:
                # let a human do this
                record.set_invalid('variant is listed twice: "%s" ' % name)
            else:
                record.set_amended('deleted already existing variant: "%s" ' % name)
                record_delete_field(record, '730', field_position_global=field[4])
        else:
            all_variants.append(norm_name)
            result = searchforothervariant(norm_name, recid)
            if result:
                record.set_invalid('Name variant "%s" exists in other record %s. ' % (norm_name, result))

    if sort_variants:
        # sort by letter ($$b), length (longest first), name
        sort_index = {}
        for num_f, field in enumerate(record['730']):
            # get info for field
            name = ''
            letter = ' '
            for code, value in field[0]:
                if code == 'a':
                    name = value
                elif code == 'b':
                    letter = value    
            sort_index[num_f] = (letter, len(name)*-1, name)
        
        m730 = copy.deepcopy(record['730'])
        sorted_keys = [k for k, _ in sorted(sort_index.items(), key=itemgetter(1))]
        m730_sort = [m730[num_f] for num_f in sorted_keys]
        if m730_sort != m730:
            # we have to get rid of global positions to really sort it
            record.set_amended('Name variants sorted. ')
            record_delete_field(record, '730')
            for field in m730_sort:
                record_add_field(record, '730', ind1=' ', ind2=' ', subfields=field[0], controlfield_value='')

    return all_variants
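
The sort order used above, in isolation: by the $$b letter first, then longest name first, then alphabetically (the sample (letter, name) pairs are made up):

variants = [('A', 'Short'), (' ', 'A much longer variant'), (' ', 'Beta')]
print(sorted(variants, key=lambda lv: (lv[0], -len(lv[1]), lv[1])))
# [(' ', 'A much longer variant'), (' ', 'Beta'), ('A', 'Short')]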
def apply_filter(rec):
    """ Filters the record to be compatible within Inspire
    Parameters:
     * rec - dictionary: BibRecord structure
    Returns: dictionary, BibRecord structure
    """
    # Move recid from 001 to 035 if not hidden
    cds_id = rec['001'][0][3]
    if not 'hidden' in [x.lower() for x in record_get_field_values(rec, "980",
                                                                   code="a")]:
        record_add_field(rec, '035', subfields=[('9', 'CDS'), ('a', cds_id)])
    # Clear control fields
    record_strip_controlfields(rec)

    # Clear other uninteresting fields
    interesting_fields = ["024", "041", "035", "037", "088", "100",
                          "110", "111", "242", "245", "246", "260",
                          "269", "300", "502", "650", "653", "693",
                          "700", "710", "773", "856", "520", "500",
                          "980"]
    for tag in rec.keys():
        if tag not in interesting_fields:
            record_delete_fields(rec, tag)

    # 980 Determine Collections
    collections = set([])
    for value in record_get_field_values(rec, '980', code='a'):
        if 'NOTE' in value.upper():
            collections.add('NOTE')
        if 'THESIS' in value.upper():
            collections.add('THESIS')
        if 'CONFERENCEPAPER' in value.upper():
            collections.add('ConferencePaper')


    if is_published(rec):
        collections.add("PUBLISHED")
        collections.add("CITEABLE")

    if not 'NOTE' in collections:
        # TODO: Move this to a KB
        kb = ['ATLAS-CONF-', 'CMS-PAS-', 'ATL-', 'CMS-DP-',
              'ALICE-INT-', 'LHCb-PUB-']
        values = record_get_field_values(rec, "088", code='a')
        for val, rep in product(values, kb):
            if val.startswith(rep):
                collections.add('NOTE')
                break

    # 980 Arxiv tag
    if record_get_field_values(rec, '035', filter_subfield_code="a",
                               filter_subfield_value="arXiv"):
        collections.add("arXiv")

    # 980 HEP && CORE
    collections.add('HEP')
    collections.add('CORE')

    # 980 Conference Note
    if not 'ConferencePaper' in collections:
        for value in record_get_field_values(rec, '962', code='n'):
            if value[-2:].isdigit():
                collections.add('ConferencePaper')
                break

    record_delete_fields(rec, "980")

    intnote = record_get_field_values(rec, '690', filter_subfield_code="a",
                                      filter_subfield_value='INTNOTE')
    if intnote:
        val_088 = record_get_field_values(rec, '088', filter_subfield_code="a")
        for val in val_088:
            if 'CMS' in val:
                url = ('http://weblib.cern.ch/abstract?CERN-CMS' +
                       val.split('CMS', 1)[-1])
                record_add_field(rec, '856', ind1='4', subfields=[('u', url)])

    # 041 Language
    languages = get_languages()
    language_fields = record_get_field_instances(rec, '041')
    record_delete_fields(rec, "041")
    for field in language_fields:
        subs = field_get_subfields(field)
        if 'a' in subs:
            if "eng" in subs['a']:
                continue
            new_value = translate_config(subs['a'][0], languages)
            new_subs = [('a', new_value)]
            record_add_field(rec, "041", subfields=new_subs)

    # 035 Externals
    scn_035_fields = record_get_field_instances(rec, '035')
    forbidden_values = ["cercer",
                        "inspire",
                        "xx",
                        "cern annual report",
                        "cmscms",
                        "wai01"]
    for field in scn_035_fields:
        subs = field_get_subfields(field)
        if '9' in subs:
            if not 'a' in subs:
                continue
            for sub in subs['9']:
                if sub.lower() in forbidden_values:
                    break
            else:
                # No forbidden values (We did not "break")
                suffixes = [s.lower() for s in subs['9']]
                if 'spires' in suffixes:
                    new_subs = [('a', 'SPIRES-%s' % subs['a'][0])]
                    record_add_field(rec, '970', subfields=new_subs)
                    continue
        if 'a' in subs:
            for sub in subs['a']:
                if sub.lower() in forbidden_values:
                    record_delete_field(rec, tag="035",
                                        field_position_global=field[4])

    rep_088_fields = record_get_field_instances(rec, '088')
    for field in rep_088_fields:
        subs = field_get_subfields(field)
        if '9' in subs:
            for val in subs['9']:
                if val.startswith('P0') or val.startswith('CM-P0'):
                    sf = [('9', 'CERN'), ('b', val)]
                    record_add_field(rec, '595', subfields=sf)
        for key, val in field[0]:
            if key in ['a', '9'] and not val.startswith('SIS-'):
                record_add_field(rec, '037', subfields=[('a', val)])
    record_delete_fields(rec, "088")

    # 037 Externals also...
    rep_037_fields = record_get_field_instances(rec, '037')
    for field in rep_037_fields:
        subs = field_get_subfields(field)
        if 'a' in subs:
            for value in subs['a']:
                if 'arXiv' in value:
                    new_subs = [('a', value), ('9', 'arXiv')]
                    for fld in record_get_field_instances(rec, '695'):
                        for key, val in field_get_subfield_instances(fld):
                            if key == 'a':
                                new_subs.append(('c', val))
                                break
                    nf = create_field(subfields=new_subs)
                    record_replace_field(rec, '037', nf, field[4])
        for key, val in field[0]:
            if key in ['a', '9'] and val.startswith('SIS-'):
                record_delete_field(rec, '037', field_position_global=field[4])

    for field in record_get_field_instances(rec, '242'):
        record_add_field(rec, '246', subfields=field[0])
    record_delete_fields(rec, '242')

    # 269 Date normalization
    for field in record_get_field_instances(rec, '269'):
        for idx, (key, value) in enumerate(field[0]):
            if key == "c":
                field[0][idx] = ("c", convert_date_to_iso(value))
                record_delete_fields(rec, "260")

    if not 'THESIS' in collections:
        for field in record_get_field_instances(rec, '260'):
            record_add_field(rec, '269', subfields=field[0])
        record_delete_fields(rec, '260')

    # 300 page number
    for field in record_get_field_instances(rec, '300'):
        for idx, (key, value) in enumerate(field[0]):
            if key == 'a':
                if "mult." not in value and value != " p":
                    field[0][idx] = ('a', re.sub(r'[^\d-]+', '', value))
                else:
                    record_delete_field(rec, '300',
                                        field_position_global=field[4])
                    break

    # 100 & 700 punctuate author names
    author_names = record_get_field_instances(rec, '100')
    author_names.extend(record_get_field_instances(rec, '700'))
    for field in author_names:
        subs = field_get_subfields(field)
        if not 'i' in subs or 'XX' in subs['i']:
            if not 'j' in subs or 'YY' in subs['j']:
                for idx, (key, value) in enumerate(field[0]):
                    if key == 'a':
                        field[0][idx] = ('a', punctuate_authorname(value))

    # 700 -> 701 Thesis supervisors
    if 'THESIS' in collections:
        for field in record_get_field_instances(rec, '700'):
            record_add_field(rec, '701', subfields=field[0])
        record_delete_fields(rec, '700')

    # 502 move subfields
    fields_502 = record_get_field_instances(rec, '502')
    for idx, field in enumerate(fields_502):
        new_subs = []
        for key, value in field[0]:
            if key == 'a':
                new_subs.append(('b', value))
            elif key == 'b':
                new_subs.append(('c', value))
            elif key == 'c':
                new_subs.append(('d', value))
            else:
                new_subs.append((key, value))
        fields_502[idx] = field_swap_subfields(field, new_subs)

    # 650 Translate Categories
    categories = get_categories()
    category_fields = record_get_field_instances(rec, '650', ind1='1', ind2='7')
    record_delete_fields(rec, "650")
    for field in category_fields:
        for idx, (key, value) in enumerate(field[0]):
            if key == 'a':
                new_value = translate_config(value, categories)
                if new_value != value:
                    new_subs = [('2', 'INSPIRE'), ('a', new_value)]
                else:
                    new_subs = [('2', 'SzGeCERN'), ('a', value)]
                record_add_field(rec, "650", ind1="1", ind2="7",
                                 subfields=new_subs)
                break

    # 653 Free Keywords
    for field in record_get_field_instances(rec, '653', ind1='1'):
        subs = field_get_subfields(field)
        new_subs = []
        if 'a' in subs:
            for val in subs['a']:
                new_subs.extend([('9', 'author'), ('a', val)])
        new_field = create_field(subfields=new_subs, ind1='1')
        record_replace_field(rec, '653', new_field, field_position_global=field[4])

    experiments = get_experiments()
    # 693 Remove if 'not applicable'
    for field in record_get_field_instances(rec, '693'):
        subs = field_get_subfields(field)
        all_subs = subs.get('a', []) + subs.get('e', [])
        if 'not applicable' in [x.lower() for x in all_subs]:
            record_delete_field(rec, '693',
                                field_position_global=field[4])
            # skip the rebuild below: the field was removed on purpose
            continue
        new_subs = []
        experiment_a = ""
        experiment_e = ""
        for (key, value) in subs.iteritems():
            if key == 'a':
                experiment_a = value[0]
                new_subs.append((key, value[0]))
            elif key == 'e':
                experiment_e = value[0]
        experiment = "%s---%s" % (experiment_a.replace(" ", "-"),
                                  experiment_e)
        translated_experiments = translate_config(experiment,
                                                  experiments)
        new_subs.append(("e", translated_experiments))
        record_delete_field(rec, tag="693",
                            field_position_global=field[4])
        record_add_field(rec, "693", subfields=new_subs)

    # 710 Collaboration
    for field in record_get_field_instances(rec, '710'):
        subs = field_get_subfield_instances(field)
        # rebuild the list in place: popping by index while iterating
        # shifts positions and skips entries
        subs[:] = [(key, value) for (key, value) in subs
                   if key != '5' and not value.startswith('CERN. Geneva')]
        if len(subs) == 0:
            record_delete_field(rec, '710', field_position_global=field[4])

    # 773 journal translations
    journals = get_journals()
    for field in record_get_field_instances(rec, '773'):
        subs = field_get_subfield_instances(field)
        new_subs = []
        for idx, (key, value) in enumerate(subs):
            if key == 'p':
                new_subs.append((key, translate_config(value, journals)))
            else:
                new_subs.append((key, value))
        record_delete_field(rec, tag="773",
                            field_position_global=field[4])
        record_add_field(rec, "773", subfields=new_subs)

    # FFT (856) Dealing with graphs
    figure_counter = 0
    for field in record_get_field_instances(rec, '856', ind1='4'):
        subs = field_get_subfields(field)

        newsubs = []
        remove = False

        if 'z' in subs:
            is_figure = [s for s in subs['z'] if "figure" in s.lower()]
            if is_figure and 'u' in subs:
                is_subformat = [s for s in subs['u'] if "subformat" in s.lower()]
                if not is_subformat:
                    url = subs['u'][0]
                    if url.endswith(".pdf"):
                        # We try to convert
                        fd, local_url = mkstemp(suffix=os.path.basename(url), dir=CFG_TMPSHAREDDIR)
                        os.close(fd)
                        _print("Downloading %s into %s" % (url, local_url), verbose=5)
                        plotfile = ""
                        try:
                            plotfile = download_url(url=url,
                                                    download_to_file=local_url,
                                                    timeout=30.0)
                        except InvenioFileDownloadError:
                            _print("Download failed while attempting to reach %s. Skipping.." % (url,))
                            remove = True
                        if plotfile:
                            converted = convert_images([plotfile])
                            if converted:
                                url = converted.pop()
                                _print("Successfully converted %s to %s" % (local_url, url), verbose=5)
                            else:
                                _print("Conversion failed on %s" % (local_url,))
                                url = None
                                remove = True
                    if url:
                        newsubs.append(('a', url))
                        newsubs.append(('t', 'Plot'))
                        figure_counter += 1
                        if 'y' in subs:
                            newsubs.append(('d', "%05d %s" % (figure_counter, subs['y'][0])))
                            newsubs.append(('n', subs['y'][0]))
                        else:
                            # Get basename without extension.
                            name = os.path.basename(os.path.splitext(subs['u'][0])[0])
                            newsubs.append(('d', "%05d %s" % (figure_counter, name)))
                            newsubs.append(('n', name))

        if not newsubs and 'u' in subs:
            is_fulltext = [s for s in subs['u'] if ".pdf" in s and not "subformat=pdfa" in s]
            if is_fulltext:
                newsubs = [('t', 'INSPIRE-PUBLIC'), ('a', subs['u'][0])]

        if not newsubs and 'u' in subs:
            remove = True
            is_zipfile = [s for s in subs['u'] if ".zip" in s]
            if is_zipfile:
                url = is_zipfile[0]
                local_url = os.path.join(CFG_TMPSHAREDDIR, os.path.basename(url))
                _print("Downloading %s into %s" % (url, local_url), verbose=5)
                zipped_archive = ""
                try:
                    zipped_archive = download_url(url=is_zipfile[0],
                                                  download_to_file=local_url,
                                                  timeout=30.0)
                except InvenioFileDownloadError:
                    _print("Download failed while attempting to reach %s. Skipping.."
                           % (is_zipfile[0],))
                    remove = True
                if zipped_archive:
                    unzipped_archive = unzip(zipped_archive)
                    list_of_pngs = locate("*.png", unzipped_archive)
                    for png in list_of_pngs:
                        if "_vti_" in png or "__MACOSX" in png:
                            continue
                        figure_counter += 1
                        plotsubs = []
                        plotsubs.append(('a', png))
                        caption = '%05d %s' % (figure_counter, os.path.basename(png))
                        plotsubs.append(('d', caption))
                        plotsubs.append(('t', 'Plot'))
                        record_add_field(rec, 'FFT', subfields=plotsubs)

        if not remove and not newsubs and 'u' in subs:
            urls = ('http://cdsweb.cern.ch', 'http://cms.cern.ch',
                    'http://cmsdoc.cern.ch', 'http://documents.cern.ch',
                    'http://preprints.cern.ch', 'http://cds.cern.ch')
            for val in subs['u']:
                if any(url in val for url in urls):
                    remove = True
                    break
                if val.endswith('ps.gz'):
                    remove = True

        if newsubs:
            record_add_field(rec, 'FFT', subfields=newsubs)
            remove = True

        if remove:
            record_delete_field(rec, '856', ind1='4',
                                field_position_global=field[4])

    # 500 - Preliminary results
    if "THESIS" not in collections:
        subs = [('a', "Preliminary results")]
        record_add_field(rec, "500", subfields=subs)

    for collection in collections:
        record_add_field(rec, '980', subfields=[('a', collection)])

    return rec
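
A minimal driver sketch for apply_filter, assuming MARCXML input; the filename is hypothetical and only the BibRecord helpers already used above are assumed:

from invenio.bibrecord import create_record, print_rec

xml = open('cds_record.xml').read()   # hypothetical MARCXML export from CDS
rec = create_record(xml)[0]           # BibRecord dictionary structure
rec = apply_filter(rec)               # normalize the record for INSPIRE
marcxml = print_rec(rec)              # serialize back to MARCXML
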
def perform_request_update_record(request_type, recid, uid, cacheMTime, data, changeApplied, isBulk=False):
    """Handle record update requests like adding, modifying, moving or deleting
    of fields or subfields. Possible common error situations:
    - Missing cache file
    - Cache file modified in other editor
    """

    response = {}

    if not cache_exists(recid, uid):
        response['resultCode'] = 106
    elif get_cache_mtime(recid, uid) != cacheMTime and not isBulk:
        # In case of a bulk request, the changes are deliberately performed
        # immediately one after another
        response['resultCode'] = 107
    else:
        try:
            record_revision, record, pending_changes, desactivated_hp_changes = get_cache_file_contents(recid, uid)[1:]
        except:
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['wrong_cache_file_format']
            return response

        if changeApplied != -1:
            pending_changes = pending_changes[:changeApplied] + pending_changes[changeApplied+1:]

        field_position_local = data.get('fieldPosition')
        if field_position_local is not None:
            field_position_local = int(field_position_local)
        if request_type == 'overrideChangesList':
            pending_changes = data['newChanges']
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['editor_modifications_changed']
        elif request_type == 'removeChange':
            # the change is removed automatically by passing the changeApplied parameter
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['editor_modifications_changed']
        elif request_type == 'desactivateHoldingPenChangeset':
            # The changeset has been marked as processed (the user applied it
            # in the editor); mark it as used in the cache file.
            # CAUTION: This is implemented here because it logically fits with
            #          the modifications made to the cache file. No changes are
            #          made to the Holding Pen itself. The changesets are tied
            #          to the cache because we want to cancel the removal
            #          whenever the cache disappears for any reason.
            desactivated_hp_changes[data.get('desactivatedChangeset')] = True
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['disabled_hp_changeset']
        elif request_type == 'addField':
            if data['controlfield']:
                record_add_field(record, data['tag'],
                                 controlfield_value=data['value'])
                response['resultCode'] = 20
            else:
                record_add_field(record, data['tag'], data['ind1'],
                                 data['ind2'], subfields=data['subfields'],
                                 field_position_local=field_position_local)
                response['resultCode'] = 21

        elif request_type == 'addSubfields':
            subfields = data['subfields']
            for subfield in subfields:
                record_add_subfield_into(record, data['tag'], subfield[0],
                    subfield[1], subfield_position=None,
                    field_position_local=field_position_local)
            if len(subfields) == 1:
                response['resultCode'] = 22
            else:
                response['resultCode'] = 23
        elif request_type == 'modifyField':  # changing the field structure
            # first remove the subfields and then add the new ones;
            # note that this changes the indices
            subfields = data['subFields']  # JSON representation of the subfields

            new_field = create_field(subfields, data['ind1'], data['ind2'])
            record_replace_field(record, data['tag'], new_field,
                                 field_position_local=data['fieldPosition'])
            response['resultCode'] = 26

        elif request_type == 'modifyContent':
            if data['subfieldIndex'] is not None:
                record_modify_subfield(record, data['tag'],
                    data['subfieldCode'], data['value'],
                    int(data['subfieldIndex']),
                    field_position_local=field_position_local)
            else:
                record_modify_controlfield(record, data['tag'], data["value"],
                  field_position_local=field_position_local)
            response['resultCode'] = 24

        elif request_type == 'moveSubfield':
            record_move_subfield(record, data['tag'],
                int(data['subfieldIndex']), int(data['newSubfieldIndex']),
                field_position_local=field_position_local)
            response['resultCode'] = 25

        elif request_type == 'moveField':
            if data['direction'] == 'up':
                final_position_local = field_position_local-1
            else: # direction is 'down'
                final_position_local = field_position_local+1
            record_move_fields(record, data['tag'], [field_position_local],
                final_position_local)
            response['resultCode'] = 32

        elif request_type == 'deleteFields':
            to_delete = data['toDelete']
            deleted_fields = 0
            deleted_subfields = 0
            for tag in to_delete:
                # Sort the fields in decreasing order of local position
                fieldsOrder = to_delete[tag].keys()
                fieldsOrder.sort(lambda a, b: int(b) - int(a))
                for field_position_local in fieldsOrder:
                    if not to_delete[tag][field_position_local]:
                        # No subfields specified - delete entire field.
                        record_delete_field(record, tag,
                            field_position_local=int(field_position_local))
                        deleted_fields += 1
                    else:
                        for subfield_position in \
                                to_delete[tag][field_position_local][::-1]:
                            # Delete subfields in reverse order (to keep the
                            # indexing correct).
                            record_delete_subfield_from(record, tag,
                                int(subfield_position),
                                field_position_local=int(field_position_local))
                            deleted_subfields += 1
            if deleted_fields == 1 and deleted_subfields == 0:
                response['resultCode'] = 26
            elif deleted_fields and deleted_subfields == 0:
                response['resultCode'] = 27
            elif deleted_subfields == 1 and deleted_fields == 0:
                response['resultCode'] = 28
            elif deleted_subfields and deleted_fields == 0:
                response['resultCode'] = 29
            else:
                response['resultCode'] = 30
        response['cacheMTime'] = update_cache_file_contents(
            recid, uid, record_revision, record, pending_changes,
            desactivated_hp_changes)
        response['cacheDirty'] = True

    return response
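
For orientation, a hedged sketch of one 'addField' round trip; recid, uid and cacheMTime values are hypothetical, and the data keys mirror the ones read above:

data = {'tag': '700', 'ind1': ' ', 'ind2': ' ',
        'controlfield': False,
        'subfields': [('a', 'Doe, John')],
        'fieldPosition': '0'}
response = perform_request_update_record('addField', recid=42, uid=1,
                                         cacheMTime=1234567890, data=data,
                                         changeApplied=-1)
# resultCode 21 on success; 106/107 signal cache problems
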
Example #30
0
    def compare_records(self, record1, record2, opt_mode=None):
        """
        Compares two records to identify added/modified/deleted tags.

        The records are either the upload record or existing record or
        record archived.

        Returns a dictionary of patches, keyed 'MOD'/'ADD'/'DEL' for
        modified/added/deleted tags respectively.
        """
        def remove_control_tag(tag_list):
            """
            Returns the list of keys without any control tags
            """

            cleaned_list = [
                item for item in tag_list
                if item not in CFG_BIBUPLOAD_CONTROLFIELD_TAGS
            ]
            return cleaned_list

        def group_record_tags():
            """
            Groups all the tags in a Record as Common/Added/Deleted tags.
            Returns a Tuple of 3 lists for each category mentioned above.
            """
            rec1_keys = record1.keys()
            rec2_keys = record2.keys()

            com_tag_lst = [key for key in rec1_keys if key in rec2_keys]
            # tags in record2 not present in record1
            del_tag_lst = [key for key in rec2_keys if key not in rec1_keys]
            # additional tags in record1
            add_tag_lst = [key for key in rec1_keys if key not in rec2_keys]

            return (com_tag_lst, add_tag_lst, del_tag_lst)

        # declaring dictionaries to hold the identified patch
        mod_patch = {}
        add_patch = {}
        del_patch = {}
        result = {}

        (common_tags, added_tags, deleted_tags) = group_record_tags()

        if common_tags:
            mod_patch = self.find_modified_tags(common_tags, record1, record2)

        if added_tags:
            for tag in added_tags:
                add_patch[tag] = record1[tag]

        # if the record comes in 'correct' mode, it should already have
        # fields marked with the '0' code; if not, the deleted-tag list is
        # used to build the delete patch below
        if deleted_tags and opt_mode in ('replace', 'delete'):
            for tag in deleted_tags:
                del_patch[tag] = record2[tag]

        # returning back a result dictionary with all available patches
        if mod_patch:
            result['MOD'] = mod_patch

        if add_patch:
            result['ADD'] = add_patch

        if del_patch:
            # for a tag that has been deleted in the upload record in replace
            # mode, loop through all the fields of the tag and add additional
            # subfield with code '0' and value '__DELETE_FIELDS__'
            # NOTE Indicators taken into consideration while deleting fields
            for tag in del_patch:
                for data_tuple in del_patch[tag]:
                    ind1 = data_tuple[1]
                    ind2 = data_tuple[2]
                    record_delete_field(del_patch, tag, ind1, ind2)
                    record_add_field(del_patch, tag, ind1, ind2, "", [
                        (CFG_BIBUPLOAD_DELETE_CODE, CFG_BIBUPLOAD_DELETE_VALUE)
                    ])
            result['DEL'] = del_patch

        return result
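
A sketch of the patch shape compare_records produces, using the (subfields, ind1, ind2, controlfield_value, global_position) field tuple layout seen throughout these examples; `patcher` stands in for an instance of the surrounding class, and all values are hypothetical:

# record1 is the upload, record2 the existing record
record1 = {'100': [([('a', 'Doe, John')], ' ', ' ', '', 1)]}
record2 = {'100': [([('a', 'Doe, J.')], ' ', ' ', '', 1)],
           '245': [([('a', 'Old title')], ' ', ' ', '', 2)]}
result = patcher.compare_records(record1, record2, opt_mode='replace')
# result.get('MOD') carries the changed 100 field; result.get('DEL')
# carries 245 re-marked with the configured delete code and value
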
Example #31
0
def perform_request_record(req, request_type, recid, uid, data, ln=CFG_SITE_LANG):
    """Handle 'major' record related requests like fetching, submitting or
    deleting a record, cancel editing or preparing a record for merging.

    """
    response = {}

    if request_type == "newRecord":
        # Create a new record.
        new_recid = reserve_record_id()
        new_type = data["newType"]
        if new_type == "empty":
            # Create a new empty record.
            create_cache_file(recid, uid)
            response["resultCode"], response["newRecID"] = 6, new_recid

        elif new_type == "template":
            # Create a new record from XML record template.
            template_filename = data["templateFilename"]
            template = get_record_template(template_filename)
            if not template:
                response["resultCode"] = 108
            else:
                record = create_record(template)[0]
                if not record:
                    response["resultCode"] = 109
                else:
                    record_add_field(record, "001", controlfield_value=str(new_recid))
                    create_cache_file(new_recid, uid, record, True)
                    response["resultCode"], response["newRecID"] = 7, new_recid

        elif new_type == "clone":
        # Clone an existing record (from the user's cache).
            existing_cache = cache_exists(recid, uid)
            if existing_cache:
                try:
                    record = get_cache_file_contents(recid, uid)[2]
                except:
                    # if, for example, the cache format was wrong (outdated)
                    record = get_bibrecord(recid)
            else:
                # Cache missing. Fall back to using original version.
                record = get_bibrecord(recid)
            record_delete_field(record, "001")
            record_add_field(record, "001", controlfield_value=str(new_recid))
            create_cache_file(new_recid, uid, record, True)
            response["resultCode"], response["newRecID"] = 8, new_recid
    elif request_type == "getRecord":
        # Fetch the record. Possible error situations:
        # - Non-existing record
        # - Deleted record
        # - Record locked by other user
        # - Record locked by queue
        # A cache file will be created if it does not exist.
        # If the cache is outdated (i.e., not based on the latest DB revision),
        # cacheOutdated will be set to True in the response.
        record_status = record_exists(recid)
        existing_cache = cache_exists(recid, uid)
        read_only_mode = False

        if data.has_key("inReadOnlyMode"):
            read_only_mode = data["inReadOnlyMode"]

        if record_status == 0:
            response["resultCode"] = 102
        elif record_status == -1:
            response["resultCode"] = 103
        elif not read_only_mode and not existing_cache and record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif (
            not read_only_mode
            and existing_cache
            and cache_expired(recid, uid)
            and record_locked_by_other_user(recid, uid)
        ):
            response["resultCode"] = 104
        elif not read_only_mode and record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            if data.get("deleteRecordCache"):
                delete_cache_file(recid, uid)
                existing_cache = False
                pending_changes = []
                disabled_hp_changes = {}
            if read_only_mode:
                if data.has_key("recordRevision"):
                    record_revision_ts = data["recordRevision"]
                    record_xml = get_marcxml_of_revision(recid, record_revision_ts)
                    record = create_record(record_xml)[0]
                    record_revision = timestamp_to_revision(record_revision_ts)
                    pending_changes = []
                    disabled_hp_changes = {}
                else:
                    # a normal cacheless retrieval of a record
                    record = get_bibrecord(recid)
                    record_revision = get_record_last_modification_date(recid)
                    pending_changes = []
                    disabled_hp_changes = {}
                cache_dirty = False
                mtime = 0
                undo_list = []
                redo_list = []
            elif not existing_cache:
                record_revision, record = create_cache_file(recid, uid)
                mtime = get_cache_mtime(recid, uid)
                pending_changes = []
                disabled_hp_changes = {}
                undo_list = []
                redo_list = []
                cache_dirty = False
            else:
                # TODO: This try except should be replaced with something nicer,
                #      like an argument indicating if a new cache file is to
                #      be created
                try:
                    (cache_dirty, record_revision, record, pending_changes,
                     disabled_hp_changes, undo_list, redo_list) = \
                        get_cache_file_contents(recid, uid)
                    touch_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    if not latest_record_revision(recid, record_revision) and get_record_revisions(recid) != ():
                        # This should prevent using an old cache when viewing
                        # an old version. If there are no revisions, this is a
                        # new record and the check is skipped.
                        response["cacheOutdated"] = True

                except:
                    record_revision, record = create_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    pending_changes = []
                    disabled_hp_changes = {}
                    cache_dirty = False
                    undo_list = []
                    redo_list = []
            if data["clonedRecord"]:
                response["resultCode"] = 9
            else:
                response["resultCode"] = 3
            revision_author = get_record_revision_author(recid, record_revision)
            last_revision_ts = revision_to_timestamp(get_record_last_modification_date(recid))
            revisions_history = get_record_revision_timestamps(recid)
            number_of_physical_copies = get_number_copies(recid)
            bibcirc_details_URL = create_item_details_url(recid, ln)
            can_have_copies = can_record_have_physical_copies(recid)

            response["cacheDirty"], response["record"], response["cacheMTime"], response["recordRevision"], response[
                "revisionAuthor"
            ], response["lastRevision"], response["revisionsHistory"], response["inReadOnlyMode"], response[
                "pendingHpChanges"
            ], response[
                "disabledHpChanges"
            ], response[
                "undoList"
            ], response[
                "redoList"
            ] = (
                cache_dirty,
                record,
                mtime,
                revision_to_timestamp(record_revision),
                revision_author,
                last_revision_ts,
                revisions_history,
                read_only_mode,
                pending_changes,
                disabled_hp_changes,
                undo_list,
                redo_list,
            )
            response["numberOfCopies"] = number_of_physical_copies
            response["bibCirculationUrl"] = bibcirc_details_URL
            response["canRecordHavePhysicalCopies"] = can_have_copies
            # Set tag format from user's session settings.
            try:
                tagformat_settings = session_param_get(req, "bibedit_tagformat")
                tagformat = tagformat_settings[recid]
            except KeyError:
                tagformat = CFG_BIBEDIT_TAG_FORMAT
            response["tagFormat"] = tagformat

    elif request_type == "submit":
        # Submit the record. Possible error situations:
        # - Missing cache file
        # - Cache file modified in other editor
        # - Record locked by other user
        # - Record locked by queue
        # - Invalid XML characters
        # If the cache is outdated cacheOutdated will be set to True in the
        # response.
        if not cache_exists(recid, uid):
            response["resultCode"] = 106
        elif not get_cache_mtime(recid, uid) == data["cacheMTime"]:
            response["resultCode"] = 107
        elif cache_expired(recid, uid) and record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            try:
                tmp_result = get_cache_file_contents(recid, uid)
                record_revision = tmp_result[1]
                record = tmp_result[2]
                pending_changes = tmp_result[3]
                #                disabled_changes = tmp_result[4]

                xml_record = print_rec(record)
                record, status_code, list_of_errors = create_record(xml_record)
                if status_code == 0:
                    response["resultCode"], response["errors"] = 110, list_of_errors
                elif not data["force"] and not latest_record_revision(recid, record_revision):
                    response["cacheOutdated"] = True
                else:
                    save_xml_record(recid, uid)
                    response["resultCode"] = 4
            except:
                response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV["error_wrong_cache_file_format"]
    elif request_type == "revert":
        revId = data["revId"]
        job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revId).groups()
        revision_xml = get_marcxml_of_revision(recid, job_date)
        save_xml_record(recid, uid, revision_xml)
        if cache_exists(recid, uid):
            delete_cache_file(recid, uid)
        response["resultCode"] = 4

    elif request_type == "cancel":
        # Cancel editing by deleting the cache file. Possible error situations:
        # - Cache file modified in other editor
        if cache_exists(recid, uid):
            if get_cache_mtime(recid, uid) == data["cacheMTime"]:
                delete_cache_file(recid, uid)
                response["resultCode"] = 5
            else:
                response["resultCode"] = 107
        else:
            response["resultCode"] = 5

    elif request_type == "deleteRecord":
        # Submit the record. Possible error situations:
        # - Record locked by other user
        # - Record locked by queue
        # As the user is requesting deletion we proceed even if the cache file
        # is missing and we don't check if the cache is outdated or has
        # been modified in another editor.
        existing_cache = cache_exists(recid, uid)
        pending_changes = []

        if has_copies(recid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV["error_physical_copies_exist"]
        elif existing_cache and cache_expired(recid, uid) and record_locked_by_other_user(recid, uid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV["error_rec_locked_by_user"]
        elif record_locked_by_queue(recid):
            response["resultCode"] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV["error_rec_locked_by_queue"]
        else:
            if not existing_cache:
                (record_revision, record, pending_changes,
                 deactivated_hp_changes, undo_list, redo_list) = \
                    create_cache_file(recid, uid)
            else:
                try:
                    (record_revision, record, pending_changes,
                     deactivated_hp_changes, undo_list, redo_list) = \
                        get_cache_file_contents(recid, uid)[1:]
                except:
                    record_revision, record, pending_changes, \
                        deactivated_hp_changes = create_cache_file(recid, uid)
            record_add_field(record, "980", " ", " ", "", [("c", "DELETED")])
            undo_list = []
            redo_list = []
            update_cache_file_contents(
                recid, uid, record_revision, record, pending_changes, deactivated_hp_changes, undo_list, redo_list
            )
            save_xml_record(recid, uid)
            delete_related_holdingpen_changes(recid)  # we don't need any changes
            # related to a deleted record
            response["resultCode"] = 10

    elif request_type == "deleteRecordCache":
        # Delete the cache file. Ignore the request if the cache has been
        # modified in another editor.
        if cache_exists(recid, uid) and get_cache_mtime(recid, uid) == data["cacheMTime"]:
            delete_cache_file(recid, uid)
        response["resultCode"] = 11

    elif request_type == "prepareRecordMerge":
        # We want to merge the cache with the current DB version of the record,
        # so prepare an XML file from the file cache, to be used by BibMerge.
        # Possible error situations:
        # - Missing cache file
        # - Record locked by other user
        # - Record locked by queue
        # We don't check if cache is outdated (a likely scenario for this
        # request) or if it has been modified in another editor.
        if not cache_exists(recid, uid):
            response["resultCode"] = 106
        elif cache_expired(recid, uid) and record_locked_by_other_user(recid, uid):
            response["resultCode"] = 104
        elif record_locked_by_queue(recid):
            response["resultCode"] = 105
        else:
            save_xml_record(recid, uid, to_upload=False, to_merge=True)
            response["resultCode"] = 12

    return response
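A minimal invocation sketch for the "submit" branch above (the surrounding handler is perform_request_record, shown in full in a later example). The recid, uid and payload values are invented for illustration; the result codes are the ones assigned in the branch itself (4 = saved, 104-107 = lock/cache problems, 110 = invalid XML).

response = perform_request_record(req=None, request_type="submit",
                                  recid=12345, uid=1,
                                  data={"cacheMTime": 1400000000,
                                        "force": False})
if response.get("cacheOutdated"):
    print("DB revision is newer than the cache; re-fetch or resubmit with force")
else:
    print("result code: %s" % response["resultCode"])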
    def process_record(self, record):
        """@see: BaseFieldCommand.process_record"""
        bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2)
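A hedged, self-contained illustration of the delete call above: when only a tag and indicators are given, bibrecord.record_delete_field removes every matching field instance. It assumes Invenio's bibrecord module is importable; the record content is made up.

from invenio.bibrecord import create_record, record_delete_field

xml = """<record>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Doe, John</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Roe, Jane</subfield>
  </datafield>
</record>"""
record = create_record(xml)[0]
record_delete_field(record, '700', ' ', ' ')  # drops both 700__ instances
print(record.get('700', []))  # no 700 fields remain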
Example #33
0
def merge_field_group(rec1,
                      rec2,
                      fnum,
                      ind1='',
                      ind2='',
                      merge_conflicting_fields=False):
    """Merges non-conflicting fields from 'rec2' to 'rec1' for a specific tag.
    the second record.
    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields or not
    """
    ### Check if merging goes for all indicators and set a boolean
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, '%', '%')
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if len(fields2) == 0:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum, match_subfields,
                                 ind1, ind2)

    ### check if fields are the same
    if alldiffs is None:
        return  #nothing to merge

    ### find the diffing for the fields of the given indicators

    alldiffs = alldiffs[1]  # keep only the list of diffs by indicators (without the 'c')

    if merging_all_indicators:
        #combine the diffs for each indicator to one list
        diff = _combine_diffs(alldiffs)
    else:  #diffing for one indicator
        for diff in alldiffs:  #look for indicator pair in diff result
            if diff[0] == (ind1, ind2):
                break
        else:
            raise Exception, "Indicators not in diff result."
        diff = diff[1]  # keep only the list of diffs (without the indicator tuple)

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    if not merge_conflicting_fields:  # merge non-conflicting fields only
        for m in diff:  #for every match of fields in the diff
            if m[0] is not None:  #if rec1 has a field in the diff, keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:  #else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:  #merge all fields
        for m in diff:  #for every match of fields in the diff
            if m[1] is not None:  #if rec2 has a field, add it
                new_fields.append(deepcopy(fields2[m[1]]))
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    #if the fields are not the same then add the field of rec1
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)
    ## find where the new_fields should be inserted in rec1 (insert_index)
    if merging_all_indicators:
        insert_index = 0
    else:
        insert_index = None
        ind_pair = (ind1, ind2)
        first_last_dict = _first_and_last_index_for_each_indicator(
            rec1.get(fnum, []))
        #find the indicator pair which is just before the one which will be inserted
        indicators = first_last_dict.keys()
        indicators.sort()
        ind_pair_before = None
        for pair in indicators:
            if pair > ind_pair:
                break
            else:
                ind_pair_before = pair
        if ind_pair_before is None:  #if no smaller indicator pair exists
            insert_index = 0  #insertion will take place at the beginning
        else:  #else insert after the last field index of the previous indicator pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)
    return
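A small usage sketch for merge_field_group; the record contents are invented, and it assumes the same bibrecord helpers the function itself relies on. Note that merge_field_group mutates rec1 in place and returns None.

from invenio.bibrecord import create_record

xml1 = """<record><datafield tag="100" ind1=" " ind2=" ">
<subfield code="a">Author, A.</subfield></datafield></record>"""
xml2 = """<record><datafield tag="100" ind1=" " ind2=" ">
<subfield code="a">Author, B.</subfield></datafield></record>"""
rec1, rec2 = create_record(xml1)[0], create_record(xml2)[0]

# Merge every 100 field of rec2 into rec1; with the default
# merge_conflicting_fields=False, rec1's version wins on conflicts.
merge_field_group(rec1, rec2, '100')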
Example #34
0
def Update_Approval_DB(parameters, curdir, form, user_info=None):
    """
    This function updates the approval database when a document has
    just been approved or rejected. It uses the [categformatDAM]
    parameter to compute the category of the document.  Must be called
    after the Get_Report_Number function.

    Parameters:

       * categformatDAM: It contains the regular expression which
                         allows the retrieval of the category from the
                         reference number.
                         Eg: if [categformatDAM]="TEST-<CATEG>-.*" and
                         the reference is "TEST-CATEG1-2001-001" then
                         the category will be recognized as "CATEG1".
    """
    global rn, sysno
    doctype = form['doctype']
    act = form['act']
    categformat = parameters['categformatDAM']

    ## Get the name of the decision file:
    try:
        decision_filename = parameters['decision_file']
    except KeyError:
        decision_filename = ""

    pid = os.getpid()
    now = time.time()
    access = "%i_%s" % (now,pid)
    if act not in ["APP", "APS", "APM", "APO"]:
        # retrieve category
        if re.search("<FILE:",categformat):
            filename = categformat.replace("<FILE:","")
            filename = filename.replace(">","")
            if os.path.exists("%s/%s" % (curdir,filename)):
                fp = open("%s/%s" % (curdir,filename))
                category = fp.read()
                fp.close()
            else:
                category=""
            category = category.replace("\n","")
        else:
            categformat = categformat.replace("<CATEG>", "([^-]*)")
            m_categ_search = re.match(categformat, rn)
            if m_categ_search is not None:
                if len(m_categ_search.groups()) > 0:
                    ## Found a match for the category of this document. Get it:
                    category = m_categ_search.group(1)
                else:
                    ## This document has no category.
                    category = ""
            else:
                category = ""

        if category == "":
            category = "unknown"
        sth = run_sql("SELECT status,dFirstReq,dLastReq,dAction FROM sbmAPPROVAL WHERE  doctype=%s and categ=%s and rn=%s", (doctype,category,rn,))

        if len(sth) == 0:
            run_sql("INSERT INTO sbmAPPROVAL (doctype, categ, rn, status, dFirstReq, dLastReq, dAction, access) VALUES (%s,%s,%s,'waiting',NOW(),NOW(),'',%s)", (doctype,category,rn,access,))
        else:
            run_sql("UPDATE sbmAPPROVAL SET dLastReq=NOW(), status='waiting' WHERE  doctype=%s and categ=%s and rn=%s", (doctype,category,rn,))
    else:
        ## Since this is the "APP" action, this call of the function must be
        ## on behalf of the referee - in order to approve or reject an item.
        ## We need to get the decision from the decision file:
        if decision_filename in (None, "", "NULL"):
            ## We don't have a name for the decision file.
            ## For backward compatibility reasons, try to read the decision from
            ## a file called 'decision' in curdir:
            if os.path.exists("%s/decision" % curdir):
                fh_decision = open("%s/decision" % curdir, "r")
                decision = fh_decision.read()
                fh_decision.close()
            else:
                decision = ""
        else:
            ## Try to read the decision from the decision file:
            try:
                fh_decision = open("%s/%s" % (curdir, decision_filename), "r")
                decision = fh_decision.read().strip()
                fh_decision.close()
            except IOError:
                ## Oops, unable to open the decision file.
                decision = ""

        from invenio.bibrecord import record_delete_field, record_add_field, record_xml_output
        from invenio.bibedit_utils import get_bibrecord
        from invenio.bibtask import task_low_level_submission
        record = get_bibrecord(sysno)
        ## Either approve or reject the item, based upon the contents
        ## of 'decision':
        if decision == "approve":
            run_sql("UPDATE sbmAPPROVAL SET dAction=NOW(),status='approved' WHERE  rn=%s", (rn,))
        else:
            run_sql("UPDATE sbmAPPROVAL SET dAction=NOW(),status='rejected' WHERE  rn=%s", (rn,))
            if act == "APS":
                record_delete_field(record, "980")
                record_add_field(record, '980', ' ', ' ', '', [('a', 'REJBLOG')])
                fd, name = tempfile.mkstemp(suffix='.xml', dir=CFG_TMPDIR)
                os.write(fd, """<collection>\n""")
                os.write(fd, record_xml_output(record))
                os.write(fd, """</collection\n>""")
                os.close(fd)
                task_low_level_submission('bibupload', 'admin', '-c', name)
                task_low_level_submission('bibindex', 'admin')
                task_low_level_submission('webcoll', 'admin', '-c', "Provisional Blogs")
                task_low_level_submission('webcoll', 'admin', '-c', "Blogs")
    return ""
                # Did not find existing record in database
                holdingpen_records.append(record)
                continue

            # We remove 500 field temporary/brief entry from revision if record already exists
            fields_500 = record_get_field_instances(record, '500', ind1="%", ind2="%")
            if fields_500 is not None:
                field_positions = []
                for field in fields_500:
                    subfields = field_get_subfield_instances(field)
                    for subfield in subfields:
                        if re.match("^.?((temporary|brief) entry).?$", subfield[1].lower(), re.IGNORECASE):
                            field_positions.append((field[1], field[2], field[4]))

                for ind1, ind2, pos in field_positions:
                    record_delete_field(record, '500', ind1=ind1, ind2=ind2, field_position_global=pos)

            # Now compare new version with existing one, returning a diff[tag] = (diffcode, [..])
            # None - if field is the same for both records
            # ('r',) - field missing from input record, ignored ATM
            # ('a',) - new field added, should be updated with append
            # ('c', difference_comparison) -> if the field exists in both records but its value has changed
            #                              -> uploaded with correct if accepted
            fields_to_add = []
            fields_to_correct = []
            holdingpen = False

            difference = record_diff(existing_record, record, compare_subfields=match_subfields)
            for tag, diff in difference.iteritems():
                if diff is None:
                    # No difference in tag
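The truncated fragment above collects the global positions of 500 fields flagged as temporary/brief entries and then deletes them one by one. A hedged standalone version of that cleanup, assuming the bibrecord helpers it uses are importable:

import re
from invenio.bibrecord import (record_get_field_instances,
                               field_get_subfield_instances,
                               record_delete_field)

def drop_temporary_500_fields(record):
    """Delete 500 fields whose subfields mark a temporary/brief entry."""
    positions = []
    for field in record_get_field_instances(record, '500', ind1="%", ind2="%"):
        for code, value in field_get_subfield_instances(field):
            if re.match("^.?((temporary|brief) entry).?$", value, re.IGNORECASE):
                # field is (subfields, ind1, ind2, controlfield, global_pos)
                positions.append((field[1], field[2], field[4]))
    for ind1, ind2, pos in positions:
        record_delete_field(record, '500', ind1=ind1, ind2=ind2,
                            field_position_global=pos)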
def perform_request_record(req, request_type, recid, uid, data):
    """Handle 'major' record related requests like fetching, submitting or
    deleting a record, cancel editing or preparing a record for merging.

    """
    response = {}

    if request_type == 'newRecord':
        # Create a new record.
        new_recid = reserve_record_id()
        new_type = data['newType']
        if new_type == 'empty':
            # Create a new empty record.
            create_cache_file(recid, uid)
            response['resultCode'], response['newRecID'] = 6, new_recid

        elif new_type == 'template':
            # Create a new record from XML record template.
            template_filename = data['templateFilename']
            template = get_record_template(template_filename)
            if not template:
                response['resultCode'] = 108
            else:
                record = create_record(template)[0]
                if not record:
                    response['resultCode'] = 109
                else:
                    record_add_field(record, '001',
                                     controlfield_value=str(new_recid))
                    create_cache_file(new_recid, uid, record, True)
                    response['resultCode'], response['newRecID'] = 7, new_recid

        elif new_type == 'clone':
            # Clone an existing record (from the users cache).
            existing_cache = cache_exists(recid, uid)
            if existing_cache:
                try:
                    record = get_cache_file_contents(recid, uid)[2]
                except:
                    # if, for example, the cache format was wrong (outdated)
                    record = get_bibrecord(recid)
            else:
                # Cache missing. Fall back to using original version.
                record = get_bibrecord(recid)
            record_delete_field(record, '001')
            record_add_field(record, '001', controlfield_value=str(new_recid))
            create_cache_file(new_recid, uid, record, True)
            response['resultCode'], response['newRecID'] = 8, new_recid
    elif request_type == 'getRecord':
        # Fetch the record. Possible error situations:
        # - Non-existing record
        # - Deleted record
        # - Record locked by other user
        # - Record locked by queue
        # A cache file will be created if it does not exist.
        # If the cache is outdated (i.e., not based on the latest DB revision),
        # cacheOutdated will be set to True in the response.
        record_status = record_exists(recid)
        existing_cache = cache_exists(recid, uid)
        read_only_mode = False
        if data.has_key("inReadOnlyMode"):
            read_only_mode = data['inReadOnlyMode']

        if record_status == 0:
            response['resultCode'] = 102
        elif record_status == -1:
            response['resultCode'] = 103
        elif not read_only_mode and not existing_cache and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif not read_only_mode and existing_cache and \
                cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif not read_only_mode and record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            if data.get('deleteRecordCache'):
                delete_cache_file(recid, uid)
                existing_cache = False
                pending_changes = []
                disabled_hp_changes = {}
            if read_only_mode:
                if data.has_key('recordRevision'):
                    record_revision_ts = data['recordRevision']
                    record_xml = get_marcxml_of_revision(recid, record_revision_ts)
                    record = create_record(record_xml)[0]
                    record_revision = timestamp_to_revision(record_revision_ts)
                    pending_changes = []
                    disabled_hp_changes = {}
                else:
                    # a normal cacheless retrieval of a record
                    record = get_bibrecord(recid)
                    record_revision = get_record_last_modification_date(recid)
                    pending_changes = []
                    disabled_hp_changes = {}
                cache_dirty = False
                mtime = 0
            elif not existing_cache:
                record_revision, record = create_cache_file(recid, uid)
                mtime = get_cache_mtime(recid, uid)
                pending_changes = []
                disabled_hp_changes = {}
                cache_dirty = False
            else:
                try:
                    cache_dirty, record_revision, record, pending_changes, disabled_hp_changes = \
                        get_cache_file_contents(recid, uid)
                    touch_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    if not latest_record_revision(recid, record_revision):
                        response['cacheOutdated'] = True
                except:
                    record_revision, record = create_cache_file(recid, uid)
                    mtime = get_cache_mtime(recid, uid)
                    pending_changes = []
                    disabled_hp_changes = {}
                    cache_dirty = False

            if data['clonedRecord']:
                response['resultCode'] = 9
            else:
                response['resultCode'] = 3

            revision_author = get_record_revision_author(recid, record_revision)
            last_revision_ts = revision_to_timestamp(get_record_last_modification_date(recid))
            revisions_history = get_record_revision_timestamps(recid)

            response['cacheDirty'], response['record'], response['cacheMTime'],\
                response['recordRevision'], response['revisionAuthor'], \
                response['lastRevision'], response['revisionsHistory'], \
                response['inReadOnlyMode'], response['pendingHpChanges'], \
                response['disabledHpChanges'] = cache_dirty, record, mtime, \
                revision_to_timestamp(record_revision), revision_author, \
                last_revision_ts, revisions_history, read_only_mode, pending_changes, \
                disabled_hp_changes
            # Set tag format from user's session settings.
            try:
                tagformat_settings = session_param_get(req, 'bibedit_tagformat')
                tagformat = tagformat_settings[recid]
            except KeyError:
                tagformat = CFG_BIBEDIT_TAG_FORMAT
            response['tagFormat'] = tagformat

    elif request_type == 'submit':
        # Submit the record. Possible error situations:
        # - Missing cache file
        # - Cache file modified in other editor
        # - Record locked by other user
        # - Record locked by queue
        # - Invalid XML characters
        # If the cache is outdated cacheOutdated will be set to True in the
        # response.
        if not cache_exists(recid, uid):
            response['resultCode'] = 106
        elif get_cache_mtime(recid, uid) != data['cacheMTime']:
            response['resultCode'] = 107
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            try:
                record_revision, record, pending_changes, disabled_changes = get_cache_file_contents(recid, uid)[1:]
                xml_record = print_rec(record)
                record, status_code, list_of_errors = create_record(xml_record)
                if status_code == 0:
                    response['resultCode'], response['errors'] = 110, \
                        list_of_errors
                elif not data['force'] and \
                        not latest_record_revision(recid, record_revision):
                    response['cacheOutdated'] = True
                else:
                    save_xml_record(recid, uid)
                    response['resultCode'] = 4
            except:
                response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['wrong_cache_file_format']
    elif request_type == 'revert':
        revId = data['revId']
        job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revId).groups()
        revision_xml = get_marcxml_of_revision(recid, job_date)
        save_xml_record(recid, uid, revision_xml)
        if cache_exists(recid, uid):
            delete_cache_file(recid, uid)
        response['resultCode'] = 4

    elif request_type == 'cancel':
        # Cancel editing by deleting the cache file. Possible error situations:
        # - Cache file modified in other editor
        if cache_exists(recid, uid):
            if get_cache_mtime(recid, uid) == data['cacheMTime']:
                delete_cache_file(recid, uid)
                response['resultCode'] = 5
            else:
                response['resultCode'] = 107
        else:
            response['resultCode'] = 5

    elif request_type == 'deleteRecord':
        # Delete the record. Possible error situations:
        # - Record locked by other user
        # - Record locked by queue
        # As the user is requesting deletion we proceed even if the cache file
        # is missing and we don't check if the cache is outdated or has
        # been modified in another editor.
        existing_cache = cache_exists(recid, uid)
        pending_changes = []
        if existing_cache and cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            if not existing_cache:
                record_revision, record, pending_changes, deactivated_hp_changes = create_cache_file(recid, uid)
            else:
                try:
                    record_revision, record, pending_changes, deactivated_hp_changes = get_cache_file_contents(
                        recid, uid)[1:]
                except:
                    record_revision, record, pending_changes, deactivated_hp_changes = create_cache_file(recid, uid)
            record_add_field(record, '980', ' ', ' ', '', [('c', 'DELETED')])
            update_cache_file_contents(recid, uid, record_revision, record, pending_changes, deactivated_hp_changes)
            save_xml_record(recid, uid)
            delete_related_holdingpen_changes(recid) # we don't need any changes related to a deleted record
            response['resultCode'] = 10

    elif request_type == 'deleteRecordCache':
        # Delete the cache file. Ignore the request if the cache has been
        # modified in another editor.
        if cache_exists(recid, uid) and get_cache_mtime(recid, uid) == \
                data['cacheMTime']:
            delete_cache_file(recid, uid)
        response['resultCode'] = 11

    elif request_type == 'prepareRecordMerge':
        # We want to merge the cache with the current DB version of the record,
        # so prepare an XML file from the file cache, to be used by BibMerge.
        # Possible error situations:
        # - Missing cache file
        # - Record locked by other user
        # - Record locked by queue
        # We don't check if cache is outdated (a likely scenario for this
        # request) or if it has been modified in another editor.
        if not cache_exists(recid, uid):
            response['resultCode'] = 106
        elif cache_expired(recid, uid) and \
                record_locked_by_other_user(recid, uid):
            response['resultCode'] = 104
        elif record_locked_by_queue(recid):
            response['resultCode'] = 105
        else:
            save_xml_record(recid, uid, to_upload=False, to_merge=True)
            response['resultCode'] = 12

    return response
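Both "revert" branches above depend on re_revdate_split, which is defined elsewhere in the module. A plausible definition is sketched here as an assumption, purely to make the date reconstruction concrete:

import re

# Assumed pattern: the revision id carries a YYYYMMDDhhmmss timestamp.
re_revdate_split = re.compile(r'(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})')

revid = "20130905142233"  # fabricated example
job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revid).groups()
print(job_date)  # -> 2013-09-05 14:22:33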
    def index(self, req, form):
        """ Display live bibz39 queue
        """
        referer = '/admin2/bibz39/'
        navtrail = ' <a class="navtrail" href=\"%s/help/admin\">Admin Area</a> ' % CFG_SITE_URL

        auth_code, auth_message = acc_authorize_action(req, 'runbibedit')
        if auth_code != 0:
            return page_not_authorized(req=req, referer=referer,
                                       text=auth_message, navtrail=navtrail)

        argd = wash_urlargd(form, {
            'search': (str, ''),
            'marcxml': (str, ''),
            'server': (list, []),
            'search_type': (str, ''),
        })

        if argd['marcxml']:
            uid = getUid(req)
            new_recid = reserve_record_id()
            record = create_record(argd["marcxml"])[0]
            record_delete_field(record, '001')
            record_delete_field(record, '005')
            record_add_field(record, '001',
                             controlfield_value=str(new_recid))
            create_cache(new_recid, uid, record, True)
            redirect_to_url(req, '{0}/record/edit/#state=edit&recid={1}'.format(CFG_SITE_SECURE_URL,
                                                                                new_recid))

        body_content = ''

        body_content += self.generate_request_form(argd)

        if "search" in argd and argd["search"] and 'search_type' in argd and argd["search_type"] in \
                self._request_type_dict:

            conn = None
            list_of_record = []
            errors = {}

            res = []
            err = False
            for server in argd["server"]:
                try:
                    errors[server] = {"internal": [], "remote": []}
                    conn = zoom.Connection(CFG_Z39_SERVER[server]["address"],
                                           CFG_Z39_SERVER[server]["port"],
                                           user=CFG_Z39_SERVER[server].get("user", None),
                                           password=CFG_Z39_SERVER[server].get("password", None))
                    conn.databaseName = CFG_Z39_SERVER[server]["databasename"]
                    conn.preferredRecordSyntax = CFG_Z39_SERVER[server]["preferredRecordSyntax"]
                    value = argd["search"].replace("-", "") if argd["search_type"] == "ISBN" else \
                        argd["search"]
                    query = zoom.Query('CCL', u'{0}="{1}"'.format(
                        self._request_type_dict[argd["search_type"]], value))
                    body_content += ""
                    try:
                        server_answer = conn.search(query)
                        if len(server_answer) < 100:
                            nb_to_browse = len(server_answer)
                        else:
                            nb_to_browse = 100
                            errors[server]["remote"].append(
                                "The server {0} returned too many results. {1}/{2} are printed.".format(
                                    server, nb_to_browse, len(server_answer)))
                        for result in server_answer[0:nb_to_browse]:
                            res.append({"value": result, "provider": server})
                    except zoom.Bib1Err as e:
                        errors[server]["remote"].append("{0}".format(e))
                        err = True
                    conn.close()
                except Exception as e:
                    register_exception()
                    errors[server]["internal"].append("{0}".format(e))
                    if conn:
                        conn.close()

            p_err = False
            warnings = '<div class="error">'
            for server in errors:
                if errors[server]["internal"] or errors[server]["remote"]:
                    warnings += "<b>{0}</b><ul>".format(server)
                    for error in errors[server]["internal"]:
                        warnings += "<li>(internal) {0}</li>".format(error)
                    for error in errors[server]["remote"]:
                        warnings += "<li>(remote) {0}</li>".format(error)
                    warnings += "</ul>"
                    p_err = True
            if p_err:
                body_content += "{0}</div>".format(warnings)

            if res:

                body_content += "<table id='result_area' class='fullwidth  tablesorter'>"
                body_content += "<tr><th class='bibz39_titles_th' >Title</th><th class='bibz39_sources_th'>Authors</th><th>Publisher</th><th class='bibz39_sources_th'>Source</th><th><div class='bibz39_button_td'>View XML</div></th><th><div class='bibz39_button_td'>Import</div></th></tr>"

                for identifier, rec in enumerate(res):
                    list_of_record.append(
                        create_record(
                            self.interpret_string(zmarc.MARC(
                                rec["value"].data, strict=0).toMARCXML()))[0])
                    title = ''
                    authors = ''
                    publishers = ''

                    if "100" in list_of_record[identifier]:
                        for author in list_of_record[identifier]["100"]:
                            for tag in author[0]:
                                if tag[0] == 'a':
                                    if authors != "":
                                        authors += " / " + tag[1].strip(",;.")
                                    else:
                                        authors += tag[1].strip(",;.") + " "
                    if "700" in list_of_record[identifier]:
                        for author in list_of_record[identifier]["700"]:
                            for tag in author[0]:
                                if tag[0] == 'a':
                                    if authors != "":
                                        authors += " / " + tag[1].strip(",;.")
                                    else:
                                        authors += tag[1].strip(",;.") + " "
                    if "260" in list_of_record[identifier]:
                        for publisher in list_of_record[identifier]["260"][0][0]:
                            publishers += publisher[1] + " "
                    if "245" in list_of_record[identifier]:
                        for title_constituent in list_of_record[identifier]["245"][0][0]:
                            title += title_constituent[1] + " "

                    body_content += "<tr><td><div class='bibz39_titles' onclick='showxml({6})'>{0}<div><td>{4}</td><td>{5}</td</td><td><div>{2}</div></td><td><div class='bibz39_button_td'>{3}</div></td><td><div class='bibz39_button_td'>{1}</div></td></tr>".format(
                        title,
                        '<form method="post" action="/admin2/bibz39/"><input type="hidden"  name="marcxml"  value="{0}"><input type="submit" value="Import" /></form>'.format(
                            cgi.escape(record_xml_output(list_of_record[identifier])).replace(
                                "\"", "&quot;").replace("\'", "&quot;")),
                        rec["provider"],
                        '<button onclick="showxml({0})">View</button>'.format(identifier),
                        authors, publishers, identifier)
                body_content += "</table>"
                body_content += '<script type="text/javascript">'
                body_content += "var gAllMarcXml= {"
                for i, rec in enumerate(list_of_record):
                    body_content += "{0}:{1},".format(i, json.dumps(record_xml_output(rec)))
                body_content += "};"
                body_content += '</script>'

            else:
                if not err:
                    body_content += "<p class='bibz39_button_td spinning_wheel'> No result</p>"

            body_content += '<div id="dialog-message" title="XML Preview"></div></div>'

        return page(title="Z39.50 Search",
                    body=body_content,
                    errors=[],
                    warnings=[],
                    metaheaderadd=get_head(),
                    req=req)
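The marcxml import path above re-keys a fetched record before caching it: the foreign 001 (record id) and 005 (version timestamp) controlfields are dropped and a freshly reserved id is written. A standalone sketch of that pattern; the function name and inputs are illustrative:

from invenio.bibrecord import (create_record, record_add_field,
                               record_delete_field)

def rekey_record(marcxml, new_recid):
    """Return the parsed record re-keyed to new_recid."""
    record = create_record(marcxml)[0]
    record_delete_field(record, '001')  # drop the foreign record id
    record_delete_field(record, '005')  # drop the foreign version timestamp
    record_add_field(record, '001', controlfield_value=str(new_recid))
    return record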