Esempio n. 1
0
    def detect_conflict(self, up_record, up_patch, up_date, orig_record, orig_patch, orig_date):
        """
        Look for tags touched by both the upload patch and the original patch.

        A tag becomes a candidate conflict when it is:
          - modified by the upload and modified or deleted in the original;
          - added by the upload and added in the original;
          - deleted by the upload and modified or deleted in the original.

        A candidate is dropped again when the tag is missing from both
        records, or is present in both and records_identical() reports the
        two single-tag records as equal. Tag '856' goes through a dedicated
        workaround branch (see the comments there).

        Returns up_patch itself when no real conflict is left. Otherwise
        raises InvenioBibUploadConflictingRevisionsError with the remaining
        tags, both dates and both records.
        """
        def changed_in_orig(tag, sections):
            # True as soon as the original patch lists the tag in one section.
            for section in sections:
                if section in orig_patch and tag in orig_patch[section]:
                    return True
            return False

        def same_field(tag, **options):
            # Compare only this tag of the two records.
            return records_identical({tag: up_record[tag]},
                                     {tag: orig_record[tag]},
                                     ignore_duplicate_subfields=True,
                                     ignore_duplicate_controlfields=True,
                                     **options)

        # Upload-side section -> original-side sections that clash with it.
        clash_rules = (
            ('MOD', ('MOD', 'DEL')),
            ('ADD', ('ADD',)),
            ('DEL', ('MOD', 'DEL')),
        )

        candidates = []
        for up_section, orig_sections in clash_rules:
            if up_section in up_patch:
                candidates.extend(tag for tag in up_patch[up_section]
                                  if changed_in_orig(tag, orig_sections))

        if not candidates:
            return up_patch

        ## The candidates may be false positives: both sides can have
        ## touched the same tag and still ended up with the same content.
        real_conflicts = []
        for tag in candidates:
            in_both = tag in up_record and tag in orig_record
            if tag == '856':
                ## HACK: FIXME: we are not yet able to preserve the sorting
                ## of 8564 tags WRT FFT in BibUpload.
                ## Therefore a workaround is implemented here for 856.
                ## See ticket #1606.
                if in_both and same_field(tag,
                                          ignore_field_order=False,
                                          ignore_subfield_order=False):
                    continue
                # NOTE(review): a differing '856' also ends up here and is
                # not recorded as a conflict, and the comparison explicitly
                # keeps field order significant. Confirm both are intended.
            elif in_both and same_field(tag):
                continue
            elif tag not in up_record and tag not in orig_record:
                continue
            else:
                real_conflicts.append(tag)

        if real_conflicts:
            raise InvenioBibUploadConflictingRevisionsError(
                self.rec_id, real_conflicts, up_date, orig_date,
                up_record, orig_record)

        return up_patch
Esempio n. 2
0
    def test_legacy_create_recstruct(self):
        """Record - create recstruct.

        Builds a Record from a MARCXML blob, converts it with
        legacy_create_recstruct(), and checks the result against what
        create_record() yields for the same blob, ignoring subfield order.
        Record is not imported here; it has to come from the enclosing
        module scope.
        """
        from invenio.legacy.bibrecord import create_record, records_identical

        marcxml = '''
            <record>
              <controlfield tag="001">8</controlfield>
              <datafield tag="100" ind1=" " ind2=" ">
                <subfield code="a">Efstathiou, G P</subfield>
                <subfield code="u">Cambridge University</subfield>
              </datafield>
              <datafield tag="245" ind1=" " ind2=" ">
                <subfield code="a">Title</subfield>
                <subfield code="b">SubTitle</subfield>
              </datafield>
              <datafield tag="700" ind1=" " ind2=" ">
               <subfield code="a">Lasenby, A N</subfield>
              </datafield>
              <datafield tag="980" ind1=" " ind2=" ">
                <subfield code="a">Articles</subfield>
              </datafield>
            </record>
        '''
        record = Record.create(marcxml, master_format='marc',
                               namespace='testsuite')
        # Structure obtained through the JSON-based record.
        from_json = record.legacy_create_recstruct()
        # Reference structure parsed directly from the same blob.
        from_marcxml, _, _ = create_record(marcxml)
        matches = records_identical(from_json, from_marcxml,
                                    ignore_subfield_order=True)
        self.assertTrue(matches)
Esempio n. 3
0
def cli_clean_revisions(recid, dry_run=True, verbose=True):
    """Drop history revisions that repeat the content of the previous one.

    recid is either '*' (every id_bibrec found in hstRECORD) or a value
    accepted by int(). For each record the revisions are read in ascending
    job_date order; a revision that records_identical() reports equal to
    the one before it counts as a duplicate. The first revision is compared
    against an empty dict.

    With dry_run true (the default) no row is deleted, but duplicates are
    still counted and, when verbose is true, reported with the same
    "deleted" wording. The process exits with status 1 when recid is not an
    integer, or when decompressing/parsing a stored revision raises.
    """
    if recid != '*':
        try:
            recids = [int(recid)]
        except ValueError:
            print('ERROR: record ID must be integer, not %s.' % recid)
            sys.exit(1)
    else:
        recids = intbitset(run_sql("SELECT DISTINCT id_bibrec FROM hstRECORD"))

    select_sql = "SELECT marcxml, job_id, job_name, job_person, job_date FROM hstRECORD WHERE id_bibrec=%s ORDER BY job_date ASC"
    delete_sql = "DELETE FROM hstRECORD WHERE id_bibrec=%s AND job_id=%s AND job_name=%s AND job_person=%s AND job_date=%s"

    for rec_id in recids:
        revisions = run_sql(select_sql, (rec_id,))
        last_seen = {}
        duplicates = 0
        for blob, job_id, job_name, job_person, job_date in revisions:
            try:
                parsed = create_record(zlib.decompress(blob))[0]
            except Exception:
                print("ERROR: corrupted revisions found. Please run %s --fix-revisions '*'" % sys.argv[0], file=sys.stderr)
                sys.exit(1)
            repeated = records_identical(parsed, last_seen)
            # Each revision is always compared with its direct predecessor.
            last_seen = parsed
            if not repeated:
                continue
            duplicates += 1
            if not dry_run:
                run_sql(delete_sql,
                        (rec_id, job_id, job_name, job_person, job_date))
        if verbose and duplicates:
            print("record %s: deleted %s duplicate revisions out of %s" % (rec_id, duplicates, len(revisions)))
    if verbose:
        print("DONE")
Esempio n. 4
0
 def test_legacy_export_marcxml(self):
     """Record - legacy export marcxml.

     Builds a Record from a MARCXML blob, exports it again with
     legacy_export_as_marc(), and checks that parsing the export with
     create_record() gives the same structure as parsing the original
     blob, ignoring subfield order.
     """
     # FIXME: use a better way to compare
     from invenio_record.models import Record
     from invenio.legacy.bibrecord import create_record, records_identical
     marcxml = '''
         <record>
           <controlfield tag="001">8</controlfield>
           <datafield tag="100" ind1=" " ind2=" ">
             <subfield code="a">Efstathiou, G P</subfield>
             <subfield code="u">Cambridge University</subfield>
           </datafield>
           <datafield tag="245" ind1=" " ind2=" ">
             <subfield code="a">Title</subfield>
             <subfield code="b">SubTitle</subfield>
           </datafield>
           <datafield tag="700" ind1=" " ind2=" ">
            <subfield code="a">Lasenby, A N</subfield>
           </datafield>
           <datafield tag="980" ind1=" " ind2=" ">
             <subfield code="a">Articles</subfield>
           </datafield>
         </record>
     '''
     record = Record.create(marcxml, master_format='marc',
                            namespace='testsuite')
     # Reference structure parsed directly from the input blob.
     expected, _, _ = create_record(marcxml)
     # Structure parsed back from the record's own export.
     exported = record.legacy_export_as_marc()
     round_tripped, _, _ = create_record(exported)
     matches = records_identical(round_tripped, expected,
                                 ignore_subfield_order=True)
     self.assertTrue(matches)
Esempio n. 5
0
    def test_legacy_create_recstruct(self):
        """Record - create recstruct.

        The structure returned by Record.legacy_create_recstruct() must be
        identical, up to subfield order, to the one create_record() parses
        from the same MARCXML blob. Record is not imported here; it has to
        come from the enclosing module scope.
        """
        from invenio.legacy.bibrecord import create_record, records_identical

        source_xml = '''
            <record>
              <controlfield tag="001">8</controlfield>
              <datafield tag="100" ind1=" " ind2=" ">
                <subfield code="a">Efstathiou, G P</subfield>
                <subfield code="u">Cambridge University</subfield>
              </datafield>
              <datafield tag="245" ind1=" " ind2=" ">
                <subfield code="a">Title</subfield>
                <subfield code="b">SubTitle</subfield>
              </datafield>
              <datafield tag="700" ind1=" " ind2=" ">
               <subfield code="a">Lasenby, A N</subfield>
              </datafield>
              <datafield tag="980" ind1=" " ind2=" ">
                <subfield code="a">Articles</subfield>
              </datafield>
            </record>
        '''
        creation_options = {'master_format': 'marc', 'namespace': 'testsuite'}
        record = Record.create(source_xml, **creation_options)
        actual = record.legacy_create_recstruct()
        # create_record() returns a 3-tuple; only the first item is compared.
        expected, _, _ = create_record(source_xml)
        identical = records_identical(actual, expected,
                                      ignore_subfield_order=True)
        self.assertTrue(identical)
Esempio n. 6
0
def cli_clean_revisions(recid, dry_run=True, verbose=True):
    """Remove revisions whose content equals that of the preceding revision.

    recid may be '*' to process every id_bibrec present in "hstRECORD";
    any other value is converted with int(). Revisions of each record are
    walked in ascending job_date order and compared pairwise with
    records_identical(); the first one is compared against an empty dict.

    Rows are deleted only when dry_run is false. Duplicates are counted in
    either mode and, when verbose is true, reported with the same "deleted"
    wording. Exits with status 1 when recid is not an integer, or when
    decompressing/parsing a stored revision raises.
    """
    if recid != '*':
        try:
            wanted = [int(recid)]
        except ValueError:
            print('ERROR: record ID must be integer, not %s.' % recid)
            sys.exit(1)
    else:
        known_ids = run_sql("""SELECT DISTINCT id_bibrec FROM "hstRECORD" """)
        wanted = intbitset(known_ids)

    for record_id in wanted:
        history = run_sql(
            """SELECT marcxml, job_id, job_name, job_person, job_date FROM "hstRECORD" WHERE id_bibrec=%s ORDER BY job_date ASC""",
            (record_id, ))
        predecessor = {}
        removed = 0
        for compressed, job_id, job_name, job_person, job_date in history:
            try:
                # str() is applied to the stored value before decompressing.
                raw_xml = zlib.decompress(str(compressed))
                revision = create_record(raw_xml)[0]
            except Exception:
                print(
                    "ERROR: corrupted revisions found. Please run %s --fix-revisions '*'"
                    % sys.argv[0],
                    file=sys.stderr)
                sys.exit(1)
            unchanged = records_identical(revision, predecessor)
            predecessor = revision
            if unchanged:
                removed += 1
                if not dry_run:
                    row_key = (record_id, job_id, job_name, job_person,
                               job_date)
                    run_sql(
                        """DELETE FROM "hstRECORD" WHERE id_bibrec=%s AND job_id=%s AND job_name=%s AND job_person=%s AND job_date=%s""",
                        row_key)
        if verbose and removed:
            summary = (record_id, removed, len(history))
            print("record %s: deleted %s duplicate revisions out of %s" %
                  summary)
    if verbose:
        print("DONE")
Esempio n. 7
0
    def detect_conflict(self, up_record, up_patch, up_date, orig_record,
                        orig_patch, orig_date):
        """
        Check whether the upload patch and the original patch touch the same tags.

        Suspect tags are collected from three pairings: upload 'MOD' against
        original 'MOD'/'DEL', upload 'ADD' against original 'ADD', and upload
        'DEL' against original 'MOD'/'DEL'. A suspect is cleared when the tag
        is absent from both records, or present in both with content that
        records_identical() accepts as equal. Tag '856' has its own
        workaround branch.

        Returns up_patch itself when nothing is left; raises
        InvenioBibUploadConflictingRevisionsError otherwise.
        """
        def in_orig_patch(tag, *sections):
            # Sections are probed in the given order, stopping at the first hit.
            return any(section in orig_patch and tag in orig_patch[section]
                       for section in sections)

        suspects = []
        # modified by the upload, modified/deleted in the current record
        if 'MOD' in up_patch:
            suspects += [tag for tag in up_patch['MOD']
                         if in_orig_patch(tag, 'MOD', 'DEL')]
        # added by the upload, added in the current revision
        if 'ADD' in up_patch:
            suspects += [tag for tag in up_patch['ADD']
                         if in_orig_patch(tag, 'ADD')]
        # deleted by the upload, modified/deleted in the current record
        if 'DEL' in up_patch:
            suspects += [tag for tag in up_patch['DEL']
                         if in_orig_patch(tag, 'MOD', 'DEL')]

        if not suspects:
            return up_patch

        lenient = dict(ignore_duplicate_subfields=True,
                       ignore_duplicate_controlfields=True)
        order_sensitive = dict(lenient,
                               ignore_field_order=False,
                               ignore_subfield_order=False)

        ## Suspects may be false positives: a tag changed on both sides
        ## can still carry the same content in the end.
        real_conflict_tags = []
        for tag in suspects:
            present_in_both = tag in up_record and tag in orig_record
            absent_from_both = (tag not in up_record
                                and tag not in orig_record)
            if tag == '856':
                ## HACK: FIXME: we are not yet able to preserve the sorting
                ## of 8564 tags WRT FFT in BibUpload.
                ## Therefore a workaround is implemented here for 856.
                ## See ticket #1606.
                if present_in_both and records_identical(
                        {tag: up_record[tag]}, {tag: orig_record[tag]},
                        **order_sensitive):
                    continue
                # NOTE(review): a differing '856' is not appended below
                # either, and field order stays significant in the call
                # above. Confirm both are intended.
            elif present_in_both:
                if not records_identical({tag: up_record[tag]},
                                         {tag: orig_record[tag]},
                                         **lenient):
                    real_conflict_tags.append(tag)
            elif not absent_from_both:
                # Present on one side only.
                real_conflict_tags.append(tag)

        if real_conflict_tags:
            raise InvenioBibUploadConflictingRevisionsError(
                self.rec_id, real_conflict_tags, up_date, orig_date,
                up_record, orig_record)

        return up_patch