Beispiel #1
def prepared_ead(request, archive, filename, mode):
    """Display information about changes made by preparing an EAD file for
    publication.  If no changes are made, user will be redirected to main admin
    page with a message to that effect.

    In **summary** mode, displays a brief, color-coded summary of changes between
    original and prepped version of the file.  In **diff** mode, displays a full,
    side-by-side diff generated by :class:`difflib.HtmlDiff`.  (Note: because it
    is very large, the full diff is *not* embedded in the site template, and is
    intended to be opened in a new window.)

    :param request: current request; handed on to ``prepared_eadxml``, the
        user message call and ``render``
    :param archive: slug used to look up the :class:`Archive` whose
        ``svn_local_path`` holds the file
    :param filename: name of the file to prep; should be base filename only,
        document will be pulled from the configured source directory.
    :param mode: one of **diff** or **summary**
    :returns: in **diff** mode, an :class:`HttpResponse` containing the HTML
        diff; in **summary** mode with no changes, an
        :class:`HttpResponseSeeOtherRedirect` to ``fa-admin:index``; otherwise
        the rendered ``fa_admin/prepared.html`` template
    """
    # Function-scope import so the restored message call below resolves even
    # if the module does not already import the messages framework.
    from django.contrib import messages

    # determine full path based on archive / svn
    # NOTE(review): presumably .get() raises for an unknown slug; SOURCE carried
    # a commented-out get_object_or_404(Archive, slug=archive) alternative here.
    arch = Archive.objects.get(slug=archive)
    fullpath = os.path.join(arch.svn_local_path, filename)
    changes = []
    # Bound up front: a 200 response combined with an unrecognised mode used to
    # reach render() below without ``errors`` ever being assigned.
    errors = []

    # TODO: expire cache if file has changed since prepped eadxml was cached
    prep_ead = prepared_eadxml(request, arch.slug, filename)

    if prep_ead.status_code == 200:
        orig_ead = load_xmlobject_from_file(fullpath, FindingAid)  # validate or not?
        # store as serialized by xml object, so xml output will be the same
        original_xml = orig_ead.serializeDocument()

        prep_xml = prep_ead.content
        ead = load_xmlobject_from_string(prep_xml, FindingAid)  # validate?
        if mode == 'diff':
            diff = difflib.HtmlDiff(8, 80)  # set columns to wrap at 80 characters
            # generate a html table with line-by-line comparison (meant to be called in a new window)
            changes = diff.make_file(original_xml.split('\n'), prep_xml.split('\n'))
            return HttpResponse(changes)
        elif mode == 'summary':
            # prepared EAD should pass sanity checks required for publication
            errors = utils.check_eadxml(ead)
            changes = list(difflib.unified_diff(original_xml.split('\n'), prep_xml.split('\n')))
            if not changes:
                # NOTE(review): the callable and first argument of this call
                # were missing in SOURCE; restored as messages.info(request, ...)
                # -- confirm the intended message level.
                messages.info(request, 'No changes made to <b>%s</b>; EAD is already prepared.' % filename)
                # redirect to main admin page with code 303 (See Other)
                return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))
    elif prep_ead.status_code == 500:
        # something went wrong with generating prep xml; could be one of:
        # - non-well-formed xml (failed to load original document at all)
        # - error generating an ARK for the document
        errors = [prep_ead.content]
    else:
        # this shouldn't happen; not 200 or 500 == something went dreadfully wrong
        errors = ['Something went wrong trying to load the specified document.',
                  prep_ead.content]     # pass along the output in case it is useful?

    return render(request, 'fa_admin/prepared.html', {
        'filename': filename,
        'changes': changes, 'errors': errors,
        'xml_status': prep_ead.status_code,
        'archive': arch})
Beispiel #2
    def test_prep_ead(self):
        """Run ``utils.prep_ead`` over several fixtures and check the result.

        Covers generated ids (eadid, series, subseries, index), the ARK stored
        in the eadid url, documents without series or without a series unitid,
        whitespace clean-up, and the number of errors ``utils.check_eadxml``
        still reports for the invalid fixture after prepping.
        """
        # valid fixtures is an ead with series/subseries, and index
        # - clear out fixture ark url to trigger generating a new one (simulated)
        ead = utils.prep_ead(self.valid_ead, self.valid_eadfile)
        self.assertIsInstance(ead, FindingAid, "prep_ead should return an instance of FindingAid")
        self.assertEqual(u'hartsfield558', ead.eadid.value)
        self.assertEqual(u'hartsfield558_series1', ead.dsc.c[0].id)
        self.assertEqual(u'hartsfield558_subseries6.1', ead.dsc.c[5].c[0].id)
        self.assertEqual(u'hartsfield558_index1', ead.archdesc.index[0].id)
        # ark should be generated and stored in eadid url
        self.assertEqual(MockDjangoPidmanClient.test_ark, ead.eadid.url)
        # short-form ark should be stored in identifier attribute
        # TODO(review): no assertion follows this comment in SOURCE; the check
        # on the identifier attribute appears to have been lost -- restore it.

        # ead with no series
        eadfile = os.path.join(settings.BASE_DIR, 'fa', 'tests',
            'fixtures', 'pittsfreeman1036.xml')
        ead = load_xmlobject_from_file(eadfile, FindingAid)
        ead = utils.prep_ead(ead, eadfile)
        self.assertIsInstance(ead, FindingAid, "prep_ead should return an instance of FindingAid")
        self.assertEqual(u'pittsfreeman1036', ead.eadid.value)

        # series with no unitid
        eadfile = os.path.join(settings.BASE_DIR, 'fa', 'tests',
            'fixtures', 'raoul548.xml')
        ead = load_xmlobject_from_file(eadfile, FindingAid)
        ead = utils.prep_ead(ead, eadfile)
        self.assertEqual(u'raoul548_series3', ead.dsc.c[2].id)

        # whitespace cleanup
        ead = utils.prep_ead(self.invalid_ead, self.invalid_eadfile)
        # - no leading whitespace in list title
        # ead.archdesc.origination is getting normalized, so can't be used for testing
        origination = ead.node.xpath('//e:origination/e:persname', namespaces={'e': EAD_NAMESPACE})
        self.assertEqual(u'Hartsfield, William Berry.', origination[0].text)
        # test the node text directly (does not include unitdate)
        self.assertEqual(u'William Berry Hartsfield papers, ', ead.unittitle.node.text)
        # TODO(review): the next five assertEqual calls ended after their
        # expected string in SOURCE (the actual-value argument was missing),
        # which made the whole method a syntax error.  The expected values are
        # kept here; restore the lookups on ``ead`` to re-enable the checks.
        #   u'Gone with the wind (Motion picture)'
        #   u'Allen, Ivan.'
        #   u'Mines and mineral resources--Georgia.'
        #   unicode characters:
        #   u'Motion pictures--Georgia. \u2026'
        #   u'Motion pictures.'
        # remaining errors after clean-up:
        # 1 - duplicate origination
        # 2 - > 2 containers in a did (summary error and list of problem dids)
        # 2 - 1 container in a did (summary error and list of problem dids)
        # = 5
        self.assertEqual(5, len(utils.check_eadxml(ead)),
            "only 5 errors (1 duplicate origination, 2 for more than 2 containers in a did, 2 for 1 container in a did) should be left in invalid test fixture after cleaning")

        # special case - unittitle begins with a <title>
        eadfile = os.path.join(settings.BASE_DIR, 'fa', 'tests',
            'fixtures', 'pittsfreeman1036.xml')
        ead = load_xmlobject_from_file(eadfile, FindingAid)
        ead = utils.prep_ead(ead, eadfile)
        # TODO(review): the assertion that followed was reduced in SOURCE to its
        # message argument only:
        #   'cleaned unittitle with leading <title> should not start with "None"'
        # Restore the check of the cleaned title against a leading "None".
Beispiel #3
    def test_check_eadxml(self):
        """Check that ``utils.check_eadxml`` reports the expected problems.

        Uses the invalid fixture (with a deliberately bad eadid) to confirm the
        individual error strings, then varies the eadid url/identifier and the
        list title to exercise the ARK and browse-letter checks, and finally
        runs a fixture whose <unittitle> starts with a nested <title>.
        """
        # use invalid ead fixture to check error detection
        ead = self.invalid_ead
        ead.eadid.value = 'foo#~@/'    # set invalid eadid for this test only

        # invalid fixture has several errors
        errors = utils.check_eadxml(ead)
        self.assertNotEqual(0, len(errors))
        # - series/subseries ids missing, index id missing
        self.assertIn("series c01 id attribute is not set for Series 1: Personal papers, 1918-1986",
                      errors, 'c01 missing id error reported')
        self.assertIn("subseries c02 id attribute is not set for Subseries 6.1: Minerals and mining files, 1929-1970",
                      errors, 'c02 missing id error reported')
        self.assertIn("index id attribute is not set for Index of Selected Correspondents",
                      errors, 'index missing id error reported')
        # - origination count error
        self.assertIn("Site expects only one archdesc/did/origination; found 2", errors,
                      'multiple origination error reported')
        # - whitespace in list title
        self.assertIn("Found leading whitespace in list title field (origination/persname): " +
                      "'  Hartsfield, William Berry.'", errors, 'leading whitespace in origination reported')
        # - eadid regex
        self.assertIn("eadid '%s' does not match site URL regular expression" % ead.eadid.value,
                      errors, 'eadid regex error reported')

        # ARK in url and identifier not set or invalid
        url_not_ark = ("eadid url is either not set or not an ARK. " +
                       "To correct, run the prep process again.")
        # NOTE(review): no separator between the two sentences below; presumably
        # this mirrors the message check_eadxml emits -- verify before changing.
        identifier_not_ark = ("eadid identifier is either not set or not an ARK" +
                              "To correct, run the prep process again.")
        self.assertIn(url_not_ark, errors, 'eadid ark not in url')
        self.assertIn(identifier_not_ark, errors, 'eadid ark not in identifier')

        # valid ARKs in url and identifier but do not match
        ark1_short = "ark:/25593/1234"
        ark2_short = "ark:/25593/567"
        # NOTE(review): SOURCE had ark1 = "" here, which cannot satisfy the
        # assertions below (url must be a valid ARK and end with ark1_short).
        # The hostname is a reconstruction -- confirm against the project's
        # test settings.
        ark1 = "http://testpid.library.emory.edu/" + ark1_short
        ead.eadid.url = ark1
        ead.eadid.identifier = ark2_short
        errors = utils.check_eadxml(ead)

        self.assertNotIn(url_not_ark, errors, 'valid eadid ark set in url')
        self.assertNotIn(identifier_not_ark, errors, 'valid eadid ark set in identifier')

        self.assertIn("eadid url and identifier do not match: url '%s' should end with identifier '%s'" % (ark1, ark2_short),
                      errors, 'eadid url and  identifier do not match')

        # Change url and identifier to match
        ead.eadid.url = ark1
        ead.eadid.identifier = ark1_short
        errors = utils.check_eadxml(ead)

        self.assertNotIn("eadid url and identifier do not match: url '%s' should end with identifier '%s'" % (ark1, ark1_short),
                         errors, 'eadid url and  identifier match')

        # - list title first letter regex
        # simulate non-whitespace, non-alpha first letter in list title
        ead.list_title.node.text = "1234"  # list title is not normally settable; overriding for test
        errors = utils.check_eadxml(ead)
        self.assertIn("First letter ('1') of list title field origination/persname does not match browse letter URL regex '%s'"
                      % TITLE_LETTERS, errors, 'title first letter regex error reported')

        # empty/unset list title field
        ead.list_title.node.text = None
        errors = utils.check_eadxml(ead)
        self.assertIn("List title seems to be empty", errors)

        # - whitespace in control access terms
        self.assertIn("Found leading whitespace in controlaccess term ' Gone with the wind (Motion picture)' (title)",
                      errors, 'controlaccess title leading whitespace reported')
        self.assertIn("Found leading whitespace in controlaccess term '  \t   Selznick, David O., 1902-1965.' (persname)",
                      errors, 'controlaccess name leading whitespace reported')
        self.assertIn("Found leading whitespace in controlaccess term '  \t   Mines and mineral resources--Georgia.' (subject)",
                      errors, 'controlaccess subject leading whitespace reported')
        self.assertIn("Found leading whitespace in controlaccess term ' Motion pictures.' (genreform)",
                      errors, 'controlaccess genre leading whitespace reported')

        # - did with > 2 containers
        self.assertIn('Site expects maximum of 2 containers per did; found 1 did(s) with more than 2',
                      errors, 'did with more than 2 containers reported')

        # - did with only 1 container
        self.assertIn('Site expects 2 containers per did; found 1 did(s) with only 1',
                      errors, 'did with only 1 container reported')

        # make sure we handle quirky document with a <title> at the beginning of the <unittitle>
        eadfile = os.path.join(settings.BASE_DIR, 'fa',
            'tests', 'fixtures', 'pittsfreeman1036.xml')
        ead_nested_title = load_xmlobject_from_file(eadfile, FindingAid)
        errors = utils.check_eadxml(ead_nested_title)
        self.assertTrue(all('list title' not in err for err in errors),
                        'nested <title> in <unittitle> should not generate a list title whitespace error')