Example #1
0
def prepared_eadxml(request, archive, filename):
    """On GET, serves out a prepared version of the EAD file in the specified
    archive subversion directory. Response header is set so the user should
    be prompted to download the xml, with a filename matching that of
    the original document.

    On POST, commits the prepared version of the EAD file to the subversion
    directory of the specified archive, with a log message indicating the user
    who requested the commit.

    Steps taken to prepare a document are documented in
    :meth:`~findingaids.fa_admin.utils.prep_ead`.

    :param filename: name of the file to prep; should be base filename only,
        document will be pulled from the configured source directory.
    """
    # find relative to svn path if associated with an archive
    prepped_xml = cache.get(filename)
    arch = get_object_or_404(Archive, slug=archive)
    fullpath = os.path.join(arch.svn_local_path, filename)
    if prepped_xml is None:
        try:
            ead = load_xmlobject_from_file(fullpath, FindingAid)  # validate or not?
        except XMLSyntaxError, e:
            # xml is not well-formed : return 500 with error message
            return HttpResponseServerError("Could not load document: %s" % e)

        # flash meesage that appear on the screen for user, message itself is generated in utils.py
        with message_logging(request, 'findingaids.fa_admin.utils', logging.INFO):
            try:
                ead = utils.prep_ead(ead, filename)
                prepped_xml = ead.serializeDocument()
                cache.set(filename, prepped_xml)
            except Exception as e:
                # any exception on prep is most likely ark generation
                return HttpResponseServerError('Failed to prep the document: ' + str(e))
Example #2
0
    def handle(self, *args, **options):
        verbosity = int(options['verbosity'])

        self._setup_logging(verbosity)

        # check for required settings
        if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION:
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
            return


        if verbosity == self.v_all:
            print "Preparing documents from all defined Archives"

        updated = 0
        unchanged = 0
        errored = 0

        if len(args):
            files = args
        else:
            files = set()
            svn = svn_client()
            for archive in Archive.objects.all():
                # update to make sure we have latest version of everything
                svn.update(str(archive.svn_local_path))   # apparently can't handle unicode
                files.update(set(glob.iglob(os.path.join(archive.svn_local_path, '*.xml'))))

        for file in files:
            try:
                ead = load_xmlobject_from_file(file, FindingAid)
                orig_xml = ead.serializeDocument(pretty=True)
                ead = utils.prep_ead(ead, file)
                # sanity check before saving
                dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(file)
                errors = utils.check_ead(file, dbpath, xml=ead.serializeDocument())
                if errors:
                    errored += 1
                    print "Prepared EAD for %s does not pass sanity checks, not saving." % file
                    if verbosity >= self.v_normal:
                        print "Errors found:"
                        for err in errors:
                            # some errors include a list of error instances - display nicely
                            if isinstance(err, list):
                                for suberr in err:
                                    print "    %s" % suberr
                            else:
                                print "  %s" % err
                elif orig_xml == ead.serializeDocument(pretty=True):
                    if verbosity >= self.v_normal:
                        print "No changes made to %s" % file
                    unchanged += 1
                else:
                    with open(file, 'w') as f:
                        ead.serializeDocument(f, pretty=True)
                    if verbosity >= self.v_normal:
                        print "Updated %s" % file
                    updated += 1
            except XMLSyntaxError, e:
                # xml is not well-formed
                print "Error: failed to load %s (document not well-formed XML?)" \
                            % file
                errored += 1
            except Exception, e:
                # catch any other exceptions
                print "Error: failed to prep %s : %s" % (file, e)
                errored += 1
Example #3
0
    def test_prep_ead(self):
        # valid fixtures is an ead with series/subseries, and index
        # - clear out fixture ark url to trigger generating a new one (simulated)
        del(self.valid_ead.eadid.url)
        del(self.valid_ead.eadid.identifier)
        ead = utils.prep_ead(self.valid_ead, self.valid_eadfile)
        self.assert_(isinstance(ead, FindingAid), "prep_ead should return an instance of FindingAid")
        self.assertEqual(u'hartsfield558', ead.eadid.value)
        self.assertEqual(u'hartsfield558_series1', ead.dsc.c[0].id)
        self.assertEqual(u'hartsfield558_subseries6.1', ead.dsc.c[5].c[0].id)
        self.assertEqual(u'hartsfield558_index1', ead.archdesc.index[0].id)
        # ark should be generated and stored in eadid url
        self.assertEqual(MockDjangoPidmanClient.test_ark, ead.eadid.url)
        # short-form ark should be stored in identifier attribute
        self.assert_(MockDjangoPidmanClient.test_ark.endswith(ead.eadid.identifier))

        # ead with no series
        eadfile = os.path.join(settings.BASE_DIR, 'fa', 'tests',
            'fixtures', 'pittsfreeman1036.xml')
        ead = load_xmlobject_from_file(eadfile, FindingAid)
        ead = utils.prep_ead(ead, eadfile)
        self.assert_(isinstance(ead, FindingAid), "prep_ead should return an instance of FindingAid")
        self.assertEqual(u'pittsfreeman1036', ead.eadid.value)

        # series with no unitid
        eadfile = os.path.join(settings.BASE_DIR, 'fa', 'tests',
            'fixtures', 'raoul548.xml')
        ead = load_xmlobject_from_file(eadfile, FindingAid)
        ead = utils.prep_ead(ead, eadfile)
        self.assertEqual(u'raoul548_series3', ead.dsc.c[2].id)

        # whitespace cleanup
        ead = utils.prep_ead(self.invalid_ead, self.invalid_eadfile)
        # - no leading whitespace in list title
        # ead.archdesc.origination is getting normalized, so can't be used for testing
        origination = ead.node.xpath('//e:origination/e:persname', namespaces={'e': EAD_NAMESPACE})
        self.assertEqual(u'Hartsfield, William Berry.', origination[0].text)
        # test the node text directly (does not include unitdate)
        self.assertEqual(u'William Berry Hartsfield papers, ', ead.unittitle.node.text)
        self.assertEqual(u'Gone with the wind (Motion picture)',
                        ead.archdesc.controlaccess.controlaccess[0].title[0].value)
        self.assertEqual(u'Allen, Ivan.',
                        ead.archdesc.controlaccess.controlaccess[1].person_name[0].value)
        self.assertEqual(u'Mines and mineral resources--Georgia.',
                        ead.archdesc.controlaccess.controlaccess[3].subject[1].value)
        # unicode characters
        self.assertEqual(u'Motion pictures--Georgia. \u2026',
                        ead.archdesc.controlaccess.controlaccess[3].subject[2].value)
        self.assertEqual(u'Motion pictures.',
                        ead.archdesc.controlaccess.controlaccess[-1].genre_form[0].value)
        # remaining errors after clean-up:
        # 1 - duplicate origination
        # 2 - > 2 containers in a did (summary error and list of problem dids)
        # 2 - 1 container in a did (summary error and list of problem dids)
        # = 5
        self.assertEqual(5, len(utils.check_eadxml(ead)),
            "only 3 errors (duplicate origination, 3 containers in a did, 1 container in a did) should be left in invalid test fixture after cleaning")

        # special case - unittitle begins with a <title>
        eadfile = os.path.join(settings.BASE_DIR, 'fa', 'tests',
            'fixtures', 'pittsfreeman1036.xml')
        ead = load_xmlobject_from_file(eadfile, FindingAid)
        ead = utils.prep_ead(ead, eadfile)
        self.assertFalse(unicode(ead.list_title).startswith('None'),
            'cleaned unittitle with leading <title> should not start with "None"')
Example #4
0
    :param filename: name of the file to prep; should be base filename only,
        document will be pulled from the configured source directory.
    """
    # find relative to svn path if associated with an archive
    arch = get_object_or_404(Archive, slug=archive)
    fullpath = os.path.join(arch.svn_local_path, filename)
    try:
        ead = load_xmlobject_from_file(fullpath, FindingAid)  # validate or not?
    except XMLSyntaxError, e:
        # xml is not well-formed : return 500 with error message
        return HttpResponseServerError("Could not load document: %s" % e)

    with message_logging(request, 'findingaids.fa_admin.utils', logging.INFO):
        try:
            ead = utils.prep_ead(ead, filename)
        except Exception as e:
            # any exception on prep is most likely ark generation
            return HttpResponseServerError('Failed to prep the document: ' + str(e))

    # on GET, display the xml and make available for download
    if request.method == 'GET':
        prepped_xml = ead.serializeDocument()
        response = HttpResponse(prepped_xml, mimetype='application/xml')
        response['Content-Disposition'] = "attachment; filename=%s" % filename
        return response

    # on POST, save to file and commit to subversion
    if request.method == 'POST':
        file_path = os.path.join(arch.svn_local_path, filename)
        with open(file_path, 'w') as xmlfile: