def prepared_eadxml(request, archive, filename):
    """On GET, serves out a prepared version of the EAD file in the
    specified archive subversion directory.  Response header is set so the
    user should be prompted to download the xml, with a filename matching
    that of the original document.

    On POST, commits the prepared version of the EAD file to the
    subversion directory of the specified archive, with a log message
    indicating the user who requested the commit.

    Steps taken to prepare a document are documented in
    :meth:`~findingaids.fa_admin.utils.prep_ead`.

    The serialized, prepared XML is stored in the cache under ``filename``;
    when a cached copy is found the document is neither re-loaded nor
    re-prepared.

    :param request: current HTTP request; messages logged by
        ``findingaids.fa_admin.utils`` while prepping are attached to it
    :param archive: slug of the :class:`Archive` whose subversion working
        copy contains the file (404 if no archive matches)
    :param filename: name of the file to prep; should be base filename
        only, the document is loaded relative to the archive's
        ``svn_local_path``
    :returns: an :class:`HttpResponseServerError` if the document cannot be
        loaded as well-formed XML or if prepping it fails
    """
    # NOTE(review): the cache key is the bare filename, while the file itself
    # is resolved per-archive below -- confirm that identically named files
    # in different archives cannot collide here.
    prepped_xml = cache.get(filename)

    # find relative to svn path of the requested archive
    arch = get_object_or_404(Archive, slug=archive)
    fullpath = os.path.join(arch.svn_local_path, filename)

    if prepped_xml is None:
        try:
            ead = load_xmlobject_from_file(fullpath, FindingAid)  # validate or not?
        except XMLSyntaxError as e:
            # xml is not well-formed : return 500 with error message
            return HttpResponseServerError("Could not load document: %s" % e)

        # flash messages that appear on the screen for the user; the messages
        # themselves are generated in utils.py
        with message_logging(request, 'findingaids.fa_admin.utils', logging.INFO):
            try:
                ead = utils.prep_ead(ead, filename)
                prepped_xml = ead.serializeDocument()
                cache.set(filename, prepped_xml)
            except Exception as e:
                # any exception on prep is most likely ark generation
                return HttpResponseServerError('Failed to prep the document: ' + str(e))
def handle(self, *args, **options):
    """Prep EAD documents on disk, saving those that change and pass checks.

    Files to process are taken from ``args`` when any are given.  Otherwise
    the subversion working copy of every :class:`Archive` is updated and all
    ``*.xml`` files found in each ``svn_local_path`` are processed.

    Each document is loaded, run through ``utils.prep_ead`` and then checked
    with ``utils.check_ead``:

    * if the check reports errors, they are printed and the file is left
      untouched (counted as errored);
    * if the pretty-printed XML is the same before and after prep, nothing
      is written (counted as unchanged);
    * otherwise the file is overwritten with the prepared, pretty-printed
      XML (counted as updated).

    Documents that fail to load or prep are reported and counted as errored;
    processing continues with the next file.

    :param args: optional paths of the EAD files to prep
    :param options: command options; ``options['verbosity']`` controls how
        much is printed
    :raises CommandError: if ``EXISTDB_ROOT_COLLECTION`` is missing from
        settings or is empty
    """
    verbosity = int(options['verbosity'])
    self._setup_logging(verbosity)

    # check for required settings (missing and empty are treated alike)
    if not getattr(settings, 'EXISTDB_ROOT_COLLECTION', None):
        raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")

    # single-argument print(...) behaves exactly like the Python 2 print
    # statement and also parses under Python 3
    if verbosity == self.v_all:
        print("Preparing documents from all defined Archives")

    updated = 0
    unchanged = 0
    errored = 0

    if args:
        files = args
    else:
        files = set()
        svn = svn_client()
        for archive in Archive.objects.all():
            # update to make sure we have latest version of everything;
            # str() because svn apparently can't handle unicode
            svn.update(str(archive.svn_local_path))
            files.update(glob.iglob(os.path.join(archive.svn_local_path, '*.xml')))

    for path in files:
        try:
            ead = load_xmlobject_from_file(path, FindingAid)
            orig_xml = ead.serializeDocument(pretty=True)
            ead = utils.prep_ead(ead, path)

            # sanity check before saving
            dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(path)
            errors = utils.check_ead(path, dbpath, xml=ead.serializeDocument())

            if errors:
                errored += 1
                print("Prepared EAD for %s does not pass sanity checks, not saving." % path)
                if verbosity >= self.v_normal:
                    print("Errors found:")
                    for err in errors:
                        # some errors include a list of error instances - display nicely
                        if isinstance(err, list):
                            for suberr in err:
                                print(" %s" % suberr)
                        else:
                            print(" %s" % err)
            elif orig_xml == ead.serializeDocument(pretty=True):
                if verbosity >= self.v_normal:
                    print("No changes made to %s" % path)
                unchanged += 1
            else:
                with open(path, 'w') as f:
                    ead.serializeDocument(f, pretty=True)
                if verbosity >= self.v_normal:
                    print("Updated %s" % path)
                updated += 1
        except XMLSyntaxError:
            # xml is not well-formed
            print("Error: failed to load %s (document not well-formed XML?)"
                  % path)
            errored += 1
        except Exception as e:
            # catch any other exceptions so one bad document does not stop the run
            print("Error: failed to prep %s : %s" % (path, e))
            errored += 1
def test_prep_ead(self):
    """Exercise ``utils.prep_ead``: id assignment, ARK storage and clean-up.

    Covers a valid fixture with series/subseries and an index, a fixture
    with no series, a series with no unitid, whitespace clean-up on the
    invalid fixture, and a unittitle that begins with a ``<title>``.
    """
    fixture_dir = os.path.join(settings.BASE_DIR, 'fa', 'tests', 'fixtures')

    # valid fixtures is an ead with series/subseries, and index
    # - clear out fixture ark url to trigger generating a new one (simulated)
    del self.valid_ead.eadid.url
    del self.valid_ead.eadid.identifier
    ead = utils.prep_ead(self.valid_ead, self.valid_eadfile)
    self.assertTrue(isinstance(ead, FindingAid),
                    "prep_ead should return an instance of FindingAid")
    self.assertEqual(u'hartsfield558', ead.eadid.value)
    self.assertEqual(u'hartsfield558_series1', ead.dsc.c[0].id)
    self.assertEqual(u'hartsfield558_subseries6.1', ead.dsc.c[5].c[0].id)
    self.assertEqual(u'hartsfield558_index1', ead.archdesc.index[0].id)
    # ark should be generated and stored in eadid url
    self.assertEqual(MockDjangoPidmanClient.test_ark, ead.eadid.url)
    # short-form ark should be stored in identifier attribute
    self.assertTrue(MockDjangoPidmanClient.test_ark.endswith(ead.eadid.identifier))

    # ead with no series
    eadfile = os.path.join(fixture_dir, 'pittsfreeman1036.xml')
    ead = load_xmlobject_from_file(eadfile, FindingAid)
    ead = utils.prep_ead(ead, eadfile)
    self.assertTrue(isinstance(ead, FindingAid),
                    "prep_ead should return an instance of FindingAid")
    self.assertEqual(u'pittsfreeman1036', ead.eadid.value)

    # series with no unitid
    eadfile = os.path.join(fixture_dir, 'raoul548.xml')
    ead = load_xmlobject_from_file(eadfile, FindingAid)
    ead = utils.prep_ead(ead, eadfile)
    self.assertEqual(u'raoul548_series3', ead.dsc.c[2].id)

    # whitespace cleanup
    ead = utils.prep_ead(self.invalid_ead, self.invalid_eadfile)
    # - no leading whitespace in list title
    # ead.archdesc.origination is getting normalized, so can't be used for testing
    origination = ead.node.xpath('//e:origination/e:persname',
                                 namespaces={'e': EAD_NAMESPACE})
    self.assertEqual(u'Hartsfield, William Berry.', origination[0].text)
    # test the node text directly (does not include unitdate)
    self.assertEqual(u'William Berry Hartsfield papers, ', ead.unittitle.node.text)
    controlaccess = ead.archdesc.controlaccess.controlaccess
    self.assertEqual(u'Gone with the wind (Motion picture)',
                     controlaccess[0].title[0].value)
    self.assertEqual(u'Allen, Ivan.', controlaccess[1].person_name[0].value)
    self.assertEqual(u'Mines and mineral resources--Georgia.',
                     controlaccess[3].subject[1].value)
    # unicode characters
    self.assertEqual(u'Motion pictures--Georgia. \u2026',
                     controlaccess[3].subject[2].value)
    self.assertEqual(u'Motion pictures.', controlaccess[-1].genre_form[0].value)

    # remaining errors after clean-up:
    # 1 - duplicate origination
    # 2 - > 2 containers in a did (summary error and list of problem dids)
    # 2 - 1 container in a did (summary error and list of problem dids)
    # = 5
    # (failure message states the same count the assertion checks)
    self.assertEqual(5, len(utils.check_eadxml(ead)),
                     "only 5 errors (duplicate origination; summary and list for "
                     "3 containers in a did; summary and list for 1 container in "
                     "a did) should be left in invalid test fixture after cleaning")

    # special case - unittitle begins with a <title>
    eadfile = os.path.join(fixture_dir, 'pittsfreeman1036.xml')
    ead = load_xmlobject_from_file(eadfile, FindingAid)
    ead = utils.prep_ead(ead, eadfile)
    self.assertFalse(unicode(ead.list_title).startswith('None'),
                     'cleaned unittitle with leading <title> should not start with "None"')
:param filename: name of the file to prep; should be base filename only, document will be pulled from the configured source directory. """ # find relative to svn path if associated with an archive arch = get_object_or_404(Archive, slug=archive) fullpath = os.path.join(arch.svn_local_path, filename) try: ead = load_xmlobject_from_file(fullpath, FindingAid) # validate or not? except XMLSyntaxError, e: # xml is not well-formed : return 500 with error message return HttpResponseServerError("Could not load document: %s" % e) with message_logging(request, 'findingaids.fa_admin.utils', logging.INFO): try: ead = utils.prep_ead(ead, filename) except Exception as e: # any exception on prep is most likely ark generation return HttpResponseServerError('Failed to prep the document: ' + str(e)) # on GET, display the xml and make available for download if request.method == 'GET': prepped_xml = ead.serializeDocument() response = HttpResponse(prepped_xml, mimetype='application/xml') response['Content-Disposition'] = "attachment; filename=%s" % filename return response # on POST, save to file and commit to subversion if request.method == 'POST': file_path = os.path.join(arch.svn_local_path, filename) with open(file_path, 'w') as xmlfile: