class ModelTest(unittest.TestCase):
    """Tests for XmlModel managers/queries backed by a scratch eXist-db collection."""

    COLLECTION = EXISTDB_TEST_COLLECTION

    def setUp(self):
        # connect with explicit credentials and (re)create the test collection
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL, username=EXISTDB_SERVER_USER,
                          password=EXISTDB_SERVER_PASSWORD)
        self.db.createCollection(self.COLLECTION, True)
        test_dir = os.path.dirname(os.path.abspath(__file__))
        # use context managers so fixture file handles are closed promptly
        # instead of leaking until garbage collection
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-english.xml')
        with open(fixture) as fixture_file:
            self.db.load(fixture_file, self.COLLECTION + '/goodbye-english.xml')
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-french.xml')
        with open(fixture) as fixture_file:
            self.db.load(fixture_file, self.COLLECTION + '/goodbye-french.xml')
        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        self.db.removeCollection(self.COLLECTION)
        settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        partings = Parting.objects.all()
        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual(2, partings.count())

    def test_sibling_query(self):
        # test sibling node access via 'also'
        exc = Exclamation.objects.filter(text='Au revoir').also('next').get()
        self.assertEqual('monde', exc.next)
class ModelTest(unittest.TestCase):
    """Tests for XmlModel managers backed by a scratch eXist-db collection."""

    COLLECTION = settings.EXISTDB_TEST_COLLECTION

    def setUp(self):
        # connection settings come from the Django settings module
        self.db = ExistDB()
        self.db.createCollection(self.COLLECTION, True)
        test_dir = os.path.dirname(os.path.abspath(__file__))
        # close fixture file handles promptly instead of leaking them;
        # trailing True asks eXist to overwrite any existing copy
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-english.xml')
        with open(fixture) as fixture_file:
            self.db.load(fixture_file, self.COLLECTION + '/goodbye-english.xml', True)
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-french.xml')
        with open(fixture) as fixture_file:
            self.db.load(fixture_file, self.COLLECTION + '/goodbye-french.xml', True)
        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        self.db.removeCollection(self.COLLECTION)
        settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        partings = Parting.objects.all()
        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual(2, partings.count())
def preview(request, archive):
    """Admin view: load a finding aid document into the eXist *preview*
    collection, then redirect to its preview page.

    Expects a POST with a ``filename`` parameter; ``archive`` is the slug
    of the owning :class:`Archive`.
    """
    if request.method == 'POST':
        archive = get_object_or_404(Archive, slug=archive)
        filename = request.POST['filename']
        errors = []
        # BUG FIX: the original referenced the except-clause variable `e`
        # in the failure render even when no exception was raised (db.load
        # returning False), which raised NameError. Pre-bind an outer name.
        err = None
        success = False
        try:
            # only load to exist if document passes publication check
            ok, response, dbpath, fullpath = _prepublication_check(request, filename,
                archive, mode='preview')
            if ok is not True:
                return response
            db = ExistDB()
            # load the document to the *preview* collection in eXist with the same filename
            preview_dbpath = settings.EXISTDB_PREVIEW_COLLECTION + "/" + filename
            # make sure the preview collection exists, but don't complain if it's already there;
            # close the file handle promptly instead of leaking it
            with open(fullpath, 'r') as eadfile:
                success = db.load(eadfile, preview_dbpath, overwrite=True)
        except ExistDBException as exc:
            # copy to the outer name: the except-clause variable is
            # unbound after the block under Python 3
            err = exc
            success = False
            errors.append(exc.message())
        if success:
            # load the file as a FindingAid object so we can generate the preview url
            ead = load_xmlobject_from_file(fullpath, FindingAid)
            messages.success(request, 'Successfully loaded <b>%s</b> for preview.' % filename)
            # redirect to document preview page with code 303 (See Other)
            return HttpResponseSeeOtherRedirect(reverse('fa-admin:preview:findingaid',
                kwargs={'id': ead.eadid}))
        else:
            # no exception but no success means the load failed;
            # give the template something to report
            if not errors:
                errors.append('Failed to load the document to the preview collection')
            return render(request, 'fa_admin/publish-errors.html',
                {'errors': errors, 'filename': filename, 'mode': 'preview',
                 'exception': err})
def test_ead_lastmodified(self):
    """Check that ead_lastmodified returns today's datetime, 404s on a bad
    eadid, and also works against the preview collection."""
    modified = ead_lastmodified('rqst', 'abbey244')
    # assertTrue: assert_ is a deprecated alias
    self.assertTrue(isinstance(modified, datetime),
                    "ead_lastmodified should return a datetime object")
    date_format = '%Y-%m-%d'
    expected = datetime.now().strftime(date_format)
    value = modified.strftime(date_format)
    self.assertEqual(expected, value,
                     'ead lastmodified should be today, expected %s, got %s'
                     % (expected, value))

    # invalid eadid
    self.assertRaises(Http404, ead_lastmodified, 'rqst', 'bogusid')

    db = ExistDB()
    # preview document - load fixture to preview collection;
    # close the fixture file handle promptly instead of leaking it
    fullpath = path.join(exist_fixture_path, 'raoul548.xml')
    with open(fullpath, 'r') as fixture_file:
        db.load(fixture_file, settings.EXISTDB_PREVIEW_COLLECTION + '/raoul548.xml')
    try:
        preview_modified = ead_lastmodified('rqst', 'raoul548', preview=True)
        self.assertTrue(isinstance(preview_modified, datetime),
                        "ead_lastmodified should return a datetime object")
    finally:
        # clean up even when the assertions above fail, so the preview
        # collection is not left polluted for other tests
        db.removeDocument(settings.EXISTDB_PREVIEW_COLLECTION + '/raoul548.xml')
def preview(request, archive):
    """Admin view: on POST, load a finding aid document into the eXist
    *preview* collection and redirect to its preview page; on GET, show
    the (legacy) preview listing.

    :param request: Django HttpRequest
    :param archive: slug used to look up the owning :class:`Archive`
    """
    if request.method == 'POST':
        archive = get_object_or_404(Archive, slug=archive)
        filename = request.POST['filename']
        errors = []
        # pre-bound so the error template can reference it even when no
        # exception was raised (load simply returned False)
        err = None
        try:
            # only load to exist if document passes publication check
            ok, response, dbpath, fullpath = _prepublication_check(request, filename,
                archive, mode='preview')
            if ok is not True:
                return response
            db = ExistDB()
            # load the document to the *preview* collection in eXist with the same filename
            preview_dbpath = settings.EXISTDB_PREVIEW_COLLECTION + "/" + filename
            # make sure the preview collection exists, but don't complain if it's already there
            success = db.load(open(fullpath, 'r'), preview_dbpath)
        except ExistDBException as err:
            # NOTE(review): under Python 3 `except ... as err` deletes the
            # name after this block, which would break the render() below;
            # fine under Python 2 — confirm target interpreter
            success = False
            errors.append(err.message())
        if success:
            # load the file as a FindingAid object so we can generate the preview url
            ead = load_xmlobject_from_file(fullpath, FindingAid)
            messages.success(request, 'Successfully loaded <b>%s</b> for preview.' % filename)
            # redirect to document preview page with code 303 (See Other)
            return HttpResponseSeeOtherRedirect(reverse('fa-admin:preview:findingaid',
                kwargs={'id': ead.eadid}))
        else:
            # no exception but no success means the load failed;
            # *probably* due to insufficient permissions
            if errors == [] and success == False:
                errors.append('Failed to load the document to the preview collection')
            return render(request, 'fa_admin/publish-errors.html',
                {'errors': errors, 'filename': filename, 'mode': 'preview',
                 'exception': err})
    # NOTE: preview list is not used anymore; functionality is handled
    # by main admin view; if we revisit preview list, to be more usable it
    # should be filterable by archive
    else:
        fa = get_findingaid(preview=True, only=['eadid', 'list_title', 'last_modified'],
            order_by='last_modified')
        return render(request, 'fa_admin/preview_list.html',
            {'findingaids': fa,
             #'querytime': [fa.queryTime()]
             })
class ModelTest(unittest.TestCase):
    """Tests for XmlModel managers/queries backed by a scratch eXist-db collection."""

    COLLECTION = settings.EXISTDB_TEST_COLLECTION

    def setUp(self):
        # connect with explicit credentials and (re)create the test collection
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL, username=EXISTDB_SERVER_USER,
                          password=EXISTDB_SERVER_PASSWORD)
        self.db.createCollection(self.COLLECTION, True)
        test_dir = os.path.dirname(os.path.abspath(__file__))
        # close fixture file handles promptly instead of leaking them;
        # trailing True asks eXist to overwrite any existing copy
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-english.xml')
        with open(fixture) as fixture_file:
            self.db.load(fixture_file, self.COLLECTION + '/goodbye-english.xml', True)
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-french.xml')
        with open(fixture) as fixture_file:
            self.db.load(fixture_file, self.COLLECTION + '/goodbye-french.xml', True)
        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        self.db.removeCollection(self.COLLECTION)
        settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        partings = Parting.objects.all()
        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual(2, partings.count())

    def test_sibling_query(self):
        # test sibling node access via 'also'
        exc = Exclamation.objects.filter(text='Au revoir').also('next').get()
        self.assertEqual('monde', exc.next)
# # http://username:[email protected]:8080/exist # # YOU NEED TO INSERT THE USER AND PASSWORD HERE #xmldb = ExistDB('http://admin:@46.137.59.250:8080/exist') xmldb = ExistDB('http://*****:*****@localhost:8080/exist') xmldb.createCollection('docker', True) xmldb.createCollection('docker/texts', True) os.chdir('../dublin-store') for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'): xmldb.createCollection('docker/texts' + '/' + dirpath, True) if filenames: for filename in filenames: with open(dirpath + '/' + filename) as f: print "--" + dirpath + '/' + filename xmldb.load(f, 'docker/texts' + '/' + dirpath + '/' + filename, True) # # Load resources # for (dirpath, dirnames, filenames) in walk('resources'): xmldb.createCollection('docker' + '/' + dirpath, True) if filenames: for filename in filenames: with open(dirpath + '/' + filename) as f: xmldb.load(f, 'docker' + '/' + dirpath + '/' + filename, True)
file_name = os.path.join(collection_path, "%03d.xml" % (juan,)) result = subprocess.call(["/usr/bin/java", "-Dfile.encoding=UTF-8", "-Djava.util.logging.config.file=/docker/bertie-uima/src/main/properties/Logger.properties", "-jar", BERTIE_JAR, "--tei", "--file", file_name, "--owl", f.name], stdout=devnull, stderr=devnull) # Reload single document for faster response xmldb = ExistDB(server_url="http://*****:*****@" + existdb_host + ":8080/exist", timeout=10) db_collection_path = 'docker/texts/' + \ collection_path.replace('/docker/dublin-store/', '') with open(file_name) as newly_annotated_file: print " [ ] Reloading single document" try: xmldb.load(newly_annotated_file, os.path.join(db_collection_path, os.path.split(file_name)[1]), True) except: print "FAILED TO LOAD " + file_name # Send response early send_response("OK") start_uima = time.time() result = subprocess.call(["/usr/bin/java", "-Dfile.encoding=UTF-8", "-Djava.util.logging.config.file=/docker/bertie-uima/src/main/properties/Logger.properties", "-jar", BERTIE_JAR, "--tei", "--directory", collection_path, "--owl", f.name], stdout=devnull, stderr=devnull) done_uima = time.time() print "RUNTIME"
def _load_file_to_exist(self, filename):
    """Load a single XML file into the configured eXist root collection.

    :param filename: path to the XML file on disk; the basename becomes
        the document name in eXist
    """
    db = ExistDB()
    fname = path.split(filename)[-1]
    exist_path = path.join(settings.EXISTDB_ROOT_COLLECTION, fname)
    # close the file handle promptly instead of relying on GC
    with open(filename) as xmlfile:
        db.load(xmlfile, exist_path)
# # http://username:[email protected]:8080/exist # # YOU NEED TO INSERT THE USER AND PASSWORD HERE #xmldb = ExistDB('http://admin:@46.137.59.250:8080/exist') xmldb = ExistDB('http://*****:*****@localhost:8080/exist') xmldb.createCollection('docker', True) xmldb.createCollection('docker/texts', True) os.chdir('../dublin-store') for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'): xmldb.createCollection('docker/texts' + '/' + dirpath, True) if filenames: for filename in filenames: with open(dirpath + '/' + filename) as f: print "--" + dirpath + '/' + filename xmldb.load(f, 'docker/texts' + '/' + dirpath + '/' + filename, True) # # Load resources # for (dirpath, dirnames, filenames) in walk('resources'): xmldb.createCollection('docker' + '/' + dirpath, True) if filenames: for filename in filenames: with open(dirpath + '/' + filename) as f: xmldb.load(f, 'docker' + '/' + dirpath + '/' + filename, True)
xmldb = ExistDB(timeout=60) xmldb.createCollection('docker', True) xmldb.createCollection('docker/texts', True) os.chdir('../dublin-store') for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'): xmldb.createCollection('docker/texts' + '/' + dirpath, True) if filenames: for filename in sorted(filenames): with open(os.path.join(dirpath, filename)) as f: print "--" + os.path.join(dirpath, filename) try: xmldb.load( f, os.path.join('docker', 'texts', dirpath, filename), True) except: print "FAILED TO LOAD!!! " + filename # # Load resources # for (dirpath, dirnames, filenames) in walk('resources'): xmldb.createCollection('docker' + '/' + dirpath, True) if filenames: for filename in filenames: with open(dirpath + '/' + filename) as f: xmldb.load(f, os.path.join('docker', dirpath, filename), True) #
class Command(BaseCommand):
    """Management command: load DDI codebook XML files into eXist, then
    delete the successfully-loaded local copies (unless --dry-run)."""

    args = '<filename filename filename ...>'
    help = '''Loads XML files into the configured eXist collection. The local copy will be *removed* after it is successfully loaded.'''
    option_list = BaseCommand.option_list + (
        make_option('--dry-run', '-n',
            dest='dryrun',
            action='store_true',
            help='''Report on what would be done, but don't delete any files'''),
        )

    # default Django verbosity level
    v_normal = 1

    def handle(self, *files, **options):
        """Process each file: parse, prep, load to eXist, remove local copy."""
        verbosity = int(options.get('verbosity', self.v_normal))

        # check for required settings
        if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or \
           not settings.EXISTDB_ROOT_COLLECTION:
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
            # NOTE(review): this return is unreachable — raise above exits
            return

        self.db = ExistDB()
        self.cbgeocoder = CodebookGeocoder()

        # initalize progress bar
        pbar = None
        total = len(files)
        # init progress bar if processing enough files, running on a terminal
        if total >= 10 and os.isatty(sys.stderr.fileno()):
            widgets = [Percentage(), ' (', SimpleProgress(), ')', Bar(), ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=total).start()

        errored = 0
        loaded = 0
        for f in files:
            success = False
            if pbar:
                pbar.update(errored + loaded)
            try:
                # full path location where file will be loaded in exist db collection
                dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(f)
                # TODO: any error checking? validation?
                start = time.time()
                cb = load_xmlobject_from_file(f, CodeBook)
                logger.debug('%s loaded as xml in %f sec' % (f, time.time() - start))
                start = time.time()
                self.prep(cb)
                logger.debug('%s prepped in %f sec' % (f, time.time() - start))
                # load to eXist from string since DDI documents aren't that large,
                # rather than reloading the file
                if not options.get('dryrun', False):
                    start = time.time()
                    success = self.db.load(cb.serialize(pretty=True), dbpath,
                                           overwrite=True)
                    logger.debug('%s loaded to eXist in %f sec' % (f, time.time() - start))
            except IOError as e:
                self.stdout.write("Error opening %s: %s" % (f, e))
                errored += 1
            except ExistDBException as e:
                self.stdout.write("Error: failed to load %s to eXist" % f)
                self.stdout.write(e.message())
                errored += 1

            if not options.get('dryrun', False) and success:
                loaded += 1
                if verbosity > self.v_normal:
                    self.stdout.write("Loaded %s as %s" % (f, dbpath))
                # remove the local copy only after a successful eXist load
                try:
                    os.remove(f)
                except OSError as e:
                    self.stdout.write('Error removing %s: %s' % (f, e))

        if pbar:
            pbar.finish()

        # output a summary of what was done if more than one file was processed
        if verbosity >= self.v_normal:
            if loaded > 1:
                self.stdout.write("%d document%s loaded" % \
                    (loaded, 's' if loaded != 1 else ''))
            if errored > 1:
                self.stdout.write("%d document%s with errors" % \
                    (errored, 's' if errored != 1 else ''))

    # pattern for ICPSR-style topic identifiers, e.g. "ICPSR XVII.B.2"
    topic_id = re.compile('^(?P<org>[A-Z]+)[ .](?P<id>[IVX]+(\.[A-Z](\.[0-9]+(\.[a-z]+)?)?)?)')

    def prep(self, cb):
        # do any prep work or cleanup that needs to be done
        # before loading to exist
        self.local_topics(cb)
        self.clean_dates(cb)
        self.cbgeocoder.code_locations(cb)

    def icpsr_topic_id(self, topic):
        # generate icpsr topic id in the format needed for lookup in our
        # topic dictionary; returns None if not an ICPSR topic
        m = self.topic_id.match(topic)
        if m:
            match_info = m.groupdict()
            if match_info['org'] == 'ICPSR':
                return '%(org)s.%(id)s' % match_info

    def local_topics(self, cb):
        # convert ICPSR topics to local topics
        for t in cb.topics:
            topic_id = self.icpsr_topic_id(t.val)
            if topic_id is not None:
                new_topic = topic_mappings.get(topic_id, None)
                if new_topic:
                    cb.topics.append(Topic(val=new_topic, vocab='local'))
                # conditional topics if the geographic coverage is global
                if topic_id in conditional_topics['global'] and \
                   'Global' in [unicode(gc) for gc in cb.geo_coverage]:
                    cb.topics.append(Topic(val=conditional_topics['global'][topic_id],
                                           vocab='local'))

    def clean_dates(self, cb):
        # clean up dates so we can search consistently on 4-digit years
        # or more; dates should be YYYY, YYYY-MM, or YYYY-MM-DD
        prev_date = None
        for d in cb.time_periods:
            # special case: two-digit date as second date in a cycle
            # interpret as month on the year that starts the cycle
            # NOTE(review): if the very first period has event == 'end',
            # prev_date is None here and this raises AttributeError —
            # presumably input always starts a cycle first; confirm
            if d.event == 'end' and d.cycle == prev_date.cycle and \
               len(d.date) == 2:
                d.date = '%04d-%02d' % (int(prev_date.date), int(d.date))
            elif len(d.date) < 4:
                d.date = '%04d' % int(d.date)
            # store current date as previous date for next loop, in case
            # we need to clean up an end date in a cycle
            prev_date = d
def _load_file_to_exist(self, file):
    """Load a single XML file into the configured eXist root collection,
    overwriting any existing document with the same name.

    :param file: path to the XML file on disk (NOTE: parameter name
        shadows the ``file`` builtin; kept for interface compatibility)
    """
    db = ExistDB()
    fname = path.split(file)[-1]
    exist_path = path.join(settings.EXISTDB_ROOT_COLLECTION, fname)
    # close the file handle promptly instead of relying on GC;
    # trailing True = overwrite existing document
    with open(file) as xmlfile:
        db.load(xmlfile, exist_path, True)
def handle(self, *args, **options):
    """Load EAD finding aid files into eXist and (optionally) regenerate
    cached PDFs; files come from the command line or from the archives'
    svn working copies."""
    verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
    v_normal = 1
    v_all = 2

    if options['pdf_only'] and options['skip_pdf_reload']:
        raise CommandError("Options -s and -p are not compatible")

    # check for required settings
    if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION:
        raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
        # NOTE(review): this return is unreachable — the raise above exits
        return

    if len(args):
        files = args
    else:
        # Note: copied from prep_ead manage command; move somewhere common?
        files = set()
        svn = svn_client()
        for archive in Archive.objects.all():
            # update to make sure we have latest version of everything
            svn.update(str(archive.svn_local_path))   # apparently can't handle unicode
            files.update(set(glob.iglob(os.path.join(archive.svn_local_path, '*.xml'))))

    if verbosity == v_all:
        print 'Documents will be loaded to configured eXist collection: %s' \
            % settings.EXISTDB_ROOT_COLLECTION
        if options['skip_pdf_reload']:
            print "** Skipping PDFs cache reload"

    db = ExistDB()

    loaded = 0
    errored = 0
    pdf_tasks = {}

    start_time = datetime.now()

    if not options['pdf_only']:
        # unless PDF reload only has been specified, load files
        for file in files:
            try:
                # full path location where file will be loaded in exist db collection
                dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(file)
                errors = check_ead(file, dbpath)
                if errors:
                    # report errors, don't load
                    errored += 1
                    print "Error: %s does not pass publication checks; not loading to eXist." % file
                    if verbosity >= v_normal:
                        print "  Errors found:"
                        for err in errors:
                            print "    %s" % err
                else:
                    with open(file, 'r') as eadfile:
                        success = db.load(eadfile, dbpath, overwrite=True)

                    if success:
                        loaded += 1
                        if verbosity >= v_normal:
                            print "Loaded %s" % file
                        # load the file as a FindingAid object to get the eadid for PDF reload
                        ead = load_xmlobject_from_file(file, FindingAid)
                        # trigger PDF regeneration in the cache and store task result
                        # - unless user has requested PDF reload be skipped
                        if not options['skip_pdf_reload']:
                            pdf_tasks[ead.eadid.value] = reload_cached_pdf.delay(ead.eadid.value)
                            # NOTE: unlike the web admin publish, this does not
                            # generate TaskResult db records; task outcomes will be
                            # checked & reported before the script finishes
                    else:
                        errored += 1
                        print "Error: failed to load %s to eXist" % file
            except ExistDBException, e:
                print "Error: failed to load %s to eXist" % file
                print e.message()
                errored += 1

    # output a summary of what was done
    print "%d document%s loaded" % (loaded, 's' if loaded != 1 else '')
    print "%d document%s with errors" % (errored, 's' if errored != 1 else '')
# xmldb = ExistDB(timeout=60) xmldb.createCollection('docker', True) xmldb.createCollection('docker/texts', True) os.chdir('../dublin-store') for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'): xmldb.createCollection('docker/texts' + '/' + dirpath, True) if filenames: for filename in sorted(filenames): with open(os.path.join(dirpath, filename)) as f: print "--" + os.path.join(dirpath, filename) try: xmldb.load(f, os.path.join('docker', 'texts', dirpath, filename), True) except: print "FAILED TO LOAD!!! " + filename # # Load resources # for (dirpath, dirnames, filenames) in walk('resources'): xmldb.createCollection('docker' + '/' + dirpath, True) if filenames: for filename in filenames: with open(dirpath + '/' + filename) as f: xmldb.load(f, os.path.join('docker', dirpath, filename), True) # # Load TEI into solr