def preview(request, archive):
    """Load an EAD document into the eXist *preview* collection.

    On POST, run the pre-publication check on the requested file and, if it
    passes, load it to the preview collection and redirect (303) to the
    document preview page.  On failure, render the publish-errors page.

    :param request: Django request; expects ``filename`` in POST data
    :param archive: archive slug used to look up the :class:`Archive`
    """
    if request.method == 'POST':
        archive = get_object_or_404(Archive, slug=archive)
        filename = request.POST['filename']
        errors = []
        # initialize so the error template can reference the exception even
        # when none was raised (the original hit a NameError on `e` here)
        e = None
        try:
            # only load to exist if document passes publication check
            ok, response, dbpath, fullpath = _prepublication_check(request, filename, archive, mode='preview')
            if ok is not True:
                return response
            db = ExistDB()
            # load the document to the *preview* collection in eXist with the same filename
            preview_dbpath = settings.EXISTDB_PREVIEW_COLLECTION + "/" + filename
            # make sure the preview collection exists, but don't complain if it's already there;
            # close the file handle promptly instead of leaking it
            with open(fullpath, 'r') as srcfile:
                success = db.load(srcfile, preview_dbpath, overwrite=True)
        except ExistDBException as exc:
            # rebind so the exception stays accessible after the except block
            e = exc
            success = False
            errors.append(e.message())
        if success:
            # load the file as a FindingAid object so we can generate the preview url
            ead = load_xmlobject_from_file(fullpath, FindingAid)
            messages.success(request, 'Successfully loaded <b>%s</b> for preview.' % filename)
            # redirect to document preview page with code 303 (See Other)
            return HttpResponseSeeOtherRedirect(reverse('fa-admin:preview:findingaid',
                                                        kwargs={'id': ead.eadid}))
        else:
            # eXist returned no error but also did not load the document
            if not errors:
                errors.append('Failed to load the document to the preview collection')
            return render(request, 'fa_admin/publish-errors.html',
                          {'errors': errors, 'filename': filename,
                           'mode': 'preview', 'exception': e})
def _remove_file_from_exist(self, filename):
    """Delete the eXist copy of *filename* from the root collection.

    Failures are deliberately ignored: tests may already have removed
    the fixture, so a missing document is not an error here.
    """
    basename = path.split(filename)[-1]
    target = path.join(settings.EXISTDB_ROOT_COLLECTION, basename)
    connection = ExistDB()
    # tests could remove fixtures, so an exception here is not a problem
    try:
        connection.removeDocument(target)
    except ExistDBException:
        # any way to determine if error ever needs to be reported?
        pass
def _remove_file_from_exist(self, file):
    """Remove the eXist-db copy of the given fixture file from the root collection."""
    # NOTE(review): parameter name shadows the `file` builtin (Python 2);
    # consider renaming to `filename` as in the sibling implementation
    db = ExistDB()
    # keep only the basename; fixtures are stored flat in the root collection
    fname = path.split(file)[-1]
    exist_path = path.join(settings.EXISTDB_ROOT_COLLECTION, fname)
    # tests could remove fixtures, so an exception here is not a problem
    try:
        db.removeDocument(exist_path)
    except ExistDBException:
        # any way to determine if error ever needs to be reported?
        pass
def _fixture_teardown(self):
    """Remove any eXist fixtures declared on this test case, then run the
    standard Django fixture teardown."""
    fixtures = getattr(self, 'exist_fixtures', None)
    if fixtures is not None:
        db = ExistDB()
        if 'index' in fixtures:
            db.removeCollectionIndex(settings.EXISTDB_ROOT_COLLECTION)
        # gather filenames from both the fixture directory and explicit list
        doomed = []
        if 'directory' in fixtures:
            doomed.extend(glob(path.join(fixtures['directory'], '*.xml')))
        if 'files' in fixtures:
            doomed.extend(fixtures['files'])
        for filename in doomed:
            self._remove_file_from_exist(filename)
    return super(TestCase, self)._fixture_teardown()
def use_test_collection(self):
    """Point settings.EXISTDB_ROOT_COLLECTION at a test collection.

    Saves the current root collection so restore_root_collection() can put
    it back after the test run, then creates the test collection in eXist.
    """
    self.stored_default_collection = getattr(settings, "EXISTDB_ROOT_COLLECTION", None)
    if getattr(settings, "EXISTDB_TEST_COLLECTION", None):
        settings.EXISTDB_ROOT_COLLECTION = settings.EXISTDB_TEST_COLLECTION
    else:
        # no explicit test collection configured; derive one from the root
        settings.EXISTDB_ROOT_COLLECTION = getattr(settings, "EXISTDB_ROOT_COLLECTION", "/default") + "_test"
    print >> sys.stderr, "Creating eXist Test Collection: %s" % \
        settings.EXISTDB_ROOT_COLLECTION
    # now that existdb root collection has been set to test collection, init db connection
    db = ExistDB()
    # create test collection (don't complain if collection already exists)
    db.createCollection(settings.EXISTDB_ROOT_COLLECTION, True)
def delete_ead(request, id, archive=None): """ Delete a published EAD. On GET, display a form with information about the document to be removed. On POST, actually remove the specified EAD document from eXist and create (or update) a deleted record for that document in the relational DB. """ # retrieve the finding aid to be deleted with fields needed for # form display or actual deletion if archive is not None: arch = get_object_or_404(Archive, slug=archive) filter = {'repository__fulltext_terms': '"%s"' % arch.name} else: filter = {} try: fa = FindingAid.objects.only('eadid', 'unittitle', 'document_name', 'collection_name').filter(**filter).get(eadid=id) # if this record has been deleted before, get that record and update it deleted_info, created = Deleted.objects.get_or_create(eadid=fa.eadid) deleted_info.title = unicode(fa.unittitle) # update with title from current document render_form = False # on GET, display delete form if request.method == 'GET': # pre-populate the form with info from the finding aid to be removed delete_form = DeleteForm(instance=deleted_info) render_form = True else: # POST : actually delete the document delete_form = DeleteForm(request.POST, instance=deleted_info) if delete_form.is_valid(): delete_form.save() db = ExistDB() try: success = db.removeDocument(fa.collection_name + '/' + fa.document_name) if success: DeleteForm(request.POST, instance=deleted_info).save() messages.success(request, 'Successfully removed <b>%s</b>.' % id) else: # remove exited normally but was not successful messages.error(request, 'Error: failed to removed <b>%s</b>.' % id) except ExistDBException, e: messages.error(request, "Error: failed to remove <b>%s</b> - %s." \ % (id, e.message())) else:
def _fixture_teardown(self):
    """Remove eXist fixtures for this test case (index configuration,
    directory contents, explicitly listed files), then delegate to the
    standard Django fixture teardown."""
    if hasattr(self, 'exist_fixtures'):
        db = ExistDB()
        if 'index' in self.exist_fixtures:
            db.removeCollectionIndex(settings.EXISTDB_ROOT_COLLECTION)
        if 'directory' in self.exist_fixtures:
            # loop variable renamed: the original shadowed the `file` builtin
            for filename in glob(path.join(self.exist_fixtures['directory'], '*.xml')):
                self._remove_file_from_exist(filename)
        if 'files' in self.exist_fixtures:
            for filename in self.exist_fixtures['files']:
                self._remove_file_from_exist(filename)
    return super(TestCase, self)._fixture_teardown()
def preview(request, archive):
    """Preview an EAD document before publication.

    On POST, run the pre-publication check on the requested file and, if it
    passes, load it to the eXist *preview* collection and redirect (303) to
    the document preview page.  On GET, display the preview list (legacy).
    """
    if request.method == 'POST':
        archive = get_object_or_404(Archive, slug=archive)
        filename = request.POST['filename']
        errors = []
        # track the raised exception (if any) for the error template
        err = None
        try:
            # only load to exist if document passes publication check
            ok, response, dbpath, fullpath = _prepublication_check(request, filename, archive, mode='preview')
            if ok is not True:
                return response
            db = ExistDB()
            # load the document to the *preview* collection in eXist with the same fileneame
            preview_dbpath = settings.EXISTDB_PREVIEW_COLLECTION + "/" + filename
            # make sure the preview collection exists, but don't complain if it's already there
            success = db.load(open(fullpath, 'r'), preview_dbpath)
        except ExistDBException as err:
            success = False
            errors.append(err.message())
        if success:
            # load the file as a FindingAid object so we can generate the preview url
            ead = load_xmlobject_from_file(fullpath, FindingAid)
            messages.success(request, 'Successfully loaded <b>%s</b> for preview.' % filename)
            # redirect to document preview page with code 303 (See Other)
            return HttpResponseSeeOtherRedirect(reverse('fa-admin:preview:findingaid',
                                                        kwargs={'id': ead.eadid}))
        else:
            # no exception but no success means the load failed;
            # *probably* due to insufficient permissions
            if errors == [] and success == False:
                errors.append('Failed to load the document to the preview collection')
            return render(request, 'fa_admin/publish-errors.html',
                          {'errors': errors, 'filename': filename,
                           'mode': 'preview', 'exception': err})
    # NOTE: preview list is not used anymore; functionality is handled
    # by main admin view; if we revisit preview list, to be more usable it
    # should be filterable by archive
    else:
        fa = get_findingaid(preview=True,
                            only=['eadid', 'list_title', 'last_modified'],
                            order_by='last_modified')
        return render(request, 'fa_admin/preview_list.html',
                      {'findingaids': fa,
                       #'querytime': [fa.queryTime()]
                       })
def _fixture_setup(self):
    """Load any eXist fixtures declared on this test case before running
    the standard Django fixture setup."""
    fixtures = getattr(self, 'exist_fixtures', None)
    if fixtures is not None:
        db = ExistDB()
        # index configuration, if specified, is applied first
        if 'index' in fixtures:
            db.loadCollectionIndex(settings.EXISTDB_ROOT_COLLECTION,
                                   open(fixtures['index']))
        # gather filenames from both the fixture directory and explicit list
        sources = []
        if 'directory' in fixtures:
            sources.extend(glob(path.join(fixtures['directory'], '*.xml')))
        if 'files' in fixtures:
            sources.extend(fixtures['files'])
        for filename in sources:
            self._load_file_to_exist(filename)
    return super(TestCase, self)._fixture_setup()
def index(request):
    """Landing page: text/author counts from eXist plus the wiki page list."""
    # XML and SPARQL numbers
    # Count texts and authors
    documents = QuerySet(using=ExistDB(), xpath='/tei:TEI',
                         collection='docker/texts/', model=RocheTEI)
    documents = documents.filter(chapter='1').only('title', 'title_en', 'author')
    # TODO: order by title
    documents = documents.order_by('title_en')

    # wiki page list: [url-encoded name, display name]
    wiki_pages = [
        [page.replace(" ", "%20"), page]
        for page in sorted(os.listdir("/docker/dublin-store/sinology/mainSpace"))
    ]

    context = {
        'number_texts': documents.count(),
        'number_authors': documents.distinct().count(),
        'tei_documents': documents,
        "wiki_pages": wiki_pages,
    }
    return render(request, 'roche/index.html', context)
def _fixture_setup(self):
    """Load eXist fixtures (index configuration, directory contents, and
    explicitly listed files) before the standard Django fixture setup."""
    if hasattr(self, 'exist_fixtures'):
        db = ExistDB()
        # load index
        if 'index' in self.exist_fixtures:
            db.loadCollectionIndex(settings.EXISTDB_ROOT_COLLECTION,
                                   open(self.exist_fixtures['index']))
        if 'directory' in self.exist_fixtures:
            # loop variable renamed: the original shadowed the `file` builtin
            for filename in glob(path.join(self.exist_fixtures['directory'], '*.xml')):
                self._load_file_to_exist(filename)
        if 'files' in self.exist_fixtures:
            for filename in self.exist_fixtures['files']:
                self._load_file_to_exist(filename)
    return super(TestCase, self)._fixture_setup()
def restore_root_collection(self):
    """Remove the eXist test collection and restore the original
    settings.EXISTDB_ROOT_COLLECTION saved by use_test_collection()."""
    # if use_test_collection didn't run, don't change anything
    if self.stored_default_collection is not None:
        print >> sys.stderr, "Removing eXist Test Collection: %s" % settings.EXISTDB_ROOT_COLLECTION
        # before restoring existdb non-test root collection, init db connection
        db = ExistDB()
        try:
            # remove test collection
            db.removeCollection(settings.EXISTDB_ROOT_COLLECTION)
        except ExistDBException, e:
            # best effort: report and continue so the root collection is restored
            print >> sys.stderr, "Error removing collection %s: %s" \
                % (settings.EXISTDB_ROOT_COLLECTION, e)
        print >> sys.stderr, "Restoring eXist Root Collection: %s" \
            % self.stored_default_collection
        settings.EXISTDB_ROOT_COLLECTION = self.stored_default_collection
def visual_places(request, title, juan):
    """Render place names occurring in the given text chapter.

    Place names are collected from the matching TEI documents in eXist.  A
    SPARQL timeline query is attempted but its result is currently unused.
    """
    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI',
                  collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    places = []
    for q in qs:
        places.extend(q.place_names)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)
    try:
        sparql_result = sparql.query()
    except Exception:
        # narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
        # not swallowed; the SPARQL endpoint is optional for this view
        sparql_result = {}
    # removed unused local `sparql_places` (dead assignment in the original)

    return render_to_response('r/visual_places.html', {
        'tei_documents': qs,
        'places': places,
        'juan': juan,
    }, context_instance=RequestContext(request))
def index(request):
    """List all TEI documents (first chapter of each text) for browsing."""
    documents = QuerySet(using=ExistDB(), xpath='/tei:TEI',
                         collection='docker/texts/', model=RocheTEI)
    # Make titles unique (maybe there is a better method?)
    documents = documents.filter(chapter='1').only('title', 'title_en', 'author')
    return render_to_response('browser/index.html',
                              {'tei_documents': documents},
                              context_instance=RequestContext(request))
def index_title(request, letter):
    """Browse TEI documents whose title begins with *letter*."""
    matches = QuerySet(using=ExistDB(), xpath='/tei:TEI',
                       collection='docker/texts/', model=Tei)
    # filter by titles starting with letter
    matches = matches.filter(title__startswith=letter)
    return render_to_response('browser/index.html',
                              {'tei_documents': matches},
                              context_instance=RequestContext(request))
def restore_root_collection(self): # if use_test_collection didn't run, don't change anything delattr(settings, "EXISTDB_ROOT_COLLECTION_REAL") if self.stored_default_collection is not None: print >> sys.stderr, "Removing eXist Test Collection: %s" % settings.EXISTDB_ROOT_COLLECTION # before restoring existdb non-test root collection, init db connection db = ExistDB() try: # remove test collection db.removeCollection(settings.EXISTDB_ROOT_COLLECTION) except ExistDBException, e: print >> sys.stderr, "Error removing collection %s: %s" \ % (settings.EXISTDB_ROOT_COLLECTION, e) print >> sys.stderr, "Restoring eXist Root Collection: %s" \ % self.stored_default_collection settings.EXISTDB_ROOT_COLLECTION = self.stored_default_collection
def setUp(self): self.db = ExistDB(server_url=EXISTDB_SERVER_URL) # create index for collection - should be applied to newly loaded files self.db.loadCollectionIndex(COLLECTION, self.FIXTURE_INDEX) load_fixtures(self.db) self.qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=QueryTestModel)
def index(request):
    """Full-text search: find TEI documents containing the query term."""
    connection = ExistDB()
    results = QuerySet(using=connection, xpath='/tei:TEI',
                       collection='docker/texts/', model=RocheTEI,
                       fulltext_options={'default-operator': 'and'})
    results = results.filter(body__fulltext_terms='至')
    return render_to_response('search/index.html', {'tei_documents': results})
def setUp(self):
    """Create the test collection, load the two 'goodbye' fixtures, and
    temporarily point EXISTDB_ROOT_COLLECTION at the test collection."""
    self.db = ExistDB(server_url=EXISTDB_SERVER_URL,
                      username=EXISTDB_SERVER_USER,
                      password=EXISTDB_SERVER_PASSWORD)
    self.db.createCollection(self.COLLECTION, True)
    test_dir = os.path.dirname(os.path.abspath(__file__))
    # context managers close the fixture handles promptly (the original
    # leaked them and kept an unused `loaded` result)
    fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-english.xml')
    with open(fixture) as f:
        self.db.load(f, self.COLLECTION + '/goodbye-english.xml', True)
    fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-french.xml')
    with open(fixture) as f:
        self.db.load(f, self.COLLECTION + '/goodbye-french.xml', True)
    # temporarily set test collection as root exist collection
    self._root_collection = settings.EXISTDB_ROOT_COLLECTION
    settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION
def index_author(request, author, startswith):
    """Browse TEI documents by author: exact match, or by leading letter
    when *startswith* is truthy."""
    matches = QuerySet(using=ExistDB(), xpath='/tei:TEI',
                       collection='docker/texts/', model=Tei)
    if startswith:
        # filter by authors starting with letter
        matches = matches.filter(author__startswith=author)
    else:
        matches = matches.filter(author=author)
    return render_to_response('browser/index.html',
                              {'tei_documents': matches},
                              context_instance=RequestContext(request))
def use_test_collection(self):
    """Point settings.EXISTDB_ROOT_COLLECTION at a test collection.

    Saves the current root collection (also mirrored into
    EXISTDB_ROOT_COLLECTION_REAL) so restore_root_collection() can undo this,
    then creates the test collection in eXist.
    """
    self.stored_default_collection = getattr(settings, "EXISTDB_ROOT_COLLECTION", None)
    setattr(settings, "EXISTDB_ROOT_COLLECTION_REAL", self.stored_default_collection)
    if getattr(settings, "EXISTDB_TEST_COLLECTION", None):
        settings.EXISTDB_ROOT_COLLECTION = settings.EXISTDB_TEST_COLLECTION
    else:
        # no explicit test collection configured; derive one from the root
        settings.EXISTDB_ROOT_COLLECTION = getattr(
            settings, "EXISTDB_ROOT_COLLECTION", "/default") + "_test"
    print >> sys.stderr, "Creating eXist Test Collection: %s" % \
        settings.EXISTDB_ROOT_COLLECTION
    # now that existdb root collection has been set to test collection, init db connection
    db = ExistDB()
    # create test collection (don't complain if collection already exists)
    db.createCollection(settings.EXISTDB_ROOT_COLLECTION, True)
def text_info(request, title):
    """Show summary information for a text: chapter titles with per-chapter
    character counts.

    NOTE(review): collection of place_names/persons/terms is commented out in
    the loop below, so those context lists are always empty; js_data is a
    hard-coded placeholder.
    """
    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title)

    result = ""
    place_names = []
    persons = []
    terms = []
    chapter_titles = []
    for q in qs:
        # count characters per chapter, excluding punctuation and newlines
        number_characters = 0
        for d in q.body.div:
            text = re.sub(RE_INTERPUCTION, '', d.text)
            text = text.replace("\n", "")
            #text = text.replace("", "")
            number_characters += len(text)
        if q.chapter_title:
            # normalize whitespace and truncate the display title to 70 chars
            content = q.chapter_title.replace(" ", "").replace("\n", "")[:70]
        else:
            content = 'XXX'
        # documents without an explicit chapter are treated as chapter 1
        if q.chapter:
            chapter = q.chapter
        else:
            chapter = 1
        chapter_titles.append([chapter, content, number_characters])
        #place_names.extend(q.place_names)
        #persons.extend(q.persons)
        #terms.extend(q.terms)
    place_names = list(set(place_names))
    persons = list(set(persons))
    terms = list(set(terms))
    # Place names for leaflet
    # place_names
    js_data = json.dumps([[[50.5, 30.5], "test"]])
    return render_to_response('browser/text_view_info.html', {
        'tei_documents': qs,
        'tei_transform': result,
        'place_names': place_names,
        'persons': persons,
        'terms': terms,
        'js_data': js_data,
        'chapter_titles': sorted(chapter_titles)
    }, context_instance=RequestContext(request))
class ModelTest(unittest.TestCase):
    """Exercise XmlModel managers against a temporary eXist test collection."""

    COLLECTION = settings.EXISTDB_TEST_COLLECTION

    def setUp(self):
        self.db = ExistDB()
        self.db.createCollection(self.COLLECTION, True)
        test_dir = os.path.dirname(os.path.abspath(__file__))
        # context managers close the fixture handles promptly (the original
        # leaked them and kept an unused `loaded` result)
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-english.xml')
        with open(fixture) as f:
            self.db.load(f, self.COLLECTION + '/goodbye-english.xml', True)
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-french.xml')
        with open(fixture) as f:
            self.db.load(f, self.COLLECTION + '/goodbye-french.xml', True)
        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        self.db.removeCollection(self.COLLECTION)
        settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        partings = Parting.objects.all()
        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual(2, partings.count())
class ModelTest(unittest.TestCase):
    """Exercise XmlModel managers and sibling ('also') queries against a
    temporary eXist test collection."""

    COLLECTION = EXISTDB_TEST_COLLECTION

    def setUp(self):
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL,
                          username=EXISTDB_SERVER_USER,
                          password=EXISTDB_SERVER_PASSWORD)
        self.db.createCollection(self.COLLECTION, True)
        test_dir = os.path.dirname(os.path.abspath(__file__))
        # context managers close the fixture handles promptly (the original
        # leaked them and kept an unused `loaded` result)
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-english.xml')
        with open(fixture) as f:
            self.db.load(f, self.COLLECTION + '/goodbye-english.xml')
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-french.xml')
        with open(fixture) as f:
            self.db.load(f, self.COLLECTION + '/goodbye-french.xml')
        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        self.db.removeCollection(self.COLLECTION)
        settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        partings = Parting.objects.all()
        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual(2, partings.count())

    def test_sibling_query(self):
        # test sibling node access via 'also'
        exc = Exclamation.objects.filter(text='Au revoir').also('next').get()
        self.assertEqual('monde', exc.next)
def setUp(self):
    """Create the test collection, load the two 'goodbye' fixtures, and
    temporarily point EXISTDB_ROOT_COLLECTION at the test collection."""
    self.db = ExistDB()
    self.db.createCollection(self.COLLECTION, True)
    test_dir = os.path.dirname(os.path.abspath(__file__))
    # context managers close the fixture handles promptly (the original
    # leaked them and kept an unused `loaded` result)
    fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-english.xml')
    with open(fixture) as f:
        self.db.load(f, self.COLLECTION + '/goodbye-english.xml', True)
    fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-french.xml')
    with open(fixture) as f:
        self.db.load(f, self.COLLECTION + '/goodbye-french.xml', True)
    # temporarily set test collection as root exist collection
    self._root_collection = settings.EXISTDB_ROOT_COLLECTION
    settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION
def test_ead_lastmodified(self):
    """ead_lastmodified should return a current datetime for a published
    document, raise Http404 for an unknown eadid, and also work for
    documents in the preview collection."""
    modified = ead_lastmodified('rqst', 'abbey244')
    # assertTrue: assert_ is a deprecated alias
    self.assertTrue(isinstance(modified, datetime),
                    "ead_lastmodified should return a datetime object")
    date_format = '%Y-%m-%d'
    expected = datetime.now().strftime(date_format)
    value = modified.strftime(date_format)
    self.assertEqual(expected, value,
                     'ead lastmodified should be today, expected %s, got %s' % (expected, value))

    # invalid eadid
    self.assertRaises(Http404, ead_lastmodified, 'rqst', 'bogusid')

    db = ExistDB()
    # preview document - load fixture to preview collection;
    # close the fixture handle promptly (the original leaked it)
    fullpath = path.join(exist_fixture_path, 'raoul548.xml')
    with open(fullpath, 'r') as fixture:
        db.load(fixture, settings.EXISTDB_PREVIEW_COLLECTION + '/raoul548.xml')
    try:
        preview_modified = ead_lastmodified('rqst', 'raoul548', preview=True)
        self.assertTrue(isinstance(preview_modified, datetime),
                        "ead_lastmodified should return a datetime object")
    finally:
        # clean up even if the assertion above fails, so later tests do not
        # see a stale preview document
        db.removeDocument(settings.EXISTDB_PREVIEW_COLLECTION + '/raoul548.xml')
def visual_timeline(request, title, juan):
    """Render a timeline of persons mentioned in a text chapter.

    Person names come from the TEI documents; birth/death years come from an
    optional SPARQL endpoint.  Persons without SPARQL dates are silently
    dropped from the timeline.
    """
    # import hoisted from mid-function to the top of the function body
    from operator import itemgetter

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI',
                  collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    persons = []
    for q in qs:
        persons.extend(q.persons)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)
    try:
        sparql_result = sparql.query()
    except Exception:
        # narrowed from a bare `except:`; the endpoint is optional
        sparql_result = {}

    # person name -> [birth year, death year]
    sparql_persons = {}
    if sparql_result:
        for binding in sparql_result.bindings:
            sparql_persons[binding[u"person"].value] = [
                binding[u"birthYear"].value, binding[u"deathYear"].value
            ]

    #persons = [u"范仲淹", u"蘇舜欽", u"韓愈"]
    timeline_persons = []
    for p in set(persons):
        years = sparql_persons.get(p, None)
        if years:
            timeline_persons.append([p, int(years[0]), int(years[1])])

    # sort rows by birth year for display
    timeline_persons = sorted(timeline_persons, key=itemgetter(1))
    timeline_persons = json.dumps(timeline_persons)

    return render_to_response('r/visual_timeline.html', {
        'tei_documents': qs,
        'timeline_persons': timeline_persons,
        'juan': juan
    }, context_instance=RequestContext(request))
def text_download(request, title, file_format, juan=0):
    """
    Download a text or a single chapter as plain text file
    or as a (colored) pdf.
    """
    import pinyin
    # romanized title becomes the download filename
    pinyin_title = pinyin.get(title)

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title)
    # juan == 0 (default) means the whole text; otherwise a single chapter
    if juan:
        qs = qs.filter(chapter=juan)

    result = ""
    for q in qs:
        for d in q.body.div:
            # strip whitespace and insert a blank line after each full stop
            result += d.text.replace(" ", "").replace("\n", "").replace(
                "\t", "").replace(u"。", u"。\n\n")

    if file_format == 'txt':
        response = HttpResponse(content_type='text/plain')
        response[
            'Content-Disposition'] = 'attachment; filename="{}.txt"'.format(
                pinyin_title)
        response.write(result)
    else:
        # any other requested format is rendered as a PDF with an embedded
        # CJK-capable font
        from fpdf import FPDF
        pdf = FPDF(unit='mm', format='A4')
        pdf.add_page()
        pdf.add_font('Droid', '', 'DroidSansFallbackFull.ttf', uni=True)
        pdf.set_font('Droid', '', 12)
        pdf.write(5, unicode(result))
        response = HttpResponse(pdf.output(dest='S'),
                                content_type='application/pdf')
        response[
            'Content-Disposition'] = 'attachment; filename="{}.pdf"'.format(
                pinyin_title)

    return response
def text_view(request, title):
    """Render the full text of a title, transforming each chapter via XSLT."""
    documents = QuerySet(using=ExistDB(), xpath='/tei:TEI',
                         collection='docker/texts/', model=RocheTEI)
    # filter by title
    documents = documents.filter(title=title).order_by('chapter')
    max_juan = documents.count()

    # transform each chapter body and concatenate in chapter order
    chunks = []
    for doc in documents:
        chunks.append(doc.body.xsl_transform(xsl=XSL_TRANSFORM_1).serialize())
    result = "".join(chunks)

    context = {
        'tei_documents': documents,
        'tei_transform': result,
        'text_title': documents[0].title,
        'max_juan': max_juan,
    }
    return render_to_response('browser/text_view.html', context,
                              context_instance=RequestContext(request))
def get_query_set(self):
    """
    Get the default :class:`eulexistdb.db.QuerySet` returned by this
    ``Manager``. Typically this returns a ``QuerySet`` based on the
    ``Manager``'s `xpath`, evaluated in the
    ``settings.EXISTDB_ROOT_COLLECTION`` on a default
    :class:`eulexistdb.db.ExistDB`.

    This is a convenient point for developers to customize an object's
    managers. Deriving a child class from Manager and overriding or extending
    this method is a handy way to create custom queries accessible from an
    :class:`~eulexistdb.models.XmlModel`.
    """
    # getattr with a default replaces the hasattr/else branch
    fulltext_opts = getattr(settings, 'EXISTDB_FULLTEXT_OPTIONS', {})
    return QuerySet(model=self.model, xpath=self.xpath, using=ExistDB(),
                    collection=settings.EXISTDB_ROOT_COLLECTION,
                    fulltext_options=fulltext_opts)
def render(self, context):
    """Look up a place name in the DDBC authority records stored in eXist and
    return an HTML summary; return '' if the variable cannot be resolved."""
    from browser.models import DDBCPlaceName
    try:
        self.place_name = self.place_name.resolve(context)
    except template.VariableDoesNotExist:
        return ''

    records = QuerySet(using=ExistDB(), xpath='/tei:TEI//tei:place',
                       collection='docker/resources/', model=DDBCPlaceName)
    records = records.filter(place_names=self.place_name)

    # build one paragraph per matching authority record
    fragments = []
    for record in records:
        fragments.append('<p>')
        fragments.append('Other names: ' + u', '.join(record.place_names) + '<br>')
        fragments.append('District: ' + record.district + '<br>')
        fragments.append('Notes: ' + u' '.join(record.notes) + '<br>')
        fragments.append('Location: ' + record.geo + '<br>')
        fragments.append('</p>')
    return u''.join(fragments)
class ModelTest(unittest.TestCase):
    """Exercise XmlModel managers and sibling ('also') queries against a
    temporary eXist test collection."""

    COLLECTION = settings.EXISTDB_TEST_COLLECTION

    def setUp(self):
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL,
                          username=EXISTDB_SERVER_USER,
                          password=EXISTDB_SERVER_PASSWORD)
        self.db.createCollection(self.COLLECTION, True)
        test_dir = os.path.dirname(os.path.abspath(__file__))
        # context managers close the fixture handles promptly (the original
        # leaked them and kept an unused `loaded` result)
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-english.xml')
        with open(fixture) as f:
            self.db.load(f, self.COLLECTION + '/goodbye-english.xml', True)
        fixture = os.path.join(test_dir, 'exist_fixtures', 'goodbye-french.xml')
        with open(fixture) as f:
            self.db.load(f, self.COLLECTION + '/goodbye-french.xml', True)
        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        self.db.removeCollection(self.COLLECTION)
        settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        partings = Parting.objects.all()
        # assertEqual: assertEquals is a deprecated alias
        self.assertEqual(2, partings.count())

    def test_sibling_query(self):
        # test sibling node access via 'also'
        exc = Exclamation.objects.filter(text='Au revoir').also('next').get()
        self.assertEqual('monde', exc.next)
def _load_file_to_exist(self, filename):
    """Load a fixture file into the eXist root collection, stored under
    its basename."""
    db = ExistDB()
    fname = path.split(filename)[-1]
    exist_path = path.join(settings.EXISTDB_ROOT_COLLECTION, fname)
    # close the file handle promptly instead of leaking it
    with open(filename) as f:
        db.load(f, exist_path)
def publish(request):
    """
    Admin publication form. Allows publishing an EAD file by updating or adding
    it to the configured eXist database so it will be immediately visible on
    the public site. Files can only be published if they pass an EAD sanity check,
    implemented in :meth:`~findingaids.fa_admin.utils.check_ead`.

    On POST, sanity-check the EAD file specified in request from the configured
    and (if it passes all checks), publish it to make it immediately visible on
    the site. If publish is successful, redirects the user to main admin page
    with a success message that links to the published document on the site.
    If the sanity-check fails, displays a page with any problems found.
    """
    # formerly supported publish from filename, but now only supports
    # publish from preview
    if 'preview_id' not in request.POST:
        messages.error(request, "No preview document specified for publication")
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))
    id = request.POST['preview_id']

    # retrieve info about the document from preview collection
    try:
        # because of the way eulcore.existdb.queryset constructs returns with 'also' fields,
        # it is simpler and better to retrieve document name separately
        ead = get_findingaid(id, preview=True)
        ead_docname = get_findingaid(id, preview=True, only=['document_name'])
        filename = ead_docname.document_name
    except Http404:
        # not found in exist
        messages.error(request, '''Publish failed. Could not retrieve <b>%s</b> from preview collection. Please reload and try again.''' % id)
        # if ead could not be retrieved from preview mode, skip processing
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    # determine archive this ead is associated with
    xml = ead.serialize()
    archive = None
    if not ead.repository:
        messages.error(request, '''Publish failed. Could not determine which archive <b>%s</b> belongs to.
            Please update subarea, reload, and try again.''' % id)
    else:
        archive_name = ead.repository[0]
        # NOTE: EAD supports multiple subarea tags, but in practice we only
        # use one, so it should be safe to assume the first should be used for permissions
        try:
            archive = Archive.objects.get(name=archive_name)
        except ObjectDoesNotExist:
            messages.error(request, '''Publish failed. Could not find archive <b>%s</b>.''' % archive_name)

    # bail out if archive could not be identified
    if archive is None:
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    # check that user is allowed to publish this document
    if not archive_access(request.user, archive.slug):
        messages.error(request, '''You do not have permission to publish <b>%s</b> materials.''' \
            % archive.label)
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    errors = []
    try:
        ok, response, dbpath, fullpath = _prepublication_check(request, filename, archive, xml=xml)
        if ok is not True:
            # publication check failed - do not publish
            return response

        # only load to exist if there are no errors found
        db = ExistDB()
        # get information to determine if an existing file is being replaced
        replaced = db.describeDocument(dbpath)

        try:
            # move the document from preview collection to configured public collection
            success = db.moveDocument(settings.EXISTDB_PREVIEW_COLLECTION,
                                      settings.EXISTDB_ROOT_COLLECTION, filename)
            # FindingAid instance ead already set above
        except ExistDBException, e:
            # special-case error message
            errors.append("Failed to move document %s from preview collection to main collection." \
                % filename)
            # re-raise and let outer exception handling take care of it
            raise e
    except ExistDBException, e:
        errors.append(e.message())
        success = False
    # (source truncated here)
# Load the TEI corpus from disk into eXist-db, mirroring the on-disk
# directory tree under the 'docker/texts' collection.
import os
from os import walk

from eulexistdb.db import ExistDB

#
# Timeout higher?
#

#
# http://username:[email protected]:8080/exist
#
# YOU NEED TO INSERT THE USER AND PASSWORD HERE
#xmldb = ExistDB('http://admin:@46.137.59.250:8080/exist')
xmldb = ExistDB('http://*****:*****@localhost:8080/exist')

xmldb.createCollection('docker', True)
xmldb.createCollection('docker/texts', True)

os.chdir('../dublin-store')

for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    # create a matching eXist collection for each directory on disk
    xmldb.createCollection('docker/texts' + '/' + dirpath, True)
    if filenames:
        for filename in filenames:
            with open(dirpath + '/' + filename) as f:
                print "--" + dirpath + '/' + filename
                # overwrite=True so re-runs replace existing documents
                xmldb.load(f, 'docker/texts' + '/' + dirpath + '/' + filename, True)
# Load the TEI corpus from disk into eXist-db (variant of the loader script
# that connects to localhost with a longer timeout).
from os import walk

from eulexistdb.db import ExistDB

from roche.settings import EXISTDB_SERVER_URL
from roche.settings import SOLR_SERVER_URL

import sunburnt
import libxslt
import libxml2

from browser.models import RocheTEI
from eulexistdb.query import QuerySet

#
# Timeout higher?
#
# NOTE(review): os.chdir/os.path are used below but only `walk` is imported
# from os here -- confirm `import os` exists earlier in the file
xmldb = ExistDB(timeout=60)

xmldb.createCollection('docker', True)
xmldb.createCollection('docker/texts', True)

os.chdir('../dublin-store')

for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    # create a matching eXist collection for each directory on disk
    xmldb.createCollection('docker/texts' + '/' + dirpath, True)
    if filenames:
        for filename in sorted(filenames):
            with open(os.path.join(dirpath, filename)) as f:
                print "--" + os.path.join(dirpath, filename)
                try:
                    xmldb.load(
                        f, os.path.join('docker', 'texts', dirpath, filename),
                        # (source truncated here)
class Command(BaseCommand):
    """Django management command wrapping eXist-db index-configuration tasks."""

    help = """Tasks for managing eXist-db index configuration file.

Available subcommands:
  load-index   - load index configuration file to eXist
  show-index   - show the contents of index configuration file currently in eXist
  index-info   - show information about index configuration file in eXist (owner, date modified, etc.)
  remove-index - remove index configuration from eXist
  reindex      - reindex the configured eXist collection with the loaded index
"""

    # recognized subcommands; anything else prints help and exits
    arg_list = ['load-index', 'show-index', 'index-info', 'remove-index', 'reindex']
    args = ' | '. join(arg_list)

    # FIXME/TODO: possibly convert into a django LabelCommand

    def handle(self, *args, **options):
        if not len(args) or args[0] == 'help':
            print self.help
            return
        cmd = args[0]
        if cmd not in self.arg_list:
            print "Command '%s' not recognized" % cmd
            print self.help
            return

        # check for required settings (used in all modes)
        if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION:
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
            return  # NOTE(review): unreachable after raise
        if not hasattr(settings, 'EXISTDB_INDEX_CONFIGFILE') or not settings.EXISTDB_INDEX_CONFIGFILE:
            raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing")
            return  # NOTE(review): unreachable after raise

        collection = settings.EXISTDB_ROOT_COLLECTION
        index = settings.EXISTDB_INDEX_CONFIGFILE

        try:
            # Explicitly request no timeout (even if one is configured
            # in django settings), since some tasks (such as
            # reindexing) could take a while.
            self.db = ExistDB(timeout=None)

            # check there is already an index config
            hasindex = self.db.hasCollectionIndex(collection)

            # for all commands but load, nothing to do if config collection does not exist
            if not hasindex and cmd != 'load-index':
                raise CommandError("Collection %s has no index configuration" % collection)

            if cmd == 'load-index':
                # load collection index to eXist
                # no easy way to check if index is different, but give some info to user to help indicate
                if hasindex:
                    index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                    print "Collection already has an index configuration; last modified %s\n" % index_desc['modified']
                else:
                    print "This appears to be a new index configuration\n"
                message = "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index)
                success = self.db.loadCollectionIndex(collection, open(index))
                if success:
                    print "Succesfully updated %s" % message
                    print """ If your collection already contains data and the index configuration is new or has changed, you should reindex the collection. """
                else:
                    raise CommandError("Failed to update %s" % message)
            elif cmd == 'show-index':
                # show the contents of the the collection index config file in exist
                print self.db.getDoc(self.db._collectionIndexPath(collection))
            elif cmd == 'index-info':
                # show information about the collection index config file in exist
                index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                for field, val in index_desc.items():
                    print "%s:\t%s" % (field, val)
            elif cmd == 'remove-index':
                # remove any collection index in eXist
                if self.db.removeCollectionIndex(collection):
                    print "Removed collection index configuration for %s" % collection
                else:
                    raise CommandError("Failed to remove collection index configuration for %s" % collection)
            elif cmd == 'reindex':
                # reindex the collection
                if not self.db.hasCollection(collection):
                    raise CommandError("Collection %s does not exist" % collection)
                print "Reindexing collection %s" % collection
                print "-- If you have a large collection, this may take a while."
                start_time = time.time()
                success = self.db.reindexCollection(collection)
                end_time = time.time()
                if success:
                    print "Successfully reindexed collection %s" % collection
                    print "Reindexing took %.2f seconds" % (end_time - start_time)
                else:
                    print "Failed to reindexed collection %s" % collection
                    print "-- Check that the configured exist user is in the exist DBA group."
        except Exception as err:
            # better error messages would be nice...
            raise CommandError(err)
class ExistQueryTest(unittest.TestCase):
    """Integration tests for :class:`QuerySet` run against a live eXist-db.

    ``setUp`` loads a small set of XML fixtures (NUM_FIXTURES documents) into
    a scratch collection and builds a queryset over ``/root`` using
    ``QueryTestModel``; ``tearDown`` removes the collection.  Many tests also
    assert that filtering returns a *new* queryset and leaves ``self.qs``
    unchanged.
    """

    def setUp(self):
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL)
        load_fixtures(self.db)
        self.qs = QuerySet(using=self.db, xpath='/root',
                           collection=COLLECTION, model=QueryTestModel)

    def tearDown(self):
        self.db.removeCollection(COLLECTION)

    def test_count(self):
        load_fixtures(self.db)
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "queryset count returns number of fixtures")

    def test_getitem(self):
        qs = self.qs.order_by('id')  # adding sort order to test reliably
        self.assertEqual("abc", qs[0].id)
        self.assertEqual("def", qs[1].id)
        self.assertEqual("one", qs[2].id)
        self.assertEqual("xyz", qs[3].id)

    def test_getitem_typeerror(self):
        # non-integer index is rejected
        self.assertRaises(TypeError, self.qs.__getitem__, "foo")

    def test_getitem_indexerror(self):
        # negative and out-of-range indexes are rejected
        self.assertRaises(IndexError, self.qs.__getitem__, -1)
        self.assertRaises(IndexError, self.qs.__getitem__, 23)

    def test_getslice(self):
        # NOTE: local name `slice` shadows the builtin; kept as-is
        slice = self.qs.order_by('id')[0:2]
        self.assert_(isinstance(slice, QuerySet))
        self.assert_(isinstance(slice[0], QueryTestModel))
        self.assertEqual(2, slice.count())
        self.assertEqual(2, len(slice))
        self.assertEqual('abc', slice[0].id)
        self.assertEqual('def', slice[1].id)
        self.assertRaises(IndexError, slice.__getitem__, 2)
        slice = self.qs.order_by('id')[1:3]
        self.assertEqual('def', slice[0].id)
        self.assertEqual('one', slice[1].id)
        # slice end past the last document: only one result comes back
        slice = self.qs.order_by('id')[3:5]
        self.assertEqual(1, slice.count())
        self.assertEqual('xyz', slice[0].id)
        self.assertRaises(IndexError, slice.__getitem__, 1)
        # test slicing with unspecified bounds
        slice = self.qs.order_by('id')[:2]
        self.assertEqual(2, slice.count())
        self.assertEqual('def', slice[1].id)
        slice = self.qs.order_by('id')[1:]
        self.assertEqual(3, slice.count())
        self.assertEqual('one', slice[1].id)
        self.assertEqual('xyz', slice[2].id)

    def test_filter(self):
        # full-text style filter on the whole document
        fqs = self.qs.filter(contains="two")
        self.assertEqual(1, fqs.count(),
                         "count returns 1 when filtered - contains 'two'")
        self.assertEqual("two", fqs[0].name, "name matches filter")
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")

    def test_filter_field(self):
        fqs = self.qs.filter(name="one")
        self.assertEqual(1, fqs.count(),
                         "count returns 1 when filtered on name = 'one' (got %s)" % self.qs.count())
        self.assertEqual("one", fqs[0].name, "name matches filter")
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")

    def test_filter_field_xpath(self):
        # filter on a field mapped to an attribute xpath
        fqs = self.qs.filter(id="abc")
        self.assertEqual(1, fqs.count(),
                         "count returns 1 when filtered on @id = 'abc' (got %s)" % self.qs.count())
        self.assertEqual("two", fqs[0].name,
                         "name returned is correct for id filter")
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")

    def test_filter_field_contains(self):
        fqs = self.qs.filter(name__contains="o")
        self.assertEqual(3, fqs.count(),
                         "should get 3 matches for filter on name contains 'o' (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")

    def test_filter_field_contains_special(self):
        # characters that need escaping in the generated xquery
        fqs = self.qs.filter(description__contains=' "quote" ')
        self.assertEqual(1, fqs.count(),
                         "should get 1 match for filter on desc contains ' \"quote\" ' (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")
        fqs = self.qs.filter(description__contains=' &!')
        self.assertEqual(1, fqs.count(),
                         "should get 1 match for filter on desc contains ' &!' (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")

    def test_filter_field_startswith(self):
        fqs = self.qs.filter(name__startswith="o")
        self.assertEqual(1, fqs.count(),
                         "should get 1 match for filter on name starts with 'o' (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")

    def test_filter_subobject_field(self):
        # double-underscore traversal into a nested node field
        fqs = self.qs.filter(sub__subname="la")
        self.assertEqual(1, fqs.count(),
                         "should get 1 match for filter on sub_subname = 'la' (got %s)" % fqs.count())

    def test_filter_in(self):
        fqs = self.qs.filter(id__in=['abc', 'xyz', 'qrs'])
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for filter on id in list (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")
        fqs = self.qs.filter(document_name__in=['f1.xml', 'f2.xml'])
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for filter on document name in list (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")
        # filtering on a special field - should still be able to return/access it via only
        fqs = self.qs.filter(document_name__in=['f1.xml', 'f2.xml']) \
                     .only('id', 'document_name').order_by('document_name')
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for filter on document name in list (got %s)" % fqs.count())
        self.assertEqual('f1.xml', fqs[0].document_name)
        fqs = self.qs.filter(document_name__in=['f1.xml', 'f2.xml']) \
                     .also('id', 'document_name').order_by('document_name')
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for filter on document name in list (got %s)" % fqs.count())
        self.assertEqual('f1.xml', fqs[0].document_name)

    def test_filter_exists(self):
        fqs = self.qs.filter(id__exists=True)
        self.assertEqual(4, fqs.count(),
                         "filter on id exists=true returns all documents")
        fqs = self.qs.filter(id__exists=False)
        self.assertEqual(0, fqs.count(),
                         "filter on id exists=false returns no documents")
        fqs = self.qs.filter(wnn__exists=False)
        self.assertEqual(3, fqs.count(),
                         "filter on wacky node name exists=false returns 3 documents")

    def test_or_filter(self):
        fqs = self.qs.or_filter(id='abc', name='four').only('id')
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for OR filter on id='abc' or name='four' (got %s)" % fqs.count())
        ids = [obj.id for obj in fqs.all()]
        self.assert_('abc' in ids,
                     'id "abc" in list of ids when OR filter includes id="abc"')
        self.assert_('def' in ids,
                     'id "def" in list of ids when OR filter includes name="four')

    def test_exclude(self):
        fqs = self.qs.exclude(id='abc', name='one').only('id')
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for exclude filter on id='abc' or name='one' (got %s)" % fqs.count())
        ids = [obj.id for obj in fqs.all()]
        self.assert_('abc' not in ids,
                     'id "abc" should not be in list of ids when exclude id="abc"')

    def test_filter_gtelte(self):
        # < <= > >=
        # subclass to add a numeric field to test with
        class CountQueryTestModel(QueryTestModel):
            name_count = xmlmap.IntegerField('count(name)')

        qs = QuerySet(using=self.db, xpath='/root',
                      collection=COLLECTION, model=CountQueryTestModel)
        # each fixture has one and only one name
        self.assertEqual(0, qs.filter(name_count__gt=1).count())
        self.assertEqual(4, qs.filter(name_count__gte=1).count())
        self.assertEqual(4, qs.filter(name_count__lte=1).count())
        self.assertEqual(0, qs.filter(name_count__lt=1).count())

    def test_filter_document_path(self):
        # get full test path to first document
        item = self.qs.filter(name='one').only('document_name', 'collection_name').get()
        path = '%s/%s' % (item.collection_name, item.document_name)
        fqs = self.qs.filter(document_path=path, name='one')
        self.assertEqual(1, fqs.count())
        fqs = self.qs.filter(document_path=path, name='two')
        self.assertEqual(0, fqs.count())

    def test_get(self):
        result = self.qs.get(contains="two")
        self.assert_(isinstance(result, QueryTestModel),
                     "get() with contains returns single result")
        self.assertEqual(result.name, "two",
                         "result returned by get() has correct data")
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")

    def test_get_toomany(self):
        self.assertRaises(ReturnedMultiple, self.qs.get, contains="one")

    def test_get_nomatch(self):
        self.assertRaises(DoesNotExist, self.qs.get, contains="fifty-four")

    def test_get_byname(self):
        result = self.qs.get(name="one")
        self.assert_(isinstance(result, QueryTestModel),
                     "get() with contains returns single result")
        self.assertEqual(result.name, "one",
                         "result returned by get() has correct data")
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")

    def test_filter_get(self):
        # chained filters narrowed down to a single document
        result = self.qs.filter(contains="one").filter(name="two").get()
        self.assert_(isinstance(result, QueryTestModel))
        self.assertEqual("two", result.name,
                         "filtered get() returns correct data")
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")

    def test_reset(self):
        self.qs.filter(contains="two")
        self.qs.reset()
        self.assertEqual(NUM_FIXTURES, self.qs.count(),
                         "main queryset remains unchanged by filter")

    def test_order_by(self):
        # element
        fqs = self.qs.order_by('name')
        self.assertEqual('four', fqs[0].name)
        self.assertEqual('one', fqs[1].name)
        self.assertEqual('three', fqs[2].name)
        self.assertEqual('two', fqs[3].name)
        self.assert_('order by ' not in self.qs.query.getQuery(),
                     "main queryset unchanged by order_by()")
        # attribute
        fqs = self.qs.order_by('id')
        self.assertEqual('abc', fqs[0].id)
        self.assertEqual('def', fqs[1].id)
        self.assertEqual('one', fqs[2].id)
        self.assertEqual('xyz', fqs[3].id)
        # reverse sorting
        fqs = self.qs.order_by('-name')
        self.assertEqual('four', fqs[3].name)
        self.assertEqual('two', fqs[0].name)
        fqs = self.qs.order_by('-id')
        self.assertEqual('abc', fqs[3].id)
        self.assertEqual('xyz', fqs[0].id)
        # case-insensitive sorting - upper-case description should not sort first
        fqs = self.qs.order_by('~description')
        self.assert_(fqs[0].description.startswith('third'))
        self.assert_(fqs[1].description.startswith('This one contains'))
        # reverse case-insensitive sorting - flags in either order
        fqs = self.qs.order_by('~-description')
        self.assert_(fqs[3].description.startswith('third'))
        fqs = self.qs.order_by('-~description')
        self.assert_(fqs[3].description.startswith('third'))

    def test_only(self):
        self.qs.only('name')
        self.assert_('element name {' not in self.qs.query.getQuery(),
                     "main queryset unchanged by only()")
        fqs = self.qs.filter(id='one').only('name', 'id', 'sub', 'or_field')
        # actually a Partial type derived from this
        self.assert_(isinstance(fqs[0], QueryTestModel))
        # attributes that should be present
        self.assertNotEqual(fqs[0].id, None)
        self.assertNotEqual(fqs[0].sub, None)
        self.assertNotEqual(fqs[0].sub.subname, None)
        self.assertNotEqual(fqs[0].or_field, None)
        # attribute not returned
        self.assertEqual(fqs[0].description, None)
        self.assertEqual('one', fqs[0].id)
        self.assertEqual('one', fqs[0].name)
        self.assertEqual('la', fqs[0].sub.subname)
        # = name (first of ORed fields present)
        self.assertEqual('one', fqs[0].or_field)
        fqs = self.qs.filter(id='one').only('wnn')
        self.assertTrue(hasattr(fqs[0], "wnn"))
        self.assertEqual(42, fqs[0].wnn)
        # nested field return
        fqs = self.qs.filter(id='one').only('name', 'id', 'sub__subname')
        self.assertEqual('la', fqs[0].sub.subname)
        # xpath function return
        fqs = self.qs.filter(id='one').only('substring')
        self.assertEqual('o', fqs[0].substring)
        # sub-subclass
        fqs = self.qs.filter(id='one').only('sub__ssc')
        self.assert_(isinstance(fqs[0], QueryTestModel))

    def test_only_hash(self):
        fqs = self.qs.only('hash')
        # no filters, should return all 3 test objects
        for result in fqs:
            # each return object should have a 40-character SHA-1 hash checksum
            self.assertEqual(40, len(result.hash),
                             'xquery result should have 40-character checksum, got %s' % result.hash)

    def test_document_name(self):
        fqs = self.qs.filter(id='one').only('document_name')
        # document_name attribute should be present
        self.assertNotEqual(fqs[0].document_name, None)
        self.assertEqual(fqs[0].document_name, "f1.xml")
        fqs = self.qs.filter(id='one').also('document_name')
        self.assertNotEqual(fqs[0].document_name, None)
        self.assertEqual(fqs[0].document_name, "f1.xml")

    def test_collection_name(self):
        fqs = self.qs.filter(id='one').only('collection_name')
        self.assertEqual(fqs[0].collection_name, '/db' + COLLECTION)
        fqs = self.qs.filter(id='one').also('collection_name')
        self.assertEqual(fqs[0].collection_name, '/db' + COLLECTION)

    def test_only_lastmodified(self):
        fqs = self.qs.only('last_modified')
        # no filters, should return all 3 test objects
        for result in fqs:
            self.assert_(isinstance(result.last_modified, datetime))

    def test_iter(self):
        for q in self.qs:
            self.assert_(isinstance(q, QueryTestModel))

    def test_slice_iter(self):
        i = 0
        for q in self.qs[1:2]:
            i += 1
        self.assertEqual(1, i)

    def test_also(self):
        # also() pulls in a field relative to the return node
        class SubqueryTestModel(xmlmap.XmlObject):
            name = xmlmap.StringField('.')
            parent_id = xmlmap.StringField('parent::root/@id')

        qs = QuerySet(using=self.db, collection=COLLECTION,
                      model=SubqueryTestModel, xpath='//name')
        name = qs.also('parent_id').get(name__exact='two')
        self.assertEqual('abc', name.parent_id,
                         "parent id set correctly when returning at name level with also parent_id specified; should be 'abc', got '" + name.parent_id + "'")

    def test_also_subfield(self):
        class SubqueryTestModel(xmlmap.XmlObject):
            subname = xmlmap.StringField('subname')
            parent = xmlmap.NodeField('parent::root', QueryTestModel)

        qs = QuerySet(using=self.db, collection=COLLECTION,
                      model=SubqueryTestModel, xpath='//sub')
        name = qs.also('parent__id', 'parent__wnn').get(subname__exact='la')
        self.assertEqual('la', name.subname)
        self.assertEqual('one', name.parent.id)
        self.assertEqual(42, name.parent.wnn)

    def test_also_raw(self):
        # also_raw() takes a raw xquery expression for the extra field
        class SubqueryTestModel(QueryTestModel):
            myid = xmlmap.StringField('@id')

        qs = QuerySet(using=self.db, collection=COLLECTION,
                      model=SubqueryTestModel, xpath='/root')
        qs = qs.filter(id='abc').also_raw(myid='string(%(xq_var)s//name/ancestor::root/@id)')
        self.assertEqual('abc', qs[0].myid)
        # filtered version of the queryset with raw
        obj = qs.filter(name='two').get()
        self.assertEqual('abc', obj.myid)
        # multiple parameters
        obj = qs.filter(id='abc').also_raw(id='string(%(xq_var)s/@id)',
                                           name='normalize-space(%(xq_var)s//name)').get(id='abc')
        self.assertEqual('abc', obj.id)
        self.assertEqual('two', obj.name)

    def test_only_raw(self):
        qs = self.qs.only_raw(id='xs:string(%(xq_var)s//name/ancestor::root/@id)').filter(name='two')
        self.assertEqual('abc', qs[0].id)
        # filtered version
        obj = qs.get()
        self.assertEqual('abc', obj.id)
        # when combined with regular only, other fields come back correctly
        qs = self.qs.only('name', 'description', 'substring')
        obj = qs.only_raw(id='xs:string(%(xq_var)s//name/ancestor::root/@id)').get(id='abc')
        self.assertEqual('two', obj.name)
        self.assertEqual('t', obj.substring)
        self.assertEqual('this one only has two', obj.description)
        self.assertEqual('abc', obj.id)
        # subfield
        obj = qs.only_raw(sub__subname='normalize-space(%(xq_var)s//subname)').get(id='one')
        self.assertEqual('la', obj.sub.subname)
        # multiple parameters
        obj = self.qs.filter(id='abc').only_raw(id='string(%(xq_var)s/@id)',
                                                name='normalize-space(%(xq_var)s//name)').get(id='abc')
        self.assertEqual('abc', obj.id)
        self.assertEqual('two', obj.name)

        # list field - multiple return values
        class MyQueryTest(QueryTestModel):
            name = xmlmap.StringListField('name')

        qs = QuerySet(using=self.db, xpath='/root',
                      collection=COLLECTION, model=MyQueryTest)
        # return one object but find all the names in the test collection
        obj = qs.filter(id='abc').only_raw(name='collection("/db%s")//name' % COLLECTION).get(id='abc')
        # 4 names in test fixtures - should come back as a list of those 4 names
        self.assertEqual(4, len(obj.name))

    def test_getDocument(self):
        obj = self.qs.getDocument("f1.xml")
        self.assert_(isinstance(obj, QueryTestModel),
                     "object returned by getDocument is instance of QueryTestModel")
        self.assertEqual("one", obj.name)

    def test_distinct(self):
        qs = QuerySet(using=self.db, collection=COLLECTION, xpath='//name')
        vals = qs.distinct()
        self.assert_('one' in vals)
        self.assert_('two' in vals)
        self.assert_('three' in vals)
        self.assert_('four' in vals)
        self.assert_('abc' not in vals)

    def test_namespaces(self):
        # filter on a field with a namespace
        fqs = self.qs.filter(nsfield='namespaced').all()
        self.assertEqual('namespaced', fqs[0].nsfield)
subprocess.call(["ssh-agent", "bash", "-c", "ssh-add /docker/github_rsa ; /usr/bin/git pull;"], stdout=devnull, stderr=devnull) # Call UIMA analysis engine if not juan == -1: file_name = os.path.join(collection_path, "%03d.xml" % (juan,)) result = subprocess.call(["/usr/bin/java", "-Dfile.encoding=UTF-8", "-Djava.util.logging.config.file=/docker/bertie-uima/src/main/properties/Logger.properties", "-jar", BERTIE_JAR, "--tei", "--file", file_name, "--owl", f.name], stdout=devnull, stderr=devnull) # Reload single document for faster response xmldb = ExistDB(server_url="http://*****:*****@" + existdb_host + ":8080/exist", timeout=10) db_collection_path = 'docker/texts/' + \ collection_path.replace('/docker/dublin-store/', '') with open(file_name) as newly_annotated_file: print " [ ] Reloading single document" try: xmldb.load(newly_annotated_file, os.path.join(db_collection_path, os.path.split(file_name)[1]), True) except: print "FAILED TO LOAD " + file_name # Send response early send_response("OK") start_uima = time.time() result = subprocess.call(["/usr/bin/java", "-Dfile.encoding=UTF-8", "-Djava.util.logging.config.file=/docker/bertie-uima/src/main/properties/Logger.properties",
class Command(BaseCommand):
    """Management command: load DDI codebook XML files into eXist.

    For each file given on the command line the codebook is parsed, prepped
    (local topics, date cleanup, geocoding), loaded to the configured eXist
    root collection, and then the local copy is deleted.  ``--dry-run``
    reports what would happen without loading or deleting anything.
    """
    args = '<filename filename filename ...>'
    help = '''Loads XML files into the configured eXist collection.
    The local copy will be *removed* after it is successfully loaded.'''

    option_list = BaseCommand.option_list + (
        make_option('--dry-run', '-n',
                    dest='dryrun',
                    action='store_true',
                    help='''Report on what would be done, but don't delete any files'''),
    )

    # default verbosity level for django management commands
    v_normal = 1

    def handle(self, *files, **options):
        verbosity = int(options.get('verbosity', self.v_normal))

        # check for required settings
        if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or \
                not settings.EXISTDB_ROOT_COLLECTION:
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
            # NOTE: a dead `return` that followed this raise has been removed

        self.db = ExistDB()
        self.cbgeocoder = CodebookGeocoder()

        # initalize progress bar
        pbar = None
        total = len(files)
        # init progress bar if processing enough files, running on a terminal
        if total >= 10 and os.isatty(sys.stderr.fileno()):
            widgets = [Percentage(), ' (', SimpleProgress(), ')', Bar(), ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=total).start()

        errored = 0
        loaded = 0
        for f in files:
            success = False
            if pbar:
                pbar.update(errored + loaded)
            try:
                # full path location where file will be loaded in exist db collection
                dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(f)
                # TODO: any error checking? validation?
                start = time.time()
                cb = load_xmlobject_from_file(f, CodeBook)
                logger.debug('%s loaded as xml in %f sec' % (f, time.time() - start))
                start = time.time()
                self.prep(cb)
                logger.debug('%s prepped in %f sec' % (f, time.time() - start))

                # load to eXist from string since DDI documents aren't that large,
                # rather than reloading the file
                if not options.get('dryrun', False):
                    start = time.time()
                    success = self.db.load(cb.serialize(pretty=True), dbpath,
                                           overwrite=True)
                    logger.debug('%s loaded to eXist in %f sec' % (f, time.time() - start))
            except IOError as e:
                self.stdout.write("Error opening %s: %s" % (f, e))
                errored += 1
            except ExistDBException as e:
                self.stdout.write("Error: failed to load %s to eXist" % f)
                self.stdout.write(e.message())
                errored += 1

            if not options.get('dryrun', False) and success:
                loaded += 1
                if verbosity > self.v_normal:
                    self.stdout.write("Loaded %s as %s" % (f, dbpath))
                # remove the local copy now that it is loaded to eXist
                try:
                    os.remove(f)
                except OSError as e:
                    self.stdout.write('Error removing %s: %s' % (f, e))

        if pbar:
            pbar.finish()

        # output a summary of what was done if more than one file was processed
        if verbosity >= self.v_normal:
            if loaded > 1:
                self.stdout.write("%d document%s loaded" %
                                  (loaded, 's' if loaded != 1 else ''))
            if errored > 1:
                self.stdout.write("%d document%s with errors" %
                                  (errored, 's' if errored != 1 else ''))

    # pattern for recognizing ICPSR topic identifiers, e.g. "ICPSR XVII.A.3"
    # (raw string so the backslash escapes are unambiguous)
    topic_id = re.compile(r'^(?P<org>[A-Z]+)[ .](?P<id>[IVX]+(\.[A-Z](\.[0-9]+(\.[a-z]+)?)?)?)')

    def prep(self, cb):
        # do any prep work or cleanup that needs to be done
        # before loading to exist
        self.local_topics(cb)
        self.clean_dates(cb)
        self.cbgeocoder.code_locations(cb)

    def icpsr_topic_id(self, topic):
        """Generate an ICPSR topic id in the format needed for lookup in our
        topic dictionary; returns None if not an ICPSR topic."""
        m = self.topic_id.match(topic)
        if m:
            match_info = m.groupdict()
            if match_info['org'] == 'ICPSR':
                return '%(org)s.%(id)s' % match_info

    def local_topics(self, cb):
        """Convert ICPSR topics on the codebook to local topics."""
        for t in cb.topics:
            topic_id = self.icpsr_topic_id(t.val)
            if topic_id is not None:
                new_topic = topic_mappings.get(topic_id, None)
                if new_topic:
                    cb.topics.append(Topic(val=new_topic, vocab='local'))
                # conditional topics if the geographic coverage is global
                if topic_id in conditional_topics['global'] and \
                        'Global' in [unicode(gc) for gc in cb.geo_coverage]:
                    cb.topics.append(Topic(val=conditional_topics['global'][topic_id],
                                           vocab='local'))

    def clean_dates(self, cb):
        """Clean up dates so we can search consistently on 4-digit years or
        more; dates should be YYYY, YYYY-MM, or YYYY-MM-DD."""
        prev_date = None
        for d in cb.time_periods:
            # special case: two-digit date as second date in a cycle
            # interpret as month on the year that starts the cycle
            if d.event == 'end' and d.cycle == prev_date.cycle and \
                    len(d.date) == 2:
                d.date = '%04d-%02d' % (int(prev_date.date), int(d.date))
            elif len(d.date) < 4:
                # zero-pad short years out to 4 digits
                d.date = '%04d' % int(d.date)
            # store current date as previous date for next loop, in case
            # we need to clean up an end date in a cycle
            prev_date = d
def handle(self, *args, **options): verbosity = int(options['verbosity']) # 1 = normal, 0 = minimal, 2 = all v_normal = 1 v_all = 2 if options['pdf_only'] and options['skip_pdf_reload']: raise CommandError("Options -s and -p are not compatible") # check for required settings if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION: raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing") return if len(args): files = args else: # Note: copied from prep_ead manage command; move somewhere common? files = set() svn = svn_client() for archive in Archive.objects.all(): # update to make sure we have latest version of everything svn.update(str(archive.svn_local_path)) # apparently can't handle unicode files.update(set(glob.iglob(os.path.join(archive.svn_local_path, '*.xml')))) if verbosity == v_all: print 'Documents will be loaded to configured eXist collection: %s' \ % settings.EXISTDB_ROOT_COLLECTION if options['skip_pdf_reload']: print "** Skipping PDFs cache reload" db = ExistDB() loaded = 0 errored = 0 pdf_tasks = {} start_time = datetime.now() if not options['pdf_only']: # unless PDF reload only has been specified, load files for file in files: try: # full path location where file will be loaded in exist db collection dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(file) errors = check_ead(file, dbpath) if errors: # report errors, don't load errored += 1 print "Error: %s does not pass publication checks; not loading to eXist." 
% file if verbosity >= v_normal: print " Errors found:" for err in errors: print " %s" % err else: with open(file, 'r') as eadfile: success = db.load(eadfile, dbpath, overwrite=True) if success: loaded += 1 if verbosity >= v_normal: print "Loaded %s" % file # load the file as a FindingAid object to get the eadid for PDF reload ead = load_xmlobject_from_file(file, FindingAid) # trigger PDF regeneration in the cache and store task result # - unless user has requested PDF reload be skipped if not options['skip_pdf_reload']: pdf_tasks[ead.eadid.value] = reload_cached_pdf.delay(ead.eadid.value) # NOTE: unlike the web admin publish, this does not # generate TaskResult db records; task outcomes will be # checked & reported before the script finishes else: errored += 1 print "Error: failed to load %s to eXist" % file except ExistDBException, e: print "Error: failed to load %s to eXist" % file print e.message() errored += 1 # output a summary of what was done print "%d document%s loaded" % (loaded, 's' if loaded != 1 else '') print "%d document%s with errors" % (errored, 's' if errored != 1 else '')
def _load_file_to_exist(self, file):
    """Load a local XML file into the configured eXist root collection.

    The document is stored under its basename within
    ``settings.EXISTDB_ROOT_COLLECTION``, overwriting any existing copy.
    """
    db = ExistDB()
    fname = path.split(file)[-1]
    exist_path = path.join(settings.EXISTDB_ROOT_COLLECTION, fname)
    # use a context manager so the file handle is closed even if load fails
    # (the original opened the file without ever closing it)
    with open(file) as xmlfile:
        db.load(xmlfile, exist_path, True)
def publish(request):
    """ Admin publication form.  Allows publishing an EAD file by updating or adding
    it to the configured eXist database so it will be immediately visible on
    the public site.  Files can only be published if they pass an EAD sanity check,
    implemented in :meth:`~findingaids.fa_admin.utils.check_ead`.

    On POST, sanity-check the EAD file specified in request from the configured
    and (if it passes all checks), publish it to make it immediately visible on
    the site.  If publish is successful, redirects the user to main admin page
    with a success message that links to the published document on the site.
    If the sanity-check fails, displays a page with any problems found.
    """
    # formerly supported publish from filename, but now only supports
    # publish from preview
    if 'preview_id' not in request.POST:
        messages.error(request, "No preview document specified for publication")
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    id = request.POST['preview_id']

    # retrieve info about the document from preview collection
    try:
        # because of the way existdb.query.queryset constructs returns with 'also' fields,
        # it is simpler and better to retrieve document name separately
        ead = get_findingaid(id, preview=True)
        ead_docname = get_findingaid(id, preview=True, only=['document_name'])
        filename = ead_docname.document_name
    except (ExistDBException, Http404):     # not found in exist OR permission denied
        messages.error(request,
            '''Publish failed. Could not retrieve <b>%s</b> from preview collection.
            Please reload and try again.''' % id)
        # if ead could not be retrieved from preview mode, skip processing
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    # determine archive this ead is associated with
    archive = None
    if not ead.repository:
        messages.error(request,
            '''Publish failed. Could not determine which archive <b>%s</b> belongs to.
            Please update subarea, reload, and try again.''' % id)
    else:
        archive_name = ead.repository[0]
        # NOTE: EAD supports multiple subarea tags, but in practice we only
        # use one, so it should be safe to assume the first should be used for permissions
        try:
            archive = Archive.objects.get(name=archive_name)
        except ObjectDoesNotExist:
            messages.error(request,
                '''Publish failed. Could not find archive <b>%s</b>.''' % archive_name)

    # bail out if archive could not be identified
    if archive is None:
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    # check that user is allowed to publish this document
    if not archive_access(request.user, archive.slug):
        messages.error(request,
            '''You do not have permission to publish <b>%s</b> materials.''' \
            % archive.label)
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    errors = []
    # FIX: initialize err so the error template below cannot raise NameError
    # when moveDocument returns a falsy value without raising an exception
    err = None
    try:
        # NOTE: *not* using serialized xml here, because it may introduce
        # whitespace errors not present in the original file.
        ok, response, dbpath, fullpath = _prepublication_check(request, filename, archive)
        if ok is not True:
            # publication check failed - do not publish
            return response

        # only load to exist if there are no errors found
        db = ExistDB()
        # get information to determine if an existing file is being replaced
        replaced = db.describeDocument(dbpath)

        try:
            # move the document from preview collection to configured public collection
            success = db.moveDocument(settings.EXISTDB_PREVIEW_COLLECTION,
                                      settings.EXISTDB_ROOT_COLLECTION, filename)
            # FindingAid instance ead already set above
        except ExistDBException as e:
            # special-case error message
            errors.append("Failed to move document %s from preview collection to main collection." \
                          % filename)
            # re-raise and let outer exception handling take care of it
            raise e

    except ExistDBException as err:
        errors.append(err.message())
        success = False

    if success:
        # request the cache to reload the PDF - queue asynchronous task
        result = reload_cached_pdf.delay(ead.eadid.value)
        task = TaskResult(label='PDF reload', object_id=ead.eadid.value,
                          url=reverse('fa:findingaid', kwargs={'id': ead.eadid.value}),
                          task_id=result.task_id)
        task.save()

        ead_url = reverse('fa:findingaid', kwargs={'id': ead.eadid.value})
        change = "updated" if replaced else "added"
        messages.success(request, 'Successfully %s <b>%s</b>. View <a href="%s">%s</a>.'
                         % (change, filename, ead_url, unicode(ead.unittitle)))

        # redirect to main admin page and display messages
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))
    else:
        return render(request, 'fa_admin/publish-errors.html',
                      {'errors': errors, 'filename': filename,
                       'mode': 'publish', 'exception': err})
def handle(self, *args, **options): if not len(args) or args[0] == 'help': print self.help return cmd = args[0] if cmd not in self.arg_list: print "Command '%s' not recognized" % cmd print self.help return # check for required settings (used in all modes) if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION: raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing") return if not hasattr(settings, 'EXISTDB_INDEX_CONFIGFILE') or not settings.EXISTDB_INDEX_CONFIGFILE: raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing") return collection = settings.EXISTDB_ROOT_COLLECTION index = settings.EXISTDB_INDEX_CONFIGFILE credentials = {} if options.get('username') is not None: credentials['EXISTDB_SERVER_USER'] = options.get('username') if options.get('password') is not None: credentials['EXISTDB_SERVER_PASSWORD'] = options.get('password') try: # Explicitly request no timeout (even if one is configured # in django settings), since some tasks (such as # reindexing) could take a while. 
if credentials: # NOTE: override_settings is a test utility, but this is currently # the simplest way to specify credentials, since by default existdb # with override_settings(**credentials): self.db = ExistDB(timeout=None) else: self.db = ExistDB(timeout=None) # check there is already an index config hasindex = self.db.hasCollectionIndex(collection) # for all commands but load, nothing to do if config collection does not exist if not hasindex and cmd != 'load-index': raise CommandError("Collection %s has no index configuration" % collection) if cmd == 'load-index': # load collection index to eXist # no easy way to check if index is different, but give some info to user to help indicate if hasindex: index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection)) print "Collection already has an index configuration; last modified %s\n" % index_desc['modified'] else: print "This appears to be a new index configuration\n" message = "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index) success = self.db.loadCollectionIndex(collection, open(index)) if success: print "Succesfully updated %s" % message print """ If your collection already contains data and the index configuration is new or has changed, you should reindex the collection. 
""" else: raise CommandError("Failed to update %s" % message) elif cmd == 'show-index': # show the contents of the the collection index config file in exist print self.db.getDoc(self.db._collectionIndexPath(collection)) elif cmd == 'index-info': # show information about the collection index config file in exist index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection)) for field, val in index_desc.items(): print "%s:\t%s" % (field, val) elif cmd == 'remove-index': # remove any collection index in eXist if self.db.removeCollectionIndex(collection): print "Removed collection index configuration for %s" % collection else: raise CommandError("Failed to remove collection index configuration for %s" % collection) elif cmd == 'reindex': # reindex the collection if not self.db.hasCollection(collection): raise CommandError("Collection %s does not exist" % collection) print "Reindexing collection %s" % collection print "-- If you have a large collection, this may take a while." start_time = time.time() success = self.db.reindexCollection(collection) end_time = time.time() if success: print "Successfully reindexed collection %s" % collection print "Reindexing took %.2f seconds" % (end_time - start_time) else: print "Failed to reindexed collection %s" % collection print "-- Check that the configured exist user is in the exist DBA group or specify different credentials." except Exception as err: # better error messages would be nice... raise CommandError(err)
def setUp(self):
    """Prepare each test: connect to eXist and load the query fixtures."""
    # fresh connection for every test run
    connection = ExistDB(server_url=EXISTDB_SERVER_URL)
    load_fixtures(connection)
    self.db = connection
    # queryset rooted at /root over the fixture collection
    self.qs = QuerySet(using=connection, xpath='/root',
                       collection=COLLECTION, model=QueryTestModel)
class Command(BaseCommand): help = """Tasks for managing eXist-db index configuration file. Available subcommands: load-index - load index configuration file to eXist show-index - show the contents of index configuration file currently in eXist index-info - show information about index configuration file in eXist (owner, date modified, etc.) remove-index - remove index configuration from eXist reindex - reindex the configured eXist collection with the loaded index """ arg_list = ['load-index', 'show-index', 'index-info', 'remove-index', 'reindex'] args = ' | '. join(arg_list) def get_password_option(option, opt, value, parser): setattr(parser.values, option.dest, getpass()) option_list = BaseCommand.option_list + ( make_option('--username', '-u', dest='username', action='store', help='''Username to use when connecting to eXist (overrides any in local settings)'''), make_option('--password', '-p', dest='password', action='callback', callback=get_password_option, help='''Prompt for password (required when --username is specified)'''), ) # FIXME/TODO: possibly convert into a django LabelCommand def handle(self, *args, **options): if not len(args) or args[0] == 'help': print self.help return cmd = args[0] if cmd not in self.arg_list: print "Command '%s' not recognized" % cmd print self.help return # check for required settings (used in all modes) if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION: raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing") if not hasattr(settings, 'EXISTDB_INDEX_CONFIGFILE') or not settings.EXISTDB_INDEX_CONFIGFILE: raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing") collection = settings.EXISTDB_ROOT_COLLECTION index = settings.EXISTDB_INDEX_CONFIGFILE credentials = {} if options.get('username') is not None: credentials['EXISTDB_SERVER_USER'] = options.get('username') if options.get('password') is not None: credentials['EXISTDB_SERVER_PASSWORD'] = options.get('password') try: 
# Explicitly request no timeout (even if one is configured # in django settings), since some tasks (such as # reindexing) could take a while. if credentials: # NOTE: override_settings is a test utility, but this is currently # the simplest way to specify credentials, since by default existdb # with override_settings(**credentials): self.db = ExistDB(timeout=None) else: self.db = ExistDB(timeout=None) # check there is already an index config hasindex = self.db.hasCollectionIndex(collection) # for all commands but load, nothing to do if config collection does not exist if not hasindex and cmd != 'load-index': raise CommandError("Collection %s has no index configuration" % collection) if cmd == 'load-index': # load collection index to eXist # no easy way to check if index is different, but give some info to user to help indicate if hasindex: index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection)) print "Collection already has an index configuration; last modified %s\n" % index_desc['modified'] else: print "This appears to be a new index configuration\n" message = "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index) success = self.db.loadCollectionIndex(collection, open(index)) if success: print "Succesfully updated %s" % message print """ If your collection already contains data and the index configuration is new or has changed, you should reindex the collection. 
""" else: raise CommandError("Failed to update %s" % message) elif cmd == 'show-index': # show the contents of the the collection index config file in exist print self.db.getDoc(self.db._collectionIndexPath(collection)) elif cmd == 'index-info': # show information about the collection index config file in exist index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection)) for field, val in index_desc.items(): print "%s:\t%s" % (field, val) elif cmd == 'remove-index': # remove any collection index in eXist if self.db.removeCollectionIndex(collection): print "Removed collection index configuration for %s" % collection else: raise CommandError("Failed to remove collection index configuration for %s" % collection) elif cmd == 'reindex': # reindex the collection if not self.db.hasCollection(collection): raise CommandError("Collection %s does not exist" % collection) print "Reindexing collection %s" % collection print "-- If you have a large collection, this may take a while." start_time = time.time() success = self.db.reindexCollection(collection) end_time = time.time() if success: print "Successfully reindexed collection %s" % collection print "Reindexing took %.2f seconds" % (end_time - start_time) else: print "Failed to reindexed collection %s" % collection print "-- Check that the configured exist user is in the exist DBA group or specify different credentials." except Exception as err: # better error messages would be nice... raise CommandError(err)
from os import walk from eulexistdb.db import ExistDB from roche.settings import EXISTDB_SERVER_URL from roche.settings import SOLR_SERVER_URL import sunburnt import libxslt import libxml2 from browser.models import RocheTEI from eulexistdb.query import QuerySet # # Timeout higher? # xmldb = ExistDB(timeout=60) xmldb.createCollection('docker', True) xmldb.createCollection('docker/texts', True) os.chdir('../dublin-store') for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'): xmldb.createCollection('docker/texts' + '/' + dirpath, True) if filenames: for filename in sorted(filenames): with open(os.path.join(dirpath, filename)) as f: print "--" + os.path.join(dirpath, filename) try: xmldb.load(f, os.path.join('docker', 'texts', dirpath, filename), True) except:
def handle(self, *files, **options):
    """Load DDI codebook files into eXist, geocoding/prepping each one first.

    Each successfully loaded file is removed from disk.  Honors the
    ``dryrun`` option (prep only, no load) and ``verbosity``.

    :param files: paths of DDI XML files to load
    :raises CommandError: if EXISTDB_ROOT_COLLECTION is not configured
    """
    verbosity = int(options.get('verbosity', self.v_normal))

    # check for required settings
    # (unreachable `return` that followed this raise has been removed)
    if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or \
       not settings.EXISTDB_ROOT_COLLECTION:
        raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")

    self.db = ExistDB()
    self.cbgeocoder = CodebookGeocoder()

    # initalize progress bar
    pbar = None
    total = len(files)
    # init progress bar if processing enough files, running on a terminal
    if total >= 10 and os.isatty(sys.stderr.fileno()):
        widgets = [Percentage(), ' (', SimpleProgress(), ')', Bar(), ETA()]
        pbar = ProgressBar(widgets=widgets, maxval=total).start()

    errored = 0
    loaded = 0
    for f in files:
        success = False
        if pbar:
            pbar.update(errored + loaded)
        try:
            # full path location where file will be loaded in exist db collection
            dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(f)
            # TODO: any error checking? validation?
            start = time.time()
            cb = load_xmlobject_from_file(f, CodeBook)
            logger.debug('%s loaded as xml in %f sec' % (f, time.time() - start))
            start = time.time()
            self.prep(cb)
            logger.debug('%s prepped in %f sec' % (f, time.time() - start))
            # load to eXist from string since DDI documents aren't that large,
            # rather than reloading the file
            if not options.get('dryrun', False):
                start = time.time()
                success = self.db.load(cb.serialize(pretty=True), dbpath,
                                       overwrite=True)
                logger.debug('%s loaded to eXist in %f sec' % (f, time.time() - start))
        except IOError as e:
            self.stdout.write("Error opening %s: %s" % (f, e))
            errored += 1
        except ExistDBException as e:
            self.stdout.write("Error: failed to load %s to eXist" % f)
            self.stdout.write(e.message())
            errored += 1

        if not options.get('dryrun', False) and success:
            loaded += 1
            if verbosity > self.v_normal:
                self.stdout.write("Loaded %s as %s" % (f, dbpath))
            # file has been loaded; remove the local copy
            try:
                os.remove(f)
            except OSError as e:
                self.stdout.write('Error removing %s: %s' % (f, e))

    if pbar:
        pbar.finish()

    # output a summary of what was done if more than one file was processed
    # NOTE(review): the `> 1` tests skip the summary when exactly one file
    # loaded/errored, even though the pluralization below handles the == 1
    # case -- confirm whether `>= 1` was intended before changing.
    if verbosity >= self.v_normal:
        if loaded > 1:
            self.stdout.write("%d document%s loaded" %
                              (loaded, 's' if loaded != 1 else ''))
        if errored > 1:
            self.stdout.write("%d document%s with errors" %
                              (errored, 's' if errored != 1 else ''))
import os from os import walk from eulexistdb.db import ExistDB # # Timeout higher? # # # http://username:[email protected]:8080/exist # # YOU NEED TO INSERT THE USER AND PASSWORD HERE #xmldb = ExistDB('http://admin:@46.137.59.250:8080/exist') xmldb = ExistDB('http://*****:*****@localhost:8080/exist') xmldb.createCollection('docker', True) xmldb.createCollection('docker/texts', True) os.chdir('../dublin-store') for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'): xmldb.createCollection('docker/texts' + '/' + dirpath, True) if filenames: for filename in filenames: with open(dirpath + '/' + filename) as f: print "--" + dirpath + '/' + filename xmldb.load(f, 'docker/texts' + '/' + dirpath + '/' + filename, True) #
class ExistQueryTest__FullText(unittest.TestCase):
    """QuerySet tests that require eXist full-text (lucene) indexing.

    When full-text indexing is enabled, eXist must index files as they are
    loaded into the db, which makes these tests *significantly* slower --
    so every test that needs full-text queries lives in this class.
    """

    # sample lucene configuration for testing full-text queries
    FIXTURE_INDEX = '''
    <collection xmlns="http://exist-db.org/collection-config/1.0">
        <index>
            <lucene>
                <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
                <text qname="description"/>
                <text qname="root"/>
            </lucene>
        </index>
    </collection>
    '''

    def setUp(self):
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL)
        # create index for collection - should be applied to newly loaded files
        self.db.loadCollectionIndex(COLLECTION, self.FIXTURE_INDEX)
        load_fixtures(self.db)
        self.qs = QuerySet(using=self.db, xpath='/root',
                           collection=COLLECTION, model=QueryTestModel)

    def tearDown(self):
        # drop both the fixture collection and its index configuration
        self.db.removeCollection(COLLECTION)
        self.db.removeCollectionIndex(COLLECTION)

    def test_filter_fulltext_terms(self):
        results = self.qs.filter(description__fulltext_terms='only two')
        self.assertEqual(
            1, results.count(),
            "should get 1 match for fulltext_terms search on = 'only two' (got %s)" % results.count())

    def test_filter_fulltext_options(self):
        qs = QuerySet(using=self.db, xpath='/root',
                      collection=COLLECTION, model=QueryTestModel,
                      fulltext_options={'default-operator': 'and'})
        # search for terms present in fixtures - but not both present in one doc
        results = qs.filter(description__fulltext_terms='only third')
        # for now, just confirm that the option is passed through to query
        self.assertTrue('<default-operator>and</default-operator>' in results.query.getQuery())
        # TODO: test this properly!
        # query options not supported in current version of eXist
        # self.assertEqual(0, fqs.count())

    def test_order_by__fulltext_score(self):
        results = self.qs.filter(description__fulltext_terms='one').order_by('-fulltext_score')
        # 'one' appears 3 times, should sort first by relevance
        self.assertEqual('one', results[0].name)

    def test_only__fulltext_score(self):
        results = self.qs.filter(description__fulltext_terms='one').only('fulltext_score', 'name')
        # actually a Partial type derived from the model class
        self.assertTrue(isinstance(results[0], QueryTestModel))
        # fulltext score attribute should be present ...
        self.assertNotEqual(results[0].fulltext_score, None)
        # ... and should be a float
        self.assertTrue(float(results[0].fulltext_score) > 0.5)

    def test_fulltext_highlight(self):
        results = self.qs.filter(description__fulltext_terms='only two')
        # result from fulltext search - by default, xml should have exist:match tags
        self.assertTrue('<exist:match' in results[0].serialize())

        results = self.qs.filter(description__fulltext_terms='only two', highlight=False)
        # with highlighting disabled, should not have exist:match tags
        self.assertFalse('<exist:match' in results[0].serialize())

        # order of args in the same filter should not matter
        results = self.qs.filter(highlight=False, description__fulltext_terms='only two')
        self.assertFalse('<exist:match' in results[0].serialize())

        # separate filters should also work
        results = self.qs.filter(description__fulltext_terms='only two').filter(highlight=False)
        self.assertFalse('<exist:match' in results[0].serialize())

    def test_highlight(self):
        results = self.qs.filter(highlight='supercalifragilistic')
        self.assertEqual(
            4, results.count(),
            "highlight filter returns all documents even though search term is not present")

        results = self.qs.filter(highlight='one').order_by('id')
        self.assertTrue('<exist:match' in results[0].serialize())

    def test_match_count(self):
        results = self.qs.filter(id='one', highlight='one').only('match_count')
        self.assertEqual(results[0].match_count, 4, "4 matched words should be found")

    def test_using(self):
        results = self.qs.using('new-collection')
        # using should update the collection on the xquery object
        self.assertEqual('new-collection', results.query.collection)