def test_only_raw(self):
    """Check ``only_raw`` on its own, with ``only``, on a subfield and on a list field."""
    root_id_xquery = 'xs:string(%(xq_var)s//name/ancestor::root/@id)'

    # id supplied by raw xquery; check both index access and get()
    raw_qs = self.qs.only_raw(id=root_id_xquery).filter(name='two')
    self.assertEqual('abc', raw_qs[0].id)
    # filtered version
    found = raw_qs.get()
    self.assertEqual('abc', found.id)

    # when combined with regular only, other fields come back correctly
    partial_qs = self.qs.only('name', 'description', 'substring')
    found = partial_qs.only_raw(id=root_id_xquery).get(id='abc')
    self.assertEqual('two', found.name)
    self.assertEqual('t', found.substring)
    self.assertEqual('this one only has two', found.description)
    self.assertEqual('abc', found.id)

    # subfield
    found = partial_qs.only_raw(sub__subname='normalize-space(%(xq_var)s//subname)').get(id='one')
    self.assertEqual('la', found.sub.subname)

    # multiple parameters
    found = self.qs.filter(id='abc').only_raw(
        id='string(%(xq_var)s/@id)',
        name='normalize-space(%(xq_var)s//name)',
    ).get(id='abc')
    self.assertEqual('abc', found.id)
    self.assertEqual('two', found.name)

    # list field - multiple return values
    class MyQueryTest(QueryTestModel):
        name = xmlmap.StringListField('name')

    list_qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=MyQueryTest)
    # return one object but find all the names in the test collection
    all_names_xquery = 'collection("/db%s")//name' % COLLECTION
    found = list_qs.filter(id='abc').only_raw(name=all_names_xquery).get(id='abc')
    # 4 names in test fixtures - should come back as a list of those 4 names
    self.assertEqual(4, len(found.name))
def visual_places(request, title, juan):
    """Render the place-name view for one chapter (``juan``) of a text.

    Collects ``place_names`` from every TEI document matching ``title`` and
    ``juan`` and passes them, with the queryset and ``juan``, to the
    ``r/visual_places.html`` template.
    """
    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    places = []
    for q in qs:
        places.extend(q.place_names)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)
    try:
        sparql_result = sparql.query()
    except Exception:
        # Best effort: a failing SPARQL query must not break the page.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        sparql_result = {}
    # NOTE(review): ``sparql_result`` is not used by this view yet - confirm
    # whether the query is still needed here.

    return render_to_response('r/visual_places.html',
                              {'tei_documents': qs, 'places': places, 'juan': juan, },
                              context_instance=RequestContext(request))
def index_title(request, letter):
    """List the TEI documents whose title starts with ``letter``."""
    all_texts = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=Tei)
    # filter by titles starting with letter
    matching = all_texts.filter(title__startswith=letter)
    context = {'tei_documents': matching}
    return render_to_response('browser/index.html', context,
                              context_instance=RequestContext(request))
def test_filter_fulltext_options(self):
    """Full-text options given to the ``QuerySet`` show up in the generated query."""
    qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION,
                  model=QueryTestModel, fulltext_options={'default-operator': 'and'})
    # search for terms present in fixtures - but not both present in one doc
    fqs = qs.filter(description__fulltext_terms='only third')
    # for now, just confirm that the option is passed through to query
    # (assertTrue instead of the deprecated ``assert_`` alias)
    self.assertTrue('<default-operator>and</default-operator>' in fqs.query.getQuery())
def test_distinct(self):
    """``distinct()`` on ``//name`` contains the four fixture names and no id value."""
    qs = QuerySet(using=self.db, collection=COLLECTION, xpath='//name')
    vals = qs.distinct()
    # assertTrue instead of the deprecated ``assert_`` alias
    for expected_name in ('one', 'two', 'three', 'four'):
        self.assertTrue(expected_name in vals)
    # 'abc' is an id in the fixtures, not a name
    self.assertTrue('abc' not in vals)
def index(request):
    """Browse page: one entry per text, restricted to title and author fields."""
    texts = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    # Make titles unique (maybe there is a better method?)
    first_chapters = texts.filter(chapter='1')
    listing = first_chapters.only('title', 'title_en', 'author')
    return render_to_response('browser/index.html',
                              {'tei_documents': listing},
                              context_instance=RequestContext(request))
def test_also(self):
    """``also()`` returns an extra field next to the regular content."""
    class SubqueryTestModel(xmlmap.XmlObject):
        name = xmlmap.StringField('.')
        parent_id = xmlmap.StringField('parent::root/@id')

    qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='//name')
    name = qs.also('parent_id').get(name__exact='two')
    # Format with %s rather than ``+``: the message is built before the
    # assertion runs, and concatenating a ``None`` parent_id would raise
    # TypeError instead of reporting the failed comparison.
    self.assertEqual(
        'abc', name.parent_id,
        "parent id set correctly when returning at name level with also parent_id specified; should be 'abc', got '%s'"
        % name.parent_id)
def test_also_subfield(self):
    """``also()`` accepts ``field__subfield`` names that reach into a node field."""
    class SubqueryTestModel(xmlmap.XmlObject):
        subname = xmlmap.StringField('subname')
        parent = xmlmap.NodeField('parent::root', QueryTestModel)

    sub_qs = QuerySet(using=self.db, collection=COLLECTION,
                      model=SubqueryTestModel, xpath='//sub')
    extra_fields = ('parent__id', 'parent__wnn')
    found = sub_qs.also(*extra_fields).get(subname__exact='la')

    self.assertEqual('la', found.subname)
    self.assertEqual('one', found.parent.id)
    self.assertEqual(42, found.parent.wnn)
def index(request):
    """Search page: full-text search of the TEI body for a fixed term."""
    xmldb = ExistDB()
    fulltext_options = {'default-operator': 'and'}
    documents = QuerySet(using=xmldb, xpath='/tei:TEI', collection='docker/texts/',
                         model=RocheTEI, fulltext_options=fulltext_options)
    matches = documents.filter(body__fulltext_terms='至')
    context = {'tei_documents': matches}
    return render_to_response('search/index.html', context)
def index_author(request, author, startswith):
    """List TEI documents by author.

    With a truthy ``startswith`` the author is matched as a prefix,
    otherwise it must match exactly.
    """
    texts = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=Tei)
    # prefix match (authors starting with letter) or exact match
    lookup = 'author__startswith' if startswith else 'author'
    texts = texts.filter(**{lookup: author})
    return render_to_response('browser/index.html',
                              {'tei_documents': texts},
                              context_instance=RequestContext(request))
def text_info(request, title):
    """Render per-chapter information for the text called ``title``.

    Builds one ``[chapter, title excerpt, character count]`` row per matching
    document and hands the rows, sorted, to ``browser/text_view_info.html``.
    """
    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title)

    # never filled in by this view; still part of the template context
    result = ""
    place_names, persons, terms = [], [], []

    chapter_titles = []
    for doc in qs:
        # length of every div with RE_INTERPUCTION matches and newlines removed
        number_characters = sum(
            len(re.sub(RE_INTERPUCTION, '', div.text).replace("\n", ""))
            for div in doc.body.div
        )

        heading = doc.chapter_title
        # at most 70 characters of the heading; 'XXX' when there is none
        content = heading.replace(" ", "").replace("\n", "")[:70] if heading else 'XXX'

        # documents without a chapter number count as chapter 1
        chapter_titles.append([doc.chapter or 1, content, number_characters])

    # the entity lists are not collected at the moment, so these stay empty
    place_names = list(set(place_names))
    persons = list(set(persons))
    terms = list(set(terms))

    # Place names for leaflet (fixed sample data)
    js_data = json.dumps([[[50.5, 30.5], "test"]])

    context = {
        'tei_documents': qs,
        'tei_transform': result,
        'place_names': place_names,
        'persons': persons,
        'terms': terms,
        'js_data': js_data,
        'chapter_titles': sorted(chapter_titles),
    }
    return render_to_response('browser/text_view_info.html', context,
                              context_instance=RequestContext(request))
def text_info(request, title):
    """Render per-chapter information for the text called ``title``.

    Builds one ``[chapter, title excerpt, character count]`` row per matching
    document and hands the rows, sorted, to ``browser/text_view_info.html``.
    """
    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title)

    # never filled in by this view; still part of the template context
    result = ""
    place_names, persons, terms = [], [], []

    chapter_titles = []
    for doc in qs:
        # length of every div with RE_INTERPUCTION matches and newlines removed
        number_characters = sum(
            len(re.sub(RE_INTERPUCTION, '', div.text).replace("\n", ""))
            for div in doc.body.div
        )

        heading = doc.chapter_title
        # at most 70 characters of the heading; 'XXX' when there is none
        content = heading.replace(" ", "").replace("\n", "")[:70] if heading else 'XXX'

        # documents without a chapter number count as chapter 1
        chapter_titles.append([doc.chapter or 1, content, number_characters])

    # the entity lists are not collected at the moment, so these stay empty
    place_names = list(set(place_names))
    persons = list(set(persons))
    terms = list(set(terms))

    # Place names for leaflet (fixed sample data)
    js_data = json.dumps([[[50.5, 30.5], "test"]])

    context = {
        'tei_documents': qs,
        'tei_transform': result,
        'place_names': place_names,
        'persons': persons,
        'terms': terms,
        'js_data': js_data,
        'chapter_titles': sorted(chapter_titles),
    }
    return render_to_response('browser/text_view_info.html', context,
                              context_instance=RequestContext(request))
def test_filter_gtelte(self):
    """Exercise the ``__gt``, ``__gte``, ``__lte`` and ``__lt`` filter lookups."""
    # subclass to add a numeric field to test with
    class CountQueryTestModel(QueryTestModel):
        name_count = xmlmap.IntegerField('count(name)')

    counted = QuerySet(using=self.db, xpath='/root', collection=COLLECTION,
                       model=CountQueryTestModel)

    # each fixture has one and only one name
    expectations = (
        ('name_count__gt', 0),
        ('name_count__gte', 4),
        ('name_count__lte', 4),
        ('name_count__lt', 0),
    )
    for lookup, expected in expectations:
        self.assertEqual(expected, counted.filter(**{lookup: 1}).count())
def visual_timeline(request, title, juan):
    """Render a timeline of the persons mentioned in one chapter (``juan``).

    Persons come from the TEI documents matching ``title`` and ``juan``;
    birth and death years come from the SPARQL endpoint. Only persons
    returned by the SPARQL query appear on the timeline, which is passed to
    the template as a JSON list of ``[person, birth year, death year]`` rows
    sorted by birth year.
    """
    from operator import itemgetter

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    persons = []
    for q in qs:
        persons.extend(q.persons)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)
    try:
        sparql_result = sparql.query()
    except Exception:
        # Best effort: without SPARQL data the timeline is simply empty.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        sparql_result = {}

    # person name -> [birth year, death year] as returned by the endpoint
    sparql_persons = {}
    if sparql_result:
        for binding in sparql_result.bindings:
            sparql_persons[binding[u"person"].value] = [
                binding[u"birthYear"].value,
                binding[u"deathYear"].value,
            ]

    timeline_persons = []
    for p in set(persons):
        years = sparql_persons.get(p)
        if years:
            timeline_persons.append([p, int(years[0]), int(years[1])])

    # sort by birth year
    timeline_persons.sort(key=itemgetter(1))
    timeline_persons = json.dumps(timeline_persons)

    return render_to_response('r/visual_timeline.html',
                              {'tei_documents': qs,
                               'timeline_persons': timeline_persons,
                               'juan': juan},
                              context_instance=RequestContext(request))
def test_also_raw(self):
    """``also_raw`` fills fields from raw xquery, alone and with several fields."""
    class SubqueryTestModel(QueryTestModel):
        myid = xmlmap.StringField('@id')

    base_qs = QuerySet(using=self.db, collection=COLLECTION,
                       model=SubqueryTestModel, xpath='/root')
    raw_qs = base_qs.filter(id='abc').also_raw(
        myid='string(%(xq_var)s//name/ancestor::root/@id)')
    self.assertEqual('abc', raw_qs[0].myid)

    # filtered version of the queryset with raw
    found = raw_qs.filter(name='two').get()
    self.assertEqual('abc', found.myid)

    # multiple parameters
    found = raw_qs.filter(id='abc').also_raw(
        id='string(%(xq_var)s/@id)',
        name='normalize-space(%(xq_var)s//name)',
    ).get(id='abc')
    self.assertEqual('abc', found.id)
    self.assertEqual('two', found.name)
def text_download(request, title, file_format, juan=0):
    """
    Download a text or a single chapter as plain text file or as a (colored) pdf.

    ``file_format`` ``'txt'`` produces a plain-text attachment; any other
    value produces a PDF. A truthy ``juan`` restricts the download to that
    chapter. The attachment file name is the pinyin form of ``title``.
    """
    import pinyin
    pinyin_title = pinyin.get(title)

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title)
    if juan:
        qs = qs.filter(chapter=juan)

    # Strip spaces, newlines and tabs, then start a new paragraph after each
    # full stop. Joined in one pass instead of repeated ``+=``, which
    # re-copies the accumulated text for every div.
    result = "".join(
        d.text.replace(" ", "").replace("\n", "").replace("\t", "").replace(u"。", u"。\n\n")
        for q in qs
        for d in q.body.div
    )

    if file_format == 'txt':
        response = HttpResponse(content_type='text/plain')
        response['Content-Disposition'] = 'attachment; filename="{}.txt"'.format(pinyin_title)
        response.write(result)
    else:
        from fpdf import FPDF
        pdf = FPDF(unit='mm', format='A4')
        pdf.add_page()
        pdf.add_font('Droid', '', 'DroidSansFallbackFull.ttf', uni=True)
        pdf.set_font('Droid', '', 12)
        pdf.write(5, unicode(result))
        response = HttpResponse(pdf.output(dest='S'), content_type='application/pdf')
        response['Content-Disposition'] = 'attachment; filename="{}.pdf"'.format(pinyin_title)

    return response
def index(request):
    """Landing page: text and author counts, the text listing and the wiki page list."""
    # XML and SPARQL numbers
    # Count texts and authors
    texts = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    texts = texts.filter(chapter='1').only('title', 'title_en', 'author')
    # TODO: order by title
    texts = texts.order_by('title_en')

    number_texts = texts.count()
    number_authors = texts.distinct().count()

    # [link target, display name] pairs; spaces in the link are written as %20
    wiki_pages = [
        [page.replace(" ", "%20"), page]
        for page in sorted(os.listdir("/docker/dublin-store/sinology/mainSpace"))
    ]

    return render(request, 'roche/index.html', {
        'number_texts': number_texts,
        'number_authors': number_authors,
        'tei_documents': texts,
        "wiki_pages": wiki_pages,
    })
def setUp(self):
    """Connect to eXist, install the collection index, load fixtures and build the queryset."""
    database = ExistDB(server_url=EXISTDB_SERVER_URL)
    self.db = database
    # create index for collection - should be applied to newly loaded files
    database.loadCollectionIndex(COLLECTION, self.FIXTURE_INDEX)
    load_fixtures(database)
    self.qs = QuerySet(
        using=database,
        xpath='/root',
        collection=COLLECTION,
        model=QueryTestModel,
    )
def text_view(request, title):
    """Render every chapter of the text called ``title`` as one page.

    The chapters are ordered by chapter number, transformed with
    ``XSL_TRANSFORM_1`` and concatenated.

    Raises ``Http404`` when no document matches ``title``. Previously that
    case failed with an ``IndexError`` on ``qs[0]``.
    """
    from django.http import Http404

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    # filter by title
    qs = qs.filter(title=title).order_by('chapter')

    max_juan = qs.count()
    if not max_juan:
        raise Http404("No text found with the requested title")

    # Joined in one pass instead of repeated ``+``, which re-copies the
    # accumulated markup for every chapter.
    result = "".join(
        q.body.xsl_transform(xsl=XSL_TRANSFORM_1).serialize() for q in qs
    )
    text_title = qs[0].title

    data = {'tei_documents': qs, 'tei_transform': result,
            'text_title': text_title, 'max_juan': max_juan, }
    return render_to_response('browser/text_view.html', data,
                              context_instance=RequestContext(request))
def visual_places(request, title, juan):
    """Render the place-name view for one chapter (``juan``) of a text.

    Collects ``place_names`` from every TEI document matching ``title`` and
    ``juan`` and passes them, with the queryset and ``juan``, to the
    ``r/visual_places.html`` template.
    """
    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    places = []
    for q in qs:
        places.extend(q.place_names)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)
    try:
        sparql_result = sparql.query()
    except Exception:
        # Best effort: a failing SPARQL query must not break the page.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        sparql_result = {}
    # NOTE(review): ``sparql_result`` is not used by this view yet - confirm
    # whether the query is still needed here.

    return render_to_response('r/visual_places.html',
                              {'tei_documents': qs, 'places': places, 'juan': juan, },
                              context_instance=RequestContext(request))
def render(self, context):
    """Render the DDBC entries matching the resolved place name as HTML.

    Returns an empty string when the template variable cannot be resolved.
    Otherwise returns one ``<p>`` block per matching place, listing its
    other names, district, notes and location.
    """
    from browser.models import DDBCPlaceName

    # Keep the resolved value in a local. Writing it back to
    # ``self.place_name`` replaced the template variable with a plain string,
    # so rendering the same node again (e.g. inside a ``{% for %}`` loop)
    # failed on ``.resolve``.
    try:
        place_name = self.place_name.resolve(context)
    except template.VariableDoesNotExist:
        return ''

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI//tei:place',
                  collection='docker/resources/', model=DDBCPlaceName)
    qs = qs.filter(place_names=place_name)

    # NOTE(review): field values are inserted into the markup unescaped -
    # confirm the resource collection only holds trusted data.
    ddbc_output = u''
    for q in qs:
        ddbc_output += '<p>'
        ddbc_output += 'Other names: ' + u', '.join(q.place_names) + '<br>'
        ddbc_output += 'District: ' + q.district + '<br>'
        ddbc_output += 'Notes: ' + u' '.join(q.notes) + '<br>'
        ddbc_output += 'Location: ' + q.geo + '<br>'
        ddbc_output += '</p>'

    return ddbc_output
def text_download(request, title, file_format, juan=0):
    """
    Download a text or a single chapter as plain text file or as a (colored) pdf.

    ``file_format`` ``'txt'`` produces a plain-text attachment; any other
    value produces a PDF. A truthy ``juan`` restricts the download to that
    chapter. The attachment file name is the pinyin form of ``title``.
    """
    import pinyin
    pinyin_title = pinyin.get(title)

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title)
    if juan:
        qs = qs.filter(chapter=juan)

    # Strip spaces, newlines and tabs, then start a new paragraph after each
    # full stop. Joined in one pass instead of repeated ``+=``, which
    # re-copies the accumulated text for every div.
    result = "".join(
        d.text.replace(" ", "").replace("\n", "").replace("\t", "").replace(u"。", u"。\n\n")
        for q in qs
        for d in q.body.div
    )

    if file_format == 'txt':
        response = HttpResponse(content_type='text/plain')
        response['Content-Disposition'] = 'attachment; filename="{}.txt"'.format(pinyin_title)
        response.write(result)
    else:
        from fpdf import FPDF
        pdf = FPDF(unit='mm', format='A4')
        pdf.add_page()
        pdf.add_font('Droid', '', 'DroidSansFallbackFull.ttf', uni=True)
        pdf.set_font('Droid', '', 12)
        pdf.write(5, unicode(result))
        response = HttpResponse(pdf.output(dest='S'), content_type='application/pdf')
        response['Content-Disposition'] = 'attachment; filename="{}.pdf"'.format(pinyin_title)

    return response
def visual_timeline(request, title, juan):
    """Render a timeline of the persons mentioned in one chapter (``juan``).

    Persons come from the TEI documents matching ``title`` and ``juan``;
    birth and death years come from the SPARQL endpoint. Only persons
    returned by the SPARQL query appear on the timeline, which is passed to
    the template as a JSON list of ``[person, birth year, death year]`` rows
    sorted by birth year.
    """
    from operator import itemgetter

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    persons = []
    for q in qs:
        persons.extend(q.persons)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)
    try:
        sparql_result = sparql.query()
    except Exception:
        # Best effort: without SPARQL data the timeline is simply empty.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        sparql_result = {}

    # person name -> [birth year, death year] as returned by the endpoint
    sparql_persons = {}
    if sparql_result:
        for binding in sparql_result.bindings:
            sparql_persons[binding[u"person"].value] = [
                binding[u"birthYear"].value,
                binding[u"deathYear"].value,
            ]

    timeline_persons = []
    for p in set(persons):
        years = sparql_persons.get(p)
        if years:
            timeline_persons.append([p, int(years[0]), int(years[1])])

    # sort by birth year
    timeline_persons.sort(key=itemgetter(1))
    timeline_persons = json.dumps(timeline_persons)

    return render_to_response('r/visual_timeline.html',
                              {'tei_documents': qs,
                               'timeline_persons': timeline_persons,
                               'juan': juan},
                              context_instance=RequestContext(request))
def get_query_set(self):
    """
    Build the default :class:`eulexistdb.db.QuerySet` for this ``Manager``.

    The queryset uses the ``Manager``'s `xpath` and model, a default
    :class:`eulexistdb.db.ExistDB` and the collection named by
    ``settings.EXISTDB_ROOT_COLLECTION``. Full-text options are taken from
    ``settings.EXISTDB_FULLTEXT_OPTIONS`` when that setting is present and
    are empty otherwise.

    Override or extend this method in a ``Manager`` subclass to provide
    custom queries on an :class:`~eulexistdb.models.XmlModel`.
    """
    fulltext_opts = getattr(settings, 'EXISTDB_FULLTEXT_OPTIONS', {})
    queryset_args = dict(
        model=self.model,
        xpath=self.xpath,
        using=ExistDB(),
        collection=settings.EXISTDB_ROOT_COLLECTION,
        fulltext_options=fulltext_opts,
    )
    return QuerySet(**queryset_args)
def index(request):
    """Landing page: text and author counts, the text listing and the wiki page list."""
    # XML and SPARQL numbers
    # Count texts and authors
    texts = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    texts = texts.filter(chapter='1').only('title', 'title_en', 'author')
    # TODO: order by title
    texts = texts.order_by('title_en')

    number_texts = texts.count()
    number_authors = texts.distinct().count()

    # [link target, display name] pairs; spaces in the link are written as %20
    wiki_pages = [
        [page.replace(" ", "%20"), page]
        for page in sorted(os.listdir("/docker/dublin-store/sinology/mainSpace"))
    ]

    return render(request, 'roche/index.html', {
        'number_texts': number_texts,
        'number_authors': number_authors,
        'tei_documents': texts,
        "wiki_pages": wiki_pages,
    })
# Load resources # for (dirpath, dirnames, filenames) in walk('resources'): xmldb.createCollection('docker' + '/' + dirpath, True) if filenames: for filename in filenames: with open(dirpath + '/' + filename) as f: xmldb.load(f, os.path.join('docker', dirpath, filename), True) # # Load TEI into solr # si = sunburnt.SolrInterface(SOLR_SERVER_URL + '/') qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI) i = 0 for q in qs: print i doc = collections.defaultdict(list) for div in q.body.div: text = div.text.replace(" ", "").replace("\n", "") doc["text"].append(text) i = i + 1 doc['id'] = q.title + '/' + str(q.chapter) doc['title'] = q.title doc['author'] = q.author
def setUp(self):
    """Connect to eXist, load the fixtures and build the default queryset."""
    database = ExistDB(server_url=EXISTDB_SERVER_URL)
    self.db = database
    load_fixtures(database)
    self.qs = QuerySet(
        using=database,
        xpath='/root',
        collection=COLLECTION,
        model=QueryTestModel,
    )
class ExistQueryTest(unittest.TestCase): def setUp(self): self.db = ExistDB(server_url=EXISTDB_SERVER_URL) load_fixtures(self.db) self.qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=QueryTestModel) def tearDown(self): self.db.removeCollection(COLLECTION) def test_count(self): load_fixtures(self.db) self.assertEqual(NUM_FIXTURES, self.qs.count(), "queryset count returns number of fixtures") def test_getitem(self): qs = self.qs.order_by('id') # adding sort order to test reliably self.assertEqual("abc", qs[0].id) self.assertEqual("def", qs[1].id) self.assertEqual("one", qs[2].id) self.assertEqual("xyz", qs[3].id) def test_getitem_typeerror(self): self.assertRaises(TypeError, self.qs.__getitem__, "foo") def test_getitem_indexerror(self): self.assertRaises(IndexError, self.qs.__getitem__, -1) self.assertRaises(IndexError, self.qs.__getitem__, 23) def test_getslice(self): slice = self.qs.order_by('id')[0:2] self.assert_(isinstance(slice, QuerySet)) self.assert_(isinstance(slice[0], QueryTestModel)) self.assertEqual(2, slice.count()) self.assertEqual(2, len(slice)) self.assertEqual('abc', slice[0].id) self.assertEqual('def', slice[1].id) self.assertRaises(IndexError, slice.__getitem__, 2) slice = self.qs.order_by('id')[1:3] self.assertEqual('def', slice[0].id) self.assertEqual('one', slice[1].id) slice = self.qs.order_by('id')[3:5] self.assertEqual(1, slice.count()) self.assertEqual('xyz', slice[0].id) self.assertRaises(IndexError, slice.__getitem__, 1) # test slicing with unspecified bounds slice = self.qs.order_by('id')[:2] self.assertEqual(2, slice.count()) self.assertEqual('def', slice[1].id) slice = self.qs.order_by('id')[1:] self.assertEqual(3, slice.count()) self.assertEqual('one', slice[1].id) self.assertEqual('xyz', slice[2].id) def test_filter(self): fqs = self.qs.filter(contains="two") self.assertEqual(1, fqs.count(), "count returns 1 when filtered - contains 'two'") self.assertEqual("two", fqs[0].name, "name matches filter") 
self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") def test_filter_field(self): fqs = self.qs.filter(name="one") self.assertEqual(1, fqs.count(), "count returns 1 when filtered on name = 'one' (got %s)" % self.qs.count()) self.assertEqual("one", fqs[0].name, "name matches filter") self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") def test_filter_field_xpath(self): fqs = self.qs.filter(id="abc") self.assertEqual(1, fqs.count(), "count returns 1 when filtered on @id = 'abc' (got %s)" % self.qs.count()) self.assertEqual("two", fqs[0].name, "name returned is correct for id filter") self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") def test_filter_field_contains(self): fqs = self.qs.filter(name__contains="o") self.assertEqual(3, fqs.count(), "should get 3 matches for filter on name contains 'o' (got %s)" % fqs.count()) self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") def test_filter_field_contains_special(self): fqs = self.qs.filter(description__contains=' "quote" ') self.assertEqual(1, fqs.count(), "should get 1 match for filter on desc contains ' \"quote\" ' (got %s)" % fqs.count()) self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") fqs = self.qs.filter(description__contains=' &!') self.assertEqual(1, fqs.count(), "should get 1 match for filter on desc contains ' &!' 
(got %s)" % fqs.count()) self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") def test_filter_field_startswith(self): fqs = self.qs.filter(name__startswith="o") self.assertEqual(1, fqs.count(), "should get 1 match for filter on name starts with 'o' (got %s)" % fqs.count()) self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") def test_filter_subobject_field(self): fqs = self.qs.filter(sub__subname="la") self.assertEqual(1, fqs.count(), "should get 1 match for filter on sub_subname = 'la' (got %s)" % fqs.count()) def test_filter_in(self): fqs = self.qs.filter(id__in=['abc', 'xyz', 'qrs']) self.assertEqual( 2, fqs.count(), "should get 2 matches for filter on id in list (got %s)" % fqs.count()) self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") fqs = self.qs.filter(document_name__in=['f1.xml', 'f2.xml']) self.assertEqual( 2, fqs.count(), "should get 2 matches for filter on document name in list (got %s)" % fqs.count()) self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") # filtering on a special field - should still be able to return/access it via only fqs = self.qs.filter(document_name__in=['f1.xml', 'f2.xml']) \ .only('id', 'document_name').order_by('document_name') self.assertEqual( 2, fqs.count(), "should get 2 matches for filter on document name in list (got %s)" % fqs.count()) self.assertEqual('f1.xml', fqs[0].document_name) fqs = self.qs.filter(document_name__in=['f1.xml', 'f2.xml']) \ .also('id', 'document_name').order_by('document_name') self.assertEqual( 2, fqs.count(), "should get 2 matches for filter on document name in list (got %s)" % fqs.count()) self.assertEqual('f1.xml', fqs[0].document_name) def test_filter_exists(self): fqs = self.qs.filter(id__exists=True) self.assertEqual(4, fqs.count(), "filter on id exists=true returns all documents") fqs = self.qs.filter(id__exists=False) 
self.assertEqual(0, fqs.count(), "filter on id exists=false returns no documents") fqs = self.qs.filter(wnn__exists=False) self.assertEqual(3, fqs.count(), "filter on wacky node name exists=false returns 3 documents") def test_or_filter(self): fqs = self.qs.or_filter(id='abc', name='four').only('id') self.assertEqual( 2, fqs.count(), "should get 2 matches for OR filter on id='abc' or name='four' (got %s)" % fqs.count()) ids = [obj.id for obj in fqs.all()] self.assert_('abc' in ids, 'id "abc" in list of ids when OR filter includes id="abc"') self.assert_('def' in ids, 'id "def" in list of ids when OR filter includes name="four') def test_exclude(self): fqs = self.qs.exclude(id='abc', name='one').only('id') self.assertEqual( 2, fqs.count(), "should get 2 matches for exclude filter on id='abc' or name='one' (got %s)" % fqs.count()) ids = [obj.id for obj in fqs.all()] self.assert_('abc' not in ids, 'id "abc" should not be in list of ids when exclude id="abc"') def test_filter_gtelte(self): # < <= > >= # subclass to add a numeric field to test with class CountQueryTestModel(QueryTestModel): name_count = xmlmap.IntegerField('count(name)') qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=CountQueryTestModel) # each fixture has one and only one name self.assertEqual(0, qs.filter(name_count__gt=1).count()) self.assertEqual(4, qs.filter(name_count__gte=1).count()) self.assertEqual(4, qs.filter(name_count__lte=1).count()) self.assertEqual(0, qs.filter(name_count__lt=1).count()) def test_filter_document_path(self): # get full test path to first document item = self.qs.filter(name='one').only('document_name', 'collection_name').get() path = '%s/%s' % (item.collection_name, item.document_name) # fqs = self.qs.filter(document_path=path, name='one') self.assertEqual(1, fqs.count()) fqs = self.qs.filter(document_path=path, name='two') self.assertEqual(0, fqs.count()) def test_get(self): result = self.qs.get(contains="two") self.assert_(isinstance(result, 
QueryTestModel), "get() with contains returns single result") self.assertEqual(result.name, "two", "result returned by get() has correct data") self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") def test_get_toomany(self): self.assertRaises(ReturnedMultiple, self.qs.get, contains="one") def test_get_nomatch(self): self.assertRaises(DoesNotExist, self.qs.get, contains="fifty-four") def test_get_byname(self): result = self.qs.get(name="one") self.assert_(isinstance(result, QueryTestModel), "get() with contains returns single result") self.assertEqual(result.name, "one", "result returned by get() has correct data") self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") def test_filter_get(self): result = self.qs.filter(contains="one").filter(name="two").get() self.assert_(isinstance(result, QueryTestModel)) self.assertEqual("two", result.name, "filtered get() returns correct data") self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") def test_reset(self): self.qs.filter(contains="two") self.qs.reset() self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter") def test_order_by(self): # element fqs = self.qs.order_by('name') self.assertEqual('four', fqs[0].name) self.assertEqual('one', fqs[1].name) self.assertEqual('three', fqs[2].name) self.assertEqual('two', fqs[3].name) self.assert_('order by ' not in self.qs.query.getQuery(), "main queryset unchanged by order_by()") # attribute fqs = self.qs.order_by('id') self.assertEqual('abc', fqs[0].id) self.assertEqual('def', fqs[1].id) self.assertEqual('one', fqs[2].id) self.assertEqual('xyz', fqs[3].id) # reverse sorting fqs = self.qs.order_by('-name') self.assertEqual('four', fqs[3].name) self.assertEqual('two', fqs[0].name) fqs = self.qs.order_by('-id') self.assertEqual('abc', fqs[3].id) self.assertEqual('xyz', fqs[0].id) # case-insensitive sorting - upper-case 
description should not sort first fqs = self.qs.order_by('~description') self.assert_(fqs[0].description.startswith('third')) self.assert_(fqs[1].description.startswith('This one contains')) # reverse case-insensitive sorting - flags in either order fqs = self.qs.order_by('~-description') self.assert_(fqs[3].description.startswith('third')) fqs = self.qs.order_by('-~description') self.assert_(fqs[3].description.startswith('third')) def test_only(self): self.qs.only('name') self.assert_('element name {' not in self.qs.query.getQuery(), "main queryset unchanged by only()") fqs = self.qs.filter(id='one').only('name', 'id', 'sub', 'or_field') self.assert_(isinstance(fqs[0], QueryTestModel)) # actually a Partial type derived from this # attributes that should be present self.assertNotEqual(fqs[0].id, None) self.assertNotEqual(fqs[0].sub, None) self.assertNotEqual(fqs[0].sub.subname, None) self.assertNotEqual(fqs[0].or_field, None) # attribute not returned self.assertEqual(fqs[0].description, None) self.assertEqual('one', fqs[0].id) self.assertEqual('one', fqs[0].name) self.assertEqual('la', fqs[0].sub.subname) self.assertEqual('one', fqs[0].or_field) # = name (first of ORed fields present) fqs = self.qs.filter(id='one').only('wnn') self.assertTrue(hasattr(fqs[0], "wnn")) self.assertEqual(42, fqs[0].wnn) # nested field return fqs = self.qs.filter(id='one').only('name', 'id', 'sub__subname') self.assertEqual('la', fqs[0].sub.subname) # xpath function return fqs = self.qs.filter(id='one').only('substring') self.assertEqual('o', fqs[0].substring) # sub-subclass fqs = self.qs.filter(id='one').only('sub__ssc') self.assert_(isinstance(fqs[0], QueryTestModel)) def test_only_hash(self): fqs = self.qs.only('hash') # no filters, should return all 3 test objects for result in fqs: # each return object should have a 40-character SHA-1 hash checksum self.assertEqual(40, len(result.hash), 'xquery result should have 40-character checksum, got %s' % result.hash) def 
test_document_name(self): fqs = self.qs.filter(id='one').only('document_name') # document_name attribute should be present self.assertNotEqual(fqs[0].document_name, None) self.assertEqual(fqs[0].document_name, "f1.xml") fqs = self.qs.filter(id='one').also('document_name') self.assertNotEqual(fqs[0].document_name, None) self.assertEqual(fqs[0].document_name, "f1.xml") def test_collection_name(self): fqs = self.qs.filter(id='one').only('collection_name') self.assertEqual(fqs[0].collection_name, '/db' + COLLECTION) fqs = self.qs.filter(id='one').also('collection_name') self.assertEqual(fqs[0].collection_name, '/db' + COLLECTION) def test_only_lastmodified(self): fqs = self.qs.only('last_modified') # no filters, should return all 3 test objects for result in fqs: self.assert_(isinstance(result.last_modified, datetime)) def test_iter(self): for q in self.qs: self.assert_(isinstance(q, QueryTestModel)) def test_slice_iter(self): i = 0 for q in self.qs[1:2]: i += 1 self.assertEqual(1, i) def test_also(self): class SubqueryTestModel(xmlmap.XmlObject): name = xmlmap.StringField('.') parent_id = xmlmap.StringField('parent::root/@id') qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='//name') name = qs.also('parent_id').get(name__exact='two') self.assertEqual('abc', name.parent_id, "parent id set correctly when returning at name level with also parent_id specified; should be 'abc', got '" + name.parent_id + "'") def test_also_subfield(self): class SubqueryTestModel(xmlmap.XmlObject): subname = xmlmap.StringField('subname') parent = xmlmap.NodeField('parent::root', QueryTestModel) qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='//sub') name = qs.also('parent__id', 'parent__wnn').get(subname__exact='la') self.assertEqual('la', name.subname) self.assertEqual('one', name.parent.id) self.assertEqual(42, name.parent.wnn) def test_also_raw(self): class SubqueryTestModel(QueryTestModel): myid = 
xmlmap.StringField('@id') qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='/root') qs = qs.filter(id='abc').also_raw(myid='string(%(xq_var)s//name/ancestor::root/@id)') self.assertEqual('abc', qs[0].myid) # filtered version of the queryset with raw obj = qs.filter(name='two').get() self.assertEqual('abc', obj.myid) # multiple parameters obj = qs.filter(id='abc').also_raw(id='string(%(xq_var)s/@id)', name='normalize-space(%(xq_var)s//name)').get(id='abc') self.assertEqual('abc', obj.id) self.assertEqual('two', obj.name) def test_only_raw(self): qs = self.qs.only_raw(id='xs:string(%(xq_var)s//name/ancestor::root/@id)').filter(name='two') self.assertEqual('abc', qs[0].id) # filtered version obj = qs.get() self.assertEqual('abc', obj.id) # when combined with regular only, other fields come back correctly qs = self.qs.only('name', 'description', 'substring') obj = qs.only_raw(id='xs:string(%(xq_var)s//name/ancestor::root/@id)').get(id='abc') self.assertEqual('two', obj.name) self.assertEqual('t', obj.substring) self.assertEqual('this one only has two', obj.description) self.assertEqual('abc', obj.id) # subfield obj = qs.only_raw(sub__subname='normalize-space(%(xq_var)s//subname)').get(id='one') self.assertEqual('la', obj.sub.subname) # multiple parameters obj = self.qs.filter(id='abc').only_raw(id='string(%(xq_var)s/@id)', name='normalize-space(%(xq_var)s//name)').get(id='abc') self.assertEqual('abc', obj.id) self.assertEqual('two', obj.name) # list field - multiple return values class MyQueryTest(QueryTestModel): name = xmlmap.StringListField('name') qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=MyQueryTest) # return one object but find all the names in the test collection obj = qs.filter(id='abc').only_raw(name='collection("/db%s")//name' % COLLECTION).get(id='abc') # 4 names in test fixtures - should come back as a list of those 4 names self.assertEqual(4, len(obj.name)) def test_getDocument(self): obj = 
self.qs.getDocument("f1.xml") self.assert_(isinstance(obj, QueryTestModel), "object returned by getDocument is instance of QueryTestModel") self.assertEqual("one", obj.name) def test_distinct(self): qs = QuerySet(using=self.db, collection=COLLECTION, xpath='//name') vals = qs.distinct() self.assert_('one' in vals) self.assert_('two' in vals) self.assert_('three' in vals) self.assert_('four' in vals) self.assert_('abc' not in vals) def test_namespaces(self): # filter on a field with a namespace fqs = self.qs.filter(nsfield='namespaced').all() self.assertEqual('namespaced', fqs[0].nsfield)
class ExistQueryTest__FullText(unittest.TestCase): # when full-text indexing is enabled, eXist must index files when they are loaded to the db # this makes tests *significantly* slower # any tests that require full-text queries should be here # sample lucene configuration for testing full-text queries FIXTURE_INDEX = ''' <collection xmlns="http://exist-db.org/collection-config/1.0"> <index> <lucene> <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/> <text qname="description"/> <text qname="root"/> </lucene> </index> </collection> ''' def setUp(self): self.db = ExistDB(server_url=EXISTDB_SERVER_URL) # create index for collection - should be applied to newly loaded files self.db.loadCollectionIndex(COLLECTION, self.FIXTURE_INDEX) load_fixtures(self.db) self.qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=QueryTestModel) def tearDown(self): self.db.removeCollection(COLLECTION) self.db.removeCollectionIndex(COLLECTION) def test_filter_fulltext_terms(self): fqs = self.qs.filter(description__fulltext_terms='only two') self.assertEqual(1, fqs.count(), "should get 1 match for fulltext_terms search on = 'only two' (got %s)" % fqs.count()) def test_filter_fulltext_options(self): qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=QueryTestModel, fulltext_options={'default-operator': 'and'}) # search for terms present in fixtures - but not both present in one doc fqs = qs.filter(description__fulltext_terms='only third') # for now, just confirm that the option is passed through to query self.assert_('<default-operator>and</default-operator>' in fqs.query.getQuery()) # TODO: test this properly! 
# query options not supported in current version of eXist # self.assertEqual(0, fqs.count()) def test_order_by__fulltext_score(self): fqs = self.qs.filter(description__fulltext_terms='one').order_by('-fulltext_score') self.assertEqual('one', fqs[0].name) # one appears 3 times, should be first def test_only__fulltext_score(self): fqs = self.qs.filter(description__fulltext_terms='one').only('fulltext_score', 'name') self.assert_(isinstance(fqs[0], QueryTestModel)) # actually a Partial type derived from this # fulltext score attribute should be present self.assertNotEqual(fqs[0].fulltext_score, None) self.assert_(float(fqs[0].fulltext_score) > 0.5) # full-text score should be a float def test_fulltext_highlight(self): fqs = self.qs.filter(description__fulltext_terms='only two') # result from fulltext search - by default, xml should have exist:match tags self.assert_('<exist:match' in fqs[0].serialize()) fqs = self.qs.filter(description__fulltext_terms='only two', highlight=False) # with highlighting disabled, should not have exist:match tags self.assert_('<exist:match' not in fqs[0].serialize()) # order of args in the same filter should not matter fqs = self.qs.filter(highlight=False, description__fulltext_terms='only two') # with highlighting disabled, should not have exist:match tags self.assert_('<exist:match' not in fqs[0].serialize()) # separate filters should also work fqs = self.qs.filter(description__fulltext_terms='only two').filter(highlight=False) # with highlighting disabled, should not have exist:match tags self.assert_('<exist:match' not in fqs[0].serialize()) def test_highlight(self): fqs = self.qs.filter(highlight='supercalifragilistic') self.assertEqual(4, fqs.count(), "highlight filter returns all documents even though search term is not present") fqs = self.qs.filter(highlight='one').order_by('id') self.assert_('<exist:match' in fqs[0].serialize()) def test_match_count(self): fqs = self.qs.filter(id='one', highlight='one').only('match_count') 
self.assertEqual(fqs[0].match_count, 4, "4 matched words should be found") def test_using(self): fqs = self.qs.using('new-collection') # using should update the collection on the xquery object self.assertEqual('new-collection', fqs.query.collection)