Example #1
0
    def test_only_raw(self):
        # only_raw(): fields populated from raw XQuery expressions
        root_id_xq = 'xs:string(%(xq_var)s//name/ancestor::root/@id)'

        raw_qs = self.qs.only_raw(id=root_id_xq).filter(name='two')
        self.assertEqual('abc', raw_qs[0].id)
        # same queryset, fetched through get()
        fetched = raw_qs.get()
        self.assertEqual('abc', fetched.id)

        # mixed with a regular only(): the ordinary fields still come back
        partial_qs = self.qs.only('name', 'description', 'substring')
        fetched = partial_qs.only_raw(id=root_id_xq).get(id='abc')
        self.assertEqual('two', fetched.name)
        self.assertEqual('t', fetched.substring)
        self.assertEqual('this one only has two', fetched.description)
        self.assertEqual('abc', fetched.id)

        # raw expression for a field on a sub-object
        subname_xq = 'normalize-space(%(xq_var)s//subname)'
        fetched = partial_qs.only_raw(sub__subname=subname_xq).get(id='one')
        self.assertEqual('la', fetched.sub.subname)

        # several raw fields in one call
        raw_fields = {
            'id': 'string(%(xq_var)s/@id)',
            'name': 'normalize-space(%(xq_var)s//name)',
        }
        fetched = self.qs.filter(id='abc').only_raw(**raw_fields).get(id='abc')
        self.assertEqual('abc', fetched.id)
        self.assertEqual('two', fetched.name)

        # list field: one object returned, raw expression yields many values
        class MyQueryTest(QueryTestModel):
            name = xmlmap.StringListField('name')
        list_qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=MyQueryTest)
        # the expression selects every name in the test collection
        all_names_xq = 'collection("/db%s")//name' % COLLECTION
        fetched = list_qs.filter(id='abc').only_raw(name=all_names_xq).get(id='abc')
        # the fixtures hold 4 names, so the list field should hold 4 entries
        self.assertEqual(4, len(fetched.name))
Example #2
0
def visual_places(request, title, juan):
    """Render the place-name visualisation for one chapter of a text.

    Gathers ``place_names`` from every TEI document matching ``title`` and
    chapter ``juan`` and passes them, together with the matching documents
    and ``juan``, to the ``r/visual_places.html`` template.
    """
    qs = QuerySet(using=ExistDB(),
                  xpath='/tei:TEI',
                  collection='docker/texts/',
                  model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    places = []
    for q in qs:
        places.extend(q.place_names)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    # NOTE(review): this sends the *timeline* query and nothing below uses
    # its result -- looks like unfinished work; confirm before removing.
    sparql.setQuery(SPARQL_TIMELINE_QUERY)

    try:
        sparql.query()
    except Exception:
        # Best effort: the page is rendered even when the SPARQL endpoint is
        # unavailable.  Catching Exception rather than using a bare except
        # lets KeyboardInterrupt and SystemExit propagate.
        pass

    return render_to_response('r/visual_places.html', {
        'tei_documents': qs,
        'places': places,
        'juan': juan,
    },
                              context_instance=RequestContext(request))
Example #3
0
def index_title(request, letter):
    """Render the browse index limited to titles that start with ``letter``."""
    all_texts = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=Tei)
    matching = all_texts.filter(title__startswith=letter)

    context = {'tei_documents': matching}
    return render_to_response('browser/index.html', context,
                              context_instance=RequestContext(request))
Example #4
0
 def test_filter_fulltext_options(self):
     # full-text options given to the QuerySet must show up in the query text
     options = {'default-operator': 'and'}
     qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION,
                   model=QueryTestModel, fulltext_options=options)
     # both terms occur in the fixtures, but never together in one document
     filtered = qs.filter(description__fulltext_terms='only third')
     # for now only verify that the option is passed through to the query
     xquery = filtered.query.getQuery()
     self.assert_('<default-operator>and</default-operator>' in xquery)
Example #5
0
 def test_distinct(self):
     # distinct() over //name: every fixture name is present, ids are not
     names = QuerySet(using=self.db, collection=COLLECTION, xpath='//name').distinct()
     for expected in ('one', 'two', 'three', 'four'):
         self.assert_(expected in names)
     self.assert_('abc' not in names)
Example #6
0
def index(request):
    """Render the browse index with one entry per text.

    Only documents whose chapter is ``'1'`` are listed, so that each title
    appears once, and only the title, English title and author are fetched.
    """
    documents = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)

    # restricting to chapter 1 makes titles unique (maybe there is a better method?)
    documents = documents.filter(chapter='1').only('title', 'title_en', 'author')

    context = {'tei_documents': documents}
    return render_to_response('browser/index.html', context, context_instance=RequestContext(request))
Example #7
0
    def test_also(self):
        # also() returns an extra field alongside the default result when
        # the query runs at the //name level
        class SubqueryTestModel(xmlmap.XmlObject):
            name = xmlmap.StringField('.')
            parent_id = xmlmap.StringField('parent::root/@id')

        qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='//name')
        name = qs.also('parent_id').get(name__exact='two')
        # %-formatting instead of '+' concatenation: if parent_id comes back
        # as a non-string (e.g. None), the test must fail with this message
        # rather than error out with a TypeError while building it.
        self.assertEqual('abc', name.parent_id,
                         "parent id set correctly when returning at name level with also parent_id specified; should be 'abc', got '%s'"
                         % name.parent_id)
Example #8
0
    def test_also_subfield(self):
        # also() with double-underscore paths into a node field
        class SubqueryTestModel(xmlmap.XmlObject):
            subname = xmlmap.StringField('subname')
            parent = xmlmap.NodeField('parent::root', QueryTestModel)

        sub_qs = QuerySet(using=self.db, collection=COLLECTION,
                          model=SubqueryTestModel, xpath='//sub')
        sub = sub_qs.also('parent__id', 'parent__wnn').get(subname__exact='la')
        self.assertEqual('la', sub.subname)
        self.assertEqual('one', sub.parent.id)
        self.assertEqual(42, sub.parent.wnn)
Example #9
0
def index(request):
    """Render the search page for a fixed full-text query on the text body.

    The queryset is created with the full-text option
    ``default-operator: and``.
    """
    fulltext_options = {'default-operator': 'and'}
    documents = QuerySet(using=ExistDB(),
                         xpath='/tei:TEI',
                         collection='docker/texts/',
                         model=RocheTEI,
                         fulltext_options=fulltext_options)
    hits = documents.filter(body__fulltext_terms='至')

    return render_to_response('search/index.html', {'tei_documents': hits})
Example #10
0
def index_author(request, author, startswith):
    """Render the browse index filtered by author.

    With a truthy ``startswith`` the author only has to start with
    ``author``; otherwise it has to match exactly.
    """
    documents = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=Tei)

    # pick the lookup once, then apply a single filter
    lookup = 'author__startswith' if startswith else 'author'
    documents = documents.filter(**{lookup: author})

    context = {'tei_documents': documents}
    return render_to_response('browser/index.html', context, context_instance=RequestContext(request))
Example #11
0
def text_info(request, title):
    """Render the info page for the text called ``title``.

    One ``[chapter, chapter title, character count]`` row is built per
    matching TEI document and the rows are passed to the template sorted.
    """
    documents = QuerySet(using=ExistDB(),
                         xpath='/tei:TEI',
                         collection='docker/texts/',
                         model=RocheTEI).filter(title=title)

    chapter_titles = []
    for doc in documents:
        # characters left after dropping RE_INTERPUCTION matches and newlines
        number_characters = sum(
            len(re.sub(RE_INTERPUCTION, '', section.text).replace("\n", ""))
            for section in doc.body.div)

        # shortened, whitespace-free chapter title, or a placeholder
        if doc.chapter_title:
            content = doc.chapter_title.replace(" ", "").replace("\n", "")[:70]
        else:
            content = 'XXX'

        # documents without a chapter are listed as chapter 1
        chapter = doc.chapter if doc.chapter else 1

        chapter_titles.append([chapter, content, number_characters])

    # Nothing collects these yet, so all three end up as empty lists.
    place_names = list(set([]))
    persons = list(set([]))
    terms = list(set([]))

    # placeholder data for the leaflet map
    js_data = json.dumps([[[50.5, 30.5], "test"]])

    context = {
        'tei_documents': documents,
        'tei_transform': "",
        'place_names': place_names,
        'persons': persons,
        'terms': terms,
        'js_data': js_data,
        'chapter_titles': sorted(chapter_titles),
    }
    return render_to_response('browser/text_view_info.html', context,
                              context_instance=RequestContext(request))
Example #12
0
def text_info(request, title):
    """Render the info page for the text called ``title``.

    For every TEI document with that title a ``[chapter, chapter title,
    character count]`` row is collected; the rows are passed to the
    ``browser/text_view_info.html`` template in sorted order.

    The character count covers the ``q.body.div`` texts after removing
    everything matched by ``RE_INTERPUCTION`` and all newlines.  Documents
    without a chapter title get the placeholder ``'XXX'``; documents without
    a chapter are listed as chapter ``1``.
    """
    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)

    qs = qs.filter(title=title)

    result = ""
    # NOTE(review): nothing fills these three lists at the moment (the
    # extend() calls below are disabled), so the template always gets [].
    place_names = []
    persons = []
    terms = []
    chapter_titles = []
    for q in qs:
        number_characters = 0
        for d in q.body.div:
            text = re.sub(RE_INTERPUCTION, '', d.text)
            text = text.replace("\n", "")
            number_characters += len(text)

        # Indentation below is normalised to 4 spaces; the original mixed a
        # tab with spaces here, which Python 3 rejects with TabError.
        if q.chapter_title:
            content = q.chapter_title.replace(" ", "").replace("\n", "")[:70]
        else:
            content = 'XXX'

        if q.chapter:
            chapter = q.chapter
        else:
            chapter = 1

        chapter_titles.append([chapter,
                               content,
                               number_characters])

        #place_names.extend(q.place_names)
        #persons.extend(q.persons)
        #terms.extend(q.terms)

    # de-duplicate
    place_names = list(set(place_names))
    persons = list(set(persons))
    terms = list(set(terms))

    # Place names for leaflet (placeholder data for now)
    js_data = json.dumps([[[50.5, 30.5], "test"]])

    return render_to_response('browser/text_view_info.html', {'tei_documents': qs,
                              'tei_transform': result, 'place_names': place_names,
                              'persons': persons, 'terms': terms, 'js_data': js_data,
                              'chapter_titles': sorted(chapter_titles)}, context_instance=RequestContext(request))
Example #13
0
    def test_filter_gtelte(self):
        # comparison lookups: gt, gte, lte, lt

        # model subclass that adds a numeric field to compare against
        class CountQueryTestModel(QueryTestModel):
            name_count = xmlmap.IntegerField('count(name)')

        counted = QuerySet(using=self.db, xpath='/root', collection=COLLECTION,
                           model=CountQueryTestModel)

        # every fixture has exactly one name
        expectations = (
            ('name_count__gt', 0),
            ('name_count__gte', 4),
            ('name_count__lte', 4),
            ('name_count__lt', 0),
        )
        for lookup, expected in expectations:
            self.assertEqual(expected, counted.filter(**{lookup: 1}).count())
Example #14
0
def visual_timeline(request, title, juan):
    """Render the person timeline for one chapter of a text.

    Person names are collected from every TEI document matching ``title``
    and chapter ``juan``.  Birth and death years come from the SPARQL
    endpoint; persons it does not know are left out.  The resulting
    ``[person, birth year, death year]`` rows are sorted by birth year and
    passed to the template as a JSON string.
    """
    from operator import itemgetter

    qs = QuerySet(using=ExistDB(),
                  xpath='/tei:TEI',
                  collection='docker/texts/',
                  model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    persons = []
    for q in qs:
        persons.extend(q.persons)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)

    try:
        sparql_result = sparql.query()
    except Exception:
        # Best effort: with the endpoint unavailable the timeline is simply
        # empty.  Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        sparql_result = {}

    # person name -> [birth year, death year]
    sparql_persons = {}
    if sparql_result:
        for binding in sparql_result.bindings:
            sparql_persons[binding[u"person"].value] = [
                binding[u"birthYear"].value, binding[u"deathYear"].value
            ]

    timeline_persons = []
    for p in set(persons):
        years = sparql_persons.get(p)
        if years:
            timeline_persons.append([p, int(years[0]), int(years[1])])

    # order by birth year
    timeline_persons = sorted(timeline_persons, key=itemgetter(1))
    timeline_persons = json.dumps(timeline_persons)

    return render_to_response('r/visual_timeline.html', {
        'tei_documents': qs,
        'timeline_persons': timeline_persons,
        'juan': juan
    },
                              context_instance=RequestContext(request))
Example #15
0
    def test_also_raw(self):
        # also_raw(): extra fields populated from raw XQuery expressions
        class SubqueryTestModel(QueryTestModel):
            myid = xmlmap.StringField('@id')

        base_qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='/root')
        raw_qs = base_qs.filter(id='abc').also_raw(myid='string(%(xq_var)s//name/ancestor::root/@id)')
        self.assertEqual('abc', raw_qs[0].myid)
        # the raw field survives further filtering of the queryset
        fetched = raw_qs.filter(name='two').get()
        self.assertEqual('abc', fetched.myid)

        # several raw fields in one call
        raw_fields = {
            'id': 'string(%(xq_var)s/@id)',
            'name': 'normalize-space(%(xq_var)s//name)',
        }
        fetched = raw_qs.filter(id='abc').also_raw(**raw_fields).get(id='abc')
        self.assertEqual('abc', fetched.id)
        self.assertEqual('two', fetched.name)
Example #16
0
def text_download(request, title, file_format, juan=0):
    """
    Return a text, or one chapter of it when ``juan`` is given, as a
    download: plain text for ``file_format == 'txt'``, a PDF otherwise.
    The file name is the pinyin form of ``title``.
    """
    import pinyin

    pinyin_title = pinyin.get(title)

    documents = QuerySet(using=ExistDB(),
                         xpath='/tei:TEI',
                         collection='docker/texts/',
                         model=RocheTEI).filter(title=title)
    if juan:
        documents = documents.filter(chapter=juan)

    # strip all whitespace, then start a new paragraph after every full stop
    result = "".join(
        section.text.replace(" ", "").replace("\n", "").replace("\t", "").replace(u"。", u"。\n\n")
        for doc in documents
        for section in doc.body.div)

    if file_format == 'txt':
        response = HttpResponse(content_type='text/plain')
        response['Content-Disposition'] = 'attachment; filename="{}.txt"'.format(pinyin_title)
        response.write(result)
        return response

    from fpdf import FPDF

    pdf = FPDF(unit='mm', format='A4')
    pdf.add_page()
    pdf.add_font('Droid', '', 'DroidSansFallbackFull.ttf', uni=True)
    pdf.set_font('Droid', '', 12)
    pdf.write(5, unicode(result))
    response = HttpResponse(pdf.output(dest='S'), content_type='application/pdf')
    response['Content-Disposition'] = 'attachment; filename="{}.pdf"'.format(pinyin_title)
    return response
Example #17
0
def index(request):
    """Render the start page with text/author counts and the wiki page list."""
    # XML and SPARQL numbers

    # One document per text: chapter 1 only, limited to the listed fields.
    documents = QuerySet(using=ExistDB(),
                         xpath='/tei:TEI',
                         collection='docker/texts/',
                         model=RocheTEI)
    documents = documents.filter(chapter='1').only('title', 'title_en', 'author')
    # TODO: order by title
    documents = documents.order_by('title_en')

    number_texts = documents.count()
    number_authors = documents.distinct().count()

    # [url-escaped name, display name] for every wiki page, sorted by name
    wiki_dir = "/docker/dublin-store/sinology/mainSpace"
    wiki_pages = [[page.replace(" ", "%20"), page]
                  for page in sorted(os.listdir(wiki_dir))]

    return render(request, 'roche/index.html', {
        'number_texts': number_texts,
        'number_authors': number_authors,
        'tei_documents': documents,
        "wiki_pages": wiki_pages,
    })
Example #18
0
    def setUp(self):
        # connect, install the collection index, then load the fixtures
        database = ExistDB(server_url=EXISTDB_SERVER_URL)
        self.db = database
        # the index is created first so it applies to the files loaded next
        database.loadCollectionIndex(COLLECTION, self.FIXTURE_INDEX)

        load_fixtures(database)

        # unfiltered queryset over the fixture collection
        self.qs = QuerySet(
            using=database,
            xpath='/root',
            collection=COLLECTION,
            model=QueryTestModel,
        )
Example #19
0
def text_view(request, title):
    """Render all chapters of the text called ``title`` on one page.

    The matching TEI documents are ordered by chapter, each body is
    transformed with ``XSL_TRANSFORM_1`` and the serialized results are
    concatenated for the template.

    Raises ``Http404`` when no document has the requested title.
    """
    from django.http import Http404

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)

    # filter by title
    qs = qs.filter(title=title).order_by('chapter')

    max_juan = qs.count()
    if not max_juan:
        # Without this guard qs[0] below raises IndexError for an unknown
        # title, which surfaces as a server error instead of "not found".
        raise Http404("No text with this title")

    result = "".join(q.body.xsl_transform(xsl=XSL_TRANSFORM_1).serialize()
                     for q in qs)

    text_title = qs[0].title

    data = {'tei_documents': qs, 'tei_transform': result,
            'text_title': text_title, 'max_juan': max_juan, }

    return render_to_response('browser/text_view.html', data,
                              context_instance=RequestContext(request))
Example #20
0
def visual_places(request, title, juan):
    """Render the place-name visualisation for one chapter of a text.

    Gathers ``place_names`` from every TEI document matching ``title`` and
    chapter ``juan`` and passes them, together with the matching documents
    and ``juan``, to the ``r/visual_places.html`` template.
    """
    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    places = []
    for q in qs:
        places.extend(q.place_names)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    # NOTE(review): this sends the *timeline* query and nothing below uses
    # its result -- looks like unfinished work; confirm before removing.
    sparql.setQuery(SPARQL_TIMELINE_QUERY)

    try:
        sparql.query()
    except Exception:
        # Best effort: the page is rendered even when the SPARQL endpoint is
        # unavailable.  Catching Exception rather than using a bare except
        # lets KeyboardInterrupt and SystemExit propagate.
        pass

    return render_to_response('r/visual_places.html',
                              {'tei_documents': qs, 'places': places, 'juan': juan, },
                              context_instance=RequestContext(request))
Example #21
0
    def render(self, context):
        """Return an HTML fragment describing the place named by the tag argument.

        ``self.place_name`` is resolved against ``context``; if the variable
        does not exist an empty string is returned.  Every matching
        ``DDBCPlaceName`` record contributes one ``<p>`` block with its other
        names, district, notes and location.
        """
        from browser.models import DDBCPlaceName

        try:
            # Resolve into a local.  Overwriting self.place_name with the
            # resolved value would make a second render of this node (e.g.
            # inside a {% for %} loop) call .resolve() on a plain string.
            place_name = self.place_name.resolve(context)
        except template.VariableDoesNotExist:
            return ''

        qs = QuerySet(using=ExistDB(), xpath='/tei:TEI//tei:place', collection='docker/resources/', model=DDBCPlaceName)
        qs = qs.filter(place_names=place_name)

        # NOTE(review): values are inserted into the markup unescaped --
        # confirm the resource data is trusted.
        parts = []
        for q in qs:
            parts.append('<p>')
            parts.append('Other names: ' + u', '.join(q.place_names) + '<br>')
            parts.append('District: ' + q.district + '<br>')
            parts.append('Notes: ' + u' '.join(q.notes) + '<br>')
            parts.append('Location: ' + q.geo + '<br>')
            parts.append('</p>')

        return u''.join(parts)
Example #22
0
def text_download(request, title, file_format, juan=0):
    """
    Serve a whole text, or a single chapter when ``juan`` is set, as an
    attachment named after the pinyin form of ``title``: plain text when
    ``file_format`` is ``'txt'``, otherwise a PDF.
    """
    import pinyin

    pinyin_title = pinyin.get(title)

    documents = QuerySet(using=ExistDB(), xpath='/tei:TEI',
                         collection='docker/texts/', model=RocheTEI)
    documents = documents.filter(title=title)
    if juan:
        documents = documents.filter(chapter=juan)

    # whitespace is dropped; each full stop is followed by a blank line
    pieces = []
    for doc in documents:
        for section in doc.body.div:
            compact = section.text.replace(" ", "").replace("\n", "").replace("\t", "")
            pieces.append(compact.replace(u"。", u"。\n\n"))
    result = "".join(pieces)

    if file_format == 'txt':
        response = HttpResponse(content_type='text/plain')
        response['Content-Disposition'] = 'attachment; filename="{}.txt"'.format(pinyin_title)
        response.write(result)
        return response

    from fpdf import FPDF

    pdf = FPDF(unit='mm', format='A4')
    pdf.add_page()
    pdf.add_font('Droid', '', 'DroidSansFallbackFull.ttf', uni=True)
    pdf.set_font('Droid', '', 12)
    pdf.write(5, unicode(result))
    response = HttpResponse(pdf.output(dest='S'), content_type='application/pdf')
    response['Content-Disposition'] = 'attachment; filename="{}.pdf"'.format(pinyin_title)
    return response
Example #23
0
def visual_timeline(request, title, juan):
    """Render the person timeline for one chapter of a text.

    Person names are collected from every TEI document matching ``title``
    and chapter ``juan``.  Birth and death years come from the SPARQL
    endpoint; persons it does not know are left out.  The resulting
    ``[person, birth year, death year]`` rows are sorted by birth year and
    passed to the template as a JSON string.
    """
    from operator import itemgetter

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    persons = []
    for q in qs:
        persons.extend(q.persons)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)

    try:
        sparql_result = sparql.query()
    except Exception:
        # Best effort: with the endpoint unavailable the timeline is simply
        # empty.  Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        sparql_result = {}

    # person name -> [birth year, death year]
    sparql_persons = {}
    if sparql_result:
        for binding in sparql_result.bindings:
            sparql_persons[binding[u"person"].value] = [binding[u"birthYear"].value, binding[u"deathYear"].value]

    timeline_persons = []
    for p in set(persons):
        years = sparql_persons.get(p)
        if years:
            timeline_persons.append([p, int(years[0]), int(years[1])])

    # order by birth year
    timeline_persons = sorted(timeline_persons, key=itemgetter(1))
    timeline_persons = json.dumps(timeline_persons)

    return render_to_response('r/visual_timeline.html',
                              {'tei_documents': qs, 'timeline_persons': timeline_persons,
                              'juan': juan}, context_instance=RequestContext(request))
Example #24
0
    def render(self, context):
        """Return an HTML fragment describing the place named by the tag argument.

        ``self.place_name`` is resolved against ``context``; if the variable
        does not exist an empty string is returned.  Every matching
        ``DDBCPlaceName`` record contributes one ``<p>`` block with its other
        names, district, notes and location.
        """
        from browser.models import DDBCPlaceName

        try:
            # Resolve into a local.  Overwriting self.place_name with the
            # resolved value would make a second render of this node (e.g.
            # inside a {% for %} loop) call .resolve() on a plain string.
            place_name = self.place_name.resolve(context)
        except template.VariableDoesNotExist:
            return ''

        qs = QuerySet(using=ExistDB(),
                      xpath='/tei:TEI//tei:place',
                      collection='docker/resources/',
                      model=DDBCPlaceName)
        qs = qs.filter(place_names=place_name)

        # NOTE(review): values are inserted into the markup unescaped --
        # confirm the resource data is trusted.
        parts = []
        for q in qs:
            parts.append('<p>')
            parts.append('Other names: ' + u', '.join(q.place_names) + '<br>')
            parts.append('District: ' + q.district + '<br>')
            parts.append('Notes: ' + u' '.join(q.notes) + '<br>')
            parts.append('Location: ' + q.geo + '<br>')
            parts.append('</p>')

        return u''.join(parts)
Example #25
0
    def get_query_set(self):
        """
        Build the default :class:`eulexistdb.db.QuerySet` for this
        ``Manager``.

        The queryset uses the ``Manager``'s model and `xpath`, a default
        :class:`eulexistdb.db.ExistDB` connection and the collection named
        by ``settings.EXISTDB_ROOT_COLLECTION``. When
        ``settings.EXISTDB_FULLTEXT_OPTIONS`` is defined it is passed on as
        the full-text options; otherwise an empty dict is used.

        Subclasses of Manager can override or extend this method to provide
        custom queries on an :class:`~eulexistdb.models.XmlModel`.
        """
        has_options = hasattr(settings, 'EXISTDB_FULLTEXT_OPTIONS')
        fulltext_opts = settings.EXISTDB_FULLTEXT_OPTIONS if has_options else {}

        queryset = QuerySet(model=self.model,
                            xpath=self.xpath,
                            using=ExistDB(),
                            collection=settings.EXISTDB_ROOT_COLLECTION,
                            fulltext_options=fulltext_opts)
        return queryset
Example #26
0
def index(request):
    """Render the start page with text/author counts and the wiki page list."""
    # XML and SPARQL numbers

    # Count texts and authors: chapter 1 only, limited to the listed fields
    documents = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)
    documents = documents.filter(chapter='1').only('title', 'title_en', 'author')
    # TODO: order by title
    documents = documents.order_by('title_en')

    number_texts = documents.count()
    number_authors = documents.distinct().count()

    # [url-escaped name, display name] pairs, sorted by name
    wiki_pages = [
        [page.replace(" ", "%20"), page]
        for page in sorted(os.listdir("/docker/dublin-store/sinology/mainSpace"))
    ]

    data = dict(number_texts=number_texts, number_authors=number_authors,
                tei_documents=documents, wiki_pages=wiki_pages)

    return render(request, 'roche/index.html', data)
Example #27
0
# Load resources
#
# Walk the local 'resources' tree and mirror it into the database below
# 'docker/': one collection per directory, one document per file.
# NOTE(review): the trailing True arguments are presumably "overwrite"
# flags -- confirm against the ExistDB API.
for (dirpath, dirnames, filenames) in walk('resources'):
    xmldb.createCollection('docker' + '/' + dirpath, True)
    if filenames:
        for filename in filenames:
            with open(dirpath + '/' + filename) as f:
                xmldb.load(f, os.path.join('docker', dirpath, filename), True)

#
# Load TEI into solr
#
# NOTE(review): the visible part of this loop only builds `doc`; nothing
# here sends it to `si` yet.
si = sunburnt.SolrInterface(SOLR_SERVER_URL + '/')

# every TEI document in the texts collection
qs = QuerySet(using=ExistDB(),
              xpath='/tei:TEI',
              collection='docker/texts/',
              model=RocheTEI)

# i counts the documents processed so far; printed as a progress indicator
i = 0
for q in qs:
    print i

    # one entry in doc["text"] per body div, with spaces and newlines removed
    doc = collections.defaultdict(list)
    for div in q.body.div:
        text = div.text.replace(" ", "").replace("\n", "")
        doc["text"].append(text)

    i = i + 1
    # the document id combines title and chapter
    doc['id'] = q.title + '/' + str(q.chapter)
    doc['title'] = q.title
    doc['author'] = q.author
Example #28
0
 def setUp(self):
     # connect, load the fixtures, and keep an unfiltered queryset on self.qs
     database = ExistDB(server_url=EXISTDB_SERVER_URL)
     self.db = database
     load_fixtures(database)
     self.qs = QuerySet(
         using=database,
         xpath='/root',
         collection=COLLECTION,
         model=QueryTestModel,
     )
Example #29
0
class ExistQueryTest(unittest.TestCase):

    def setUp(self):
        # connect, load the fixtures, and keep an unfiltered queryset on self.qs
        database = ExistDB(server_url=EXISTDB_SERVER_URL)
        self.db = database
        load_fixtures(database)
        self.qs = QuerySet(
            using=database,
            xpath='/root',
            collection=COLLECTION,
            model=QueryTestModel,
        )

    def tearDown(self):
        # remove the fixture collection again after each test
        database = self.db
        database.removeCollection(COLLECTION)

    def test_count(self):
        # fixtures are loaded once more here, on top of setUp
        load_fixtures(self.db)
        total = self.qs.count()
        self.assertEqual(NUM_FIXTURES, total, "queryset count returns number of fixtures")

    def test_getitem(self):
        # sort so that positions are predictable
        ordered = self.qs.order_by('id')
        for position, expected_id in enumerate(("abc", "def", "one", "xyz")):
            self.assertEqual(expected_id, ordered[position].id)

    def test_getitem_typeerror(self):
        # a non-integer, non-slice index is rejected
        self.assertRaises(TypeError, lambda: self.qs["foo"])

    def test_getitem_indexerror(self):
        # a negative index and one past the end both raise IndexError
        for bad_index in (-1, 23):
            self.assertRaises(IndexError, self.qs.__getitem__, bad_index)

    def test_getslice(self):
        # slicing yields another QuerySet limited to the requested range
        first_two = self.qs.order_by('id')[0:2]
        self.assert_(isinstance(first_two, QuerySet))
        self.assert_(isinstance(first_two[0], QueryTestModel))
        self.assertEqual(2, first_two.count())
        self.assertEqual(2, len(first_two))
        self.assertEqual('abc', first_two[0].id)
        self.assertEqual('def', first_two[1].id)
        self.assertRaises(IndexError, first_two.__getitem__, 2)

        middle = self.qs.order_by('id')[1:3]
        self.assertEqual('def', middle[0].id)
        self.assertEqual('one', middle[1].id)

        # upper bound beyond the last item
        tail = self.qs.order_by('id')[3:5]
        self.assertEqual(1, tail.count())
        self.assertEqual('xyz', tail[0].id)
        self.assertRaises(IndexError, tail.__getitem__, 1)

        # open-ended slices
        head = self.qs.order_by('id')[:2]
        self.assertEqual(2, head.count())
        self.assertEqual('def', head[1].id)

        rest = self.qs.order_by('id')[1:]
        self.assertEqual(3, rest.count())
        self.assertEqual('one', rest[1].id)
        self.assertEqual('xyz', rest[2].id)

    def test_filter(self):
        # filter() returns a narrowed queryset and leaves self.qs untouched
        matching = self.qs.filter(contains="two")
        self.assertEqual(1, matching.count(), "count returns 1 when filtered - contains 'two'")
        first = matching[0]
        self.assertEqual("two", first.name, "name matches filter")
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_field(self):
        # filter on a model field (name)
        fqs = self.qs.filter(name="one")
        # Report the filtered count in the message; it used to report the
        # count of the unfiltered queryset.
        found = fqs.count()
        self.assertEqual(1, found, "count returns 1 when filtered on name = 'one' (got %s)"
                         % found)
        self.assertEqual("one", fqs[0].name, "name matches filter")
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_field_xpath(self):
        # filter on the id field
        fqs = self.qs.filter(id="abc")
        # Report the filtered count in the message; it used to report the
        # count of the unfiltered queryset.
        found = fqs.count()
        self.assertEqual(1, found, "count returns 1 when filtered on @id = 'abc' (got %s)"
                         % found)
        self.assertEqual("two", fqs[0].name, "name returned is correct for id filter")
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_field_contains(self):
        # substring match on name
        matching = self.qs.filter(name__contains="o")
        failure_note = "should get 3 matches for filter on name contains 'o' (got %s)"
        self.assertEqual(3, matching.count(), failure_note % matching.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_field_contains_special(self):
        # search terms containing quotes
        quoted = self.qs.filter(description__contains=' "quote" ')
        self.assertEqual(
            1, quoted.count(),
            "should get 1 match for filter on desc contains ' \"quote\" ' (got %s)" % quoted.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

        # search terms containing an ampersand and punctuation
        special = self.qs.filter(description__contains=' &!')
        self.assertEqual(
            1, special.count(),
            "should get 1 match for filter on desc contains ' &!' (got %s)" % special.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_field_startswith(self):
        # prefix match on name
        matching = self.qs.filter(name__startswith="o")
        failure_note = "should get 1 match for filter on name starts with 'o' (got %s)"
        self.assertEqual(1, matching.count(), failure_note % matching.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_subobject_field(self):
        # filter on a field of a nested object via the double-underscore path
        matching = self.qs.filter(sub__subname="la")
        failure_note = "should get 1 match for filter on sub_subname = 'la' (got %s)"
        self.assertEqual(1, matching.count(), failure_note % matching.count())

    def test_filter_in(self):
        unchanged_note = "main queryset remains unchanged by filter"
        docname_note = "should get 2 matches for filter on document name in list (got %s)"

        # id in list; only two of the three ids are expected to match
        by_id = self.qs.filter(id__in=['abc', 'xyz', 'qrs'])
        self.assertEqual(
            2, by_id.count(),
            "should get 2 matches for filter on id in list (got %s)" % by_id.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), unchanged_note)

        # document name in list
        by_doc = self.qs.filter(document_name__in=['f1.xml', 'f2.xml'])
        self.assertEqual(2, by_doc.count(), docname_note % by_doc.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), unchanged_note)

        # filtering on a special field - it should still be returned via only
        with_only = (self.qs.filter(document_name__in=['f1.xml', 'f2.xml'])
                     .only('id', 'document_name').order_by('document_name'))
        self.assertEqual(2, with_only.count(), docname_note % with_only.count())
        self.assertEqual('f1.xml', with_only[0].document_name)

        # ... and likewise via also
        with_also = (self.qs.filter(document_name__in=['f1.xml', 'f2.xml'])
                     .also('id', 'document_name').order_by('document_name'))
        self.assertEqual(2, with_also.count(), docname_note % with_also.count())
        self.assertEqual('f1.xml', with_also[0].document_name)

    def test_filter_exists(self):
        # (filter kwargs, expected count, failure message)
        cases = (
            ({'id__exists': True}, 4,
             "filter on id exists=true returns all documents"),
            ({'id__exists': False}, 0,
             "filter on id exists=false returns no documents"),
            ({'wnn__exists': False}, 3,
             "filter on wacky node name exists=false returns 3 documents"),
        )
        for lookup, expected, note in cases:
            matching = self.qs.filter(**lookup)
            self.assertEqual(expected, matching.count(), note)

    def test_or_filter(self):
        # or_filter(): a document matches if either condition holds
        either = self.qs.or_filter(id='abc', name='four').only('id')
        count_note = "should get 2 matches for OR filter on id='abc' or name='four' (got %s)"
        self.assertEqual(2, either.count(), count_note % either.count())
        found_ids = [match.id for match in either.all()]
        self.assert_('abc' in found_ids, 'id "abc" in list of ids when OR filter includes id="abc"')
        self.assert_('def' in found_ids, 'id "def" in list of ids when OR filter includes name="four')

    def test_exclude(self):
        # exclude(): two fixtures are expected to remain
        remaining = self.qs.exclude(id='abc', name='one').only('id')
        count_note = "should get 2 matches for exclude filter on id='abc' or name='one' (got %s)"
        self.assertEqual(2, remaining.count(), count_note % remaining.count())
        remaining_ids = [match.id for match in remaining.all()]
        self.assert_('abc' not in remaining_ids, 'id "abc" should not be in list of ids when exclude id="abc"')

    def test_filter_gtelte(self):
        # comparison lookups: gt, gte, lte, lt

        # model subclass that adds a numeric field to compare against
        class CountQueryTestModel(QueryTestModel):
            name_count = xmlmap.IntegerField('count(name)')

        counted = QuerySet(using=self.db, xpath='/root', collection=COLLECTION,
                           model=CountQueryTestModel)

        # every fixture has exactly one name
        expectations = (
            ('name_count__gt', 0),
            ('name_count__gte', 4),
            ('name_count__lte', 4),
            ('name_count__lt', 0),
        )
        for lookup, expected in expectations:
            self.assertEqual(expected, counted.filter(**{lookup: 1}).count())

    def test_filter_document_path(self):
        # build the full database path of the document named 'one'
        located = self.qs.filter(name='one').only('document_name', 'collection_name').get()
        path = '%s/%s' % (located.collection_name, located.document_name)

        # the path combined with the matching name finds the document ...
        same_doc = self.qs.filter(document_path=path, name='one')
        self.assertEqual(1, same_doc.count())
        # ... and combined with a different name finds nothing
        other_name = self.qs.filter(document_path=path, name='two')
        self.assertEqual(0, other_name.count())

    def test_get(self):
        # get() with a contains lookup returns a single model instance
        result = self.qs.get(contains="two")
        # assertTrue rather than the deprecated assert_ alias (removed in Python 3.12)
        self.assertTrue(isinstance(result, QueryTestModel), "get() with contains returns single result")
        self.assertEqual(result.name, "two", "result returned by get() has correct data")
        # get() must not narrow the queryset it was called on
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_get_toomany(self):
        # get() raises ReturnedMultiple when the lookup matches several documents
        lookup = self.qs.get
        self.assertRaises(ReturnedMultiple, lookup, contains="one")

    def test_get_nomatch(self):
        # get() raises DoesNotExist when the lookup matches nothing
        lookup = self.qs.get
        self.assertRaises(DoesNotExist, lookup, contains="fifty-four")

    def test_get_byname(self):
        # get() with a field lookup (name) returns a single model instance
        result = self.qs.get(name="one")
        # assertTrue rather than the deprecated assert_ alias (removed in Python 3.12);
        # failure message corrected: this lookup is by name, not contains
        self.assertTrue(isinstance(result, QueryTestModel), "get() with name returns single result")
        self.assertEqual(result.name, "one", "result returned by get() has correct data")
        # get() must not narrow the queryset it was called on
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_get(self):
        # get() with no arguments on a chain of filters returns the one match
        result = self.qs.filter(contains="one").filter(name="two").get()
        # assertTrue rather than the deprecated assert_ alias (removed in Python 3.12)
        self.assertTrue(isinstance(result, QueryTestModel))
        self.assertEqual("two", result.name, "filtered get() returns correct data")
        # chained filters must not narrow the queryset they were called on
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_reset(self):
        # after a filter() call and a reset(), the main queryset still
        # counts every fixture
        main_qs = self.qs
        main_qs.filter(contains="two")
        main_qs.reset()
        total = main_qs.count()
        self.assertEqual(NUM_FIXTURES, total, "main queryset remains unchanged by filter")

    def test_order_by(self):
        # order_by() sorts by a field; the tests below cover a plain field
        # name, a '-' prefix (reverse) and a '~' prefix (case-insensitive).
        # assertTrue is used rather than the deprecated assert_ alias
        # (removed in Python 3.12).

        # element
        fqs = self.qs.order_by('name')
        self.assertEqual('four', fqs[0].name)
        self.assertEqual('one', fqs[1].name)
        self.assertEqual('three', fqs[2].name)
        self.assertEqual('two', fqs[3].name)
        self.assertTrue('order by ' not in self.qs.query.getQuery(), "main queryset unchanged by order_by()")
        # attribute
        fqs = self.qs.order_by('id')
        self.assertEqual('abc', fqs[0].id)
        self.assertEqual('def', fqs[1].id)
        self.assertEqual('one', fqs[2].id)
        self.assertEqual('xyz', fqs[3].id)
        # reverse sorting
        fqs = self.qs.order_by('-name')
        self.assertEqual('four', fqs[3].name)
        self.assertEqual('two', fqs[0].name)
        fqs = self.qs.order_by('-id')
        self.assertEqual('abc', fqs[3].id)
        self.assertEqual('xyz', fqs[0].id)
        # case-insensitive sorting - upper-case description should not sort first
        fqs = self.qs.order_by('~description')
        self.assertTrue(fqs[0].description.startswith('third'))
        self.assertTrue(fqs[1].description.startswith('This one contains'))
        # reverse case-insensitive sorting - flags in either order
        fqs = self.qs.order_by('~-description')
        self.assertTrue(fqs[3].description.startswith('third'))
        fqs = self.qs.order_by('-~description')
        self.assertTrue(fqs[3].description.startswith('third'))

    def test_only(self):
        # only() restricts which fields are returned; it must return a new
        # queryset and leave the one it was called on untouched.
        # assertTrue is used rather than the deprecated assert_ alias
        # (removed in Python 3.12).
        self.qs.only('name')
        self.assertTrue('element name {' not in self.qs.query.getQuery(), "main queryset unchanged by only()")

        fqs = self.qs.filter(id='one').only('name', 'id', 'sub', 'or_field')
        self.assertTrue(isinstance(fqs[0], QueryTestModel))  # actually a Partial type derived from this
        # attributes that should be present
        self.assertNotEqual(fqs[0].id, None)
        self.assertNotEqual(fqs[0].sub, None)
        self.assertNotEqual(fqs[0].sub.subname, None)
        self.assertNotEqual(fqs[0].or_field, None)
        # attribute not returned
        self.assertEqual(fqs[0].description, None)
        self.assertEqual('one', fqs[0].id)
        self.assertEqual('one', fqs[0].name)
        self.assertEqual('la', fqs[0].sub.subname)
        self.assertEqual('one', fqs[0].or_field)    # = name (first of ORed fields present)

        fqs = self.qs.filter(id='one').only('wnn')
        self.assertTrue(hasattr(fqs[0], "wnn"))
        self.assertEqual(42, fqs[0].wnn)

        # nested field return
        fqs = self.qs.filter(id='one').only('name', 'id', 'sub__subname')
        self.assertEqual('la', fqs[0].sub.subname)

        # xpath function return
        fqs = self.qs.filter(id='one').only('substring')
        self.assertEqual('o', fqs[0].substring)

        # sub-subclass
        fqs = self.qs.filter(id='one').only('sub__ssc')
        self.assertTrue(isinstance(fqs[0], QueryTestModel))

    def test_only_hash(self):
        # no filters are applied, so every test fixture is checked
        for doc in self.qs.only('hash'):
            checksum = doc.hash
            # each returned object should carry a 40-character SHA-1 checksum
            self.assertEqual(40, len(checksum),
                             'xquery result should have 40-character checksum, got %s' % checksum)

    def test_document_name(self):
        # document_name should be populated whether it is requested through
        # only() or through also()
        for method_name in ('only', 'also'):
            request_fields = getattr(self.qs.filter(id='one'), method_name)
            fqs = request_fields('document_name')
            # attribute should be present and hold the fixture's file name
            self.assertNotEqual(fqs[0].document_name, None)
            self.assertEqual(fqs[0].document_name, "f1.xml")

    def test_collection_name(self):
        # collection_name should be populated whether it is requested through
        # only() or through also(); the expected value is the test collection
        # prefixed with '/db'
        for method_name in ('only', 'also'):
            request_fields = getattr(self.qs.filter(id='one'), method_name)
            fqs = request_fields('collection_name')
            self.assertEqual(fqs[0].collection_name, '/db' + COLLECTION)

    def test_only_lastmodified(self):
        # last_modified requested via only() should come back as a datetime
        fqs = self.qs.only('last_modified')
        # no filters are applied, so every test fixture is checked
        for result in fqs:
            # assertTrue rather than the deprecated assert_ alias (removed in Python 3.12)
            self.assertTrue(isinstance(result.last_modified, datetime))

    def test_iter(self):
        # iterating the queryset yields instances of the configured model
        for q in self.qs:
            # assertTrue rather than the deprecated assert_ alias (removed in Python 3.12)
            self.assertTrue(isinstance(q, QueryTestModel))

    def test_slice_iter(self):
        # iterating over a one-item slice of the queryset yields one result
        yielded = sum(1 for _ in self.qs[1:2])
        self.assertEqual(1, yielded)

    def test_also(self):
        # also() returns an extra field alongside the matched node; here the
        # queryset is rooted at //name and the extra field reaches up to the
        # parent root element's id
        class SubqueryTestModel(xmlmap.XmlObject):
            name = xmlmap.StringField('.')
            parent_id = xmlmap.StringField('parent::root/@id')

        qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='//name')
        name = qs.also('parent_id').get(name__exact='two')
        # build the message with %-formatting: string concatenation would
        # raise TypeError when parent_id is None and hide the real failure
        self.assertEqual('abc', name.parent_id,
                         "parent id set correctly when returning at name level with also parent_id specified; should be 'abc', got '%s'"
                         % name.parent_id)

    def test_also_subfield(self):
        # also() accepts subfield paths (node__field) on a NodeField
        class SubqueryTestModel(xmlmap.XmlObject):
            subname = xmlmap.StringField('subname')
            parent = xmlmap.NodeField('parent::root', QueryTestModel)

        sub_qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='//sub')
        extra_fields = ('parent__id', 'parent__wnn')
        sub = sub_qs.also(*extra_fields).get(subname__exact='la')
        # the matched node's own field plus both requested parent subfields
        self.assertEqual('la', sub.subname)
        parent = sub.parent
        self.assertEqual('one', parent.id)
        self.assertEqual(42, sub.parent.wnn)

    def test_also_raw(self):
        # also_raw() populates fields from caller-supplied raw xquery
        # NOTE(review): %(xq_var)s is presumably substituted by the queryset
        # with its xquery variable - confirm against the QuerySet docs
        class SubqueryTestModel(QueryTestModel):
            myid = xmlmap.StringField('@id')

        base = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='/root')
        raw_qs = base.filter(id='abc').also_raw(myid='string(%(xq_var)s//name/ancestor::root/@id)')
        self.assertEqual('abc', raw_qs[0].myid)
        # filtered version of the queryset with raw
        filtered = raw_qs.filter(name='two').get()
        self.assertEqual('abc', filtered.myid)

        # multiple parameters
        raw_fields = {
            'id': 'string(%(xq_var)s/@id)',
            'name': 'normalize-space(%(xq_var)s//name)',
        }
        multi = raw_qs.filter(id='abc').also_raw(**raw_fields).get(id='abc')
        self.assertEqual('abc', multi.id)
        self.assertEqual('two', multi.name)

    def test_only_raw(self):
        # only_raw() populates fields from caller-supplied raw xquery
        # NOTE(review): %(xq_var)s is presumably substituted by the queryset
        # with its xquery variable - confirm against the QuerySet docs
        root_id_xq = 'xs:string(%(xq_var)s//name/ancestor::root/@id)'

        raw_qs = self.qs.only_raw(id=root_id_xq).filter(name='two')
        self.assertEqual('abc', raw_qs[0].id)
        # filtered version
        self.assertEqual('abc', raw_qs.get().id)

        # when combined with regular only, other fields come back correctly
        only_qs = self.qs.only('name', 'description', 'substring')
        combined = only_qs.only_raw(id=root_id_xq).get(id='abc')
        expected_fields = (
            ('name', 'two'),
            ('substring', 't'),
            ('description', 'this one only has two'),
            ('id', 'abc'),
        )
        for field, value in expected_fields:
            self.assertEqual(value, getattr(combined, field))

        # subfield
        with_sub = only_qs.only_raw(sub__subname='normalize-space(%(xq_var)s//subname)').get(id='one')
        self.assertEqual('la', with_sub.sub.subname)

        # multiple parameters
        raw_fields = {
            'id': 'string(%(xq_var)s/@id)',
            'name': 'normalize-space(%(xq_var)s//name)',
        }
        multi = self.qs.filter(id='abc').only_raw(**raw_fields).get(id='abc')
        self.assertEqual('abc', multi.id)
        self.assertEqual('two', multi.name)

        # list field - multiple return values
        class MyQueryTest(QueryTestModel):
            name = xmlmap.StringListField('name')
        list_qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=MyQueryTest)
        # return one object but find all the names in the test collection
        all_names_xq = 'collection("/db%s")//name' % COLLECTION
        listed = list_qs.filter(id='abc').only_raw(name=all_names_xq).get(id='abc')
        # 4 names in test fixtures - should come back as a list of those 4 names
        self.assertEqual(4, len(listed.name))

    def test_getDocument(self):
        # getDocument() loads a single document by name as a model instance
        obj = self.qs.getDocument("f1.xml")
        # assertTrue rather than the deprecated assert_ alias (removed in Python 3.12)
        self.assertTrue(isinstance(obj, QueryTestModel),
                        "object returned by getDocument is instance of QueryTestModel")
        self.assertEqual("one", obj.name)

    def test_distinct(self):
        # distinct() on a queryset rooted at //name returns the name values
        qs = QuerySet(using=self.db, collection=COLLECTION, xpath='//name')
        vals = qs.distinct()
        # assertTrue rather than the deprecated assert_ alias (removed in Python 3.12)
        self.assertTrue('one' in vals)
        self.assertTrue('two' in vals)
        self.assertTrue('three' in vals)
        self.assertTrue('four' in vals)
        # 'abc' is an id in the fixtures, not a name, so it must not appear
        self.assertTrue('abc' not in vals)

    def test_namespaces(self):
        # filtering on a field with a namespace returns the matching document
        matches = self.qs.filter(nsfield='namespaced').all()
        first = matches[0]
        self.assertEqual('namespaced', first.nsfield)
Example #30
0
class ExistQueryTest__FullText(unittest.TestCase):
    # when full-text indexing is enabled, eXist must index files when they are loaded to the db
    # this makes tests *significantly* slower
    # any tests that require full-text queries should be here
    #
    # assertTrue is used throughout rather than the deprecated assert_ alias
    # (removed in Python 3.12)

    # sample lucene configuration for testing full-text queries
    FIXTURE_INDEX = '''
    <collection xmlns="http://exist-db.org/collection-config/1.0">
        <index>
            <lucene>
                <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
                <text qname="description"/>
                <text qname="root"/>
            </lucene>
        </index>
    </collection>
    '''

    def setUp(self):
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL)
        # create index for collection - should be applied to newly loaded files
        self.db.loadCollectionIndex(COLLECTION, self.FIXTURE_INDEX)

        load_fixtures(self.db)

        self.qs = QuerySet(using=self.db, xpath='/root',
                           collection=COLLECTION, model=QueryTestModel)

    def tearDown(self):
        # always remove the index configuration, even when removing the
        # collection fails, so it is not left behind for other tests
        try:
            self.db.removeCollection(COLLECTION)
        finally:
            self.db.removeCollectionIndex(COLLECTION)

    def test_filter_fulltext_terms(self):
        fqs = self.qs.filter(description__fulltext_terms='only two')
        # run the count query once and reuse the value in the failure message
        match_count = fqs.count()
        self.assertEqual(1, match_count,
                         "should get 1 match for fulltext_terms search on = 'only two' (got %s)" % match_count)

    def test_filter_fulltext_options(self):
        qs = QuerySet(using=self.db, xpath='/root',
                      collection=COLLECTION, model=QueryTestModel,
                      fulltext_options={'default-operator': 'and'})
        # search for terms present in fixtures - but not both present in one doc
        fqs = qs.filter(description__fulltext_terms='only third')
        # for now, just confirm that the option is passed through to query
        self.assertTrue('<default-operator>and</default-operator>' in fqs.query.getQuery())
        # TODO: test this properly!
        # query options not supported in current version of eXist
        # self.assertEqual(0, fqs.count())

    def test_order_by__fulltext_score(self):
        fqs = self.qs.filter(description__fulltext_terms='one').order_by('-fulltext_score')
        self.assertEqual('one', fqs[0].name)    # one appears 3 times, should be first

    def test_only__fulltext_score(self):
        fqs = self.qs.filter(description__fulltext_terms='one').only('fulltext_score', 'name')
        self.assertTrue(isinstance(fqs[0], QueryTestModel))  # actually a Partial type derived from this
        # fulltext score attribute should be present
        self.assertNotEqual(fqs[0].fulltext_score, None)
        self.assertTrue(float(fqs[0].fulltext_score) > 0.5)    # full-text score should be a float

    def test_fulltext_highlight(self):
        fqs = self.qs.filter(description__fulltext_terms='only two')
        # result from fulltext search - by default, xml should have exist:match tags
        self.assertTrue('<exist:match' in fqs[0].serialize())

        fqs = self.qs.filter(description__fulltext_terms='only two', highlight=False)
        # with highlighting disabled, should not have exist:match tags
        self.assertTrue('<exist:match' not in fqs[0].serialize())

        # order of args in the same filter should not matter
        fqs = self.qs.filter(highlight=False, description__fulltext_terms='only two')
        # with highlighting disabled, should not have exist:match tags
        self.assertTrue('<exist:match' not in fqs[0].serialize())

        # separate filters should also work
        fqs = self.qs.filter(description__fulltext_terms='only two').filter(highlight=False)
        # with highlighting disabled, should not have exist:match tags
        self.assertTrue('<exist:match' not in fqs[0].serialize())

    def test_highlight(self):
        fqs = self.qs.filter(highlight='supercalifragilistic')
        self.assertEqual(4, fqs.count(),
                         "highlight filter returns all documents even though search term is not present")

        fqs = self.qs.filter(highlight='one').order_by('id')
        self.assertTrue('<exist:match' in fqs[0].serialize())

    def test_match_count(self):
        fqs = self.qs.filter(id='one', highlight='one').only('match_count')
        self.assertEqual(fqs[0].match_count, 4, "4 matched words should be found")

    def test_using(self):
        fqs = self.qs.using('new-collection')
        # using should update the collection on the xquery object
        self.assertEqual('new-collection', fqs.query.collection)