Example #1
0
def preview(request, archive):
    """Load an EAD file into the eXist preview collection.

    On POST, the file named by ``request.POST['filename']`` is run through
    the prepublication check for the archive identified by the ``archive``
    slug (404 if no such archive).  If the check does not pass, the response
    it produced is returned unchanged.  Otherwise the file is loaded to
    ``settings.EXISTDB_PREVIEW_COLLECTION`` under the same filename.

    :returns: a 303 redirect to the preview of the loaded document on
        success, or the rendered ``publish-errors`` page when the load fails
    """
    if request.method == 'POST':

        archive = get_object_or_404(Archive, slug=archive)
        filename = request.POST['filename']

        errors = []
        # Exception handed to the error template.  It has to be bound up
        # front: the load can fail by returning a false value without
        # raising, and the template context is built either way.
        err = None

        try:
            # only load to exist if document passes publication check
            ok, response, dbpath, fullpath = _prepublication_check(
                request, filename, archive, mode='preview')
            if ok is not True:
                return response

            db = ExistDB()
            # load the document to the *preview* collection in eXist
            # with the same filename
            preview_dbpath = settings.EXISTDB_PREVIEW_COLLECTION + "/" + filename
            # close the file handle as soon as the load has finished
            with open(fullpath, 'r') as ead_file:
                success = db.load(ead_file, preview_dbpath, overwrite=True)
        except ExistDBException as exc:
            err = exc
            success = False
            errors.append(exc.message())

        if success:
            # load the file as a FindingAid object so we can generate the preview url
            ead = load_xmlobject_from_file(fullpath, FindingAid)
            messages.success(request, 'Successfully loaded <b>%s</b> for preview.' % filename)
            # redirect to document preview page with code 303 (See Other)
            return HttpResponseSeeOtherRedirect(
                reverse('fa-admin:preview:findingaid', kwargs={'id': ead.eadid}))

        return render(request, 'fa_admin/publish-errors.html',
                      {'errors': errors, 'filename': filename,
                       'mode': 'preview', 'exception': err})
Example #2
0
 def _remove_file_from_exist(self, filename):
     """Delete one fixture document from the eXist root collection.

     Only the last path component of ``filename`` is used; the document
     is addressed under ``settings.EXISTDB_ROOT_COLLECTION``.  Errors
     reported by eXist are deliberately ignored, since a test may have
     removed the fixture itself.
     """
     exist = ExistDB()
     doc_name = path.basename(filename)
     doc_path = path.join(settings.EXISTDB_ROOT_COLLECTION, doc_name)
     try:
         exist.removeDocument(doc_path)
     except ExistDBException:
         # best effort: no known way to tell a real problem from
         # "already removed by the test"
         pass
Example #3
0
 def _remove_file_from_exist(self, file):
     """Remove the fixture ``file`` from the eXist root collection.

     The directory part of ``file`` is discarded and the remaining name
     is joined onto ``settings.EXISTDB_ROOT_COLLECTION``.  An
     ``ExistDBException`` raised by the removal is swallowed because
     tests are allowed to delete fixtures on their own.
     """
     connection = ExistDB()
     _, short_name = path.split(file)
     target = path.join(settings.EXISTDB_ROOT_COLLECTION, short_name)
     try:
         connection.removeDocument(target)
     except ExistDBException:
         # intentionally ignored, see docstring
         pass
Example #4
0
    def _fixture_teardown(self):
        """Remove eXist fixtures, then run the parent fixture teardown.

        When the test case defines ``exist_fixtures``, the collection
        index (key ``index``), every ``*.xml`` file found in the fixture
        directory (key ``directory``) and every explicitly listed file
        (key ``files``) are removed from eXist.
        """
        if hasattr(self, 'exist_fixtures'):
            fixtures = self.exist_fixtures
            db = ExistDB()
            if 'index' in fixtures:
                db.removeCollectionIndex(settings.EXISTDB_ROOT_COLLECTION)

            # collect everything to remove first: directory contents,
            # then the explicitly listed files
            doomed = []
            if 'directory' in fixtures:
                pattern = path.join(fixtures['directory'], '*.xml')
                doomed.extend(glob(pattern))
            if 'files' in fixtures:
                doomed.extend(fixtures['files'])

            for xml_file in doomed:
                self._remove_file_from_exist(xml_file)

        return super(TestCase, self)._fixture_teardown()
Example #5
0
    def use_test_collection(self):
        """Point ``settings.EXISTDB_ROOT_COLLECTION`` at a test collection.

        The current root collection (or ``None`` if unset) is remembered
        in ``self.stored_default_collection``.  The test collection is
        ``settings.EXISTDB_TEST_COLLECTION`` when that is set to a true
        value, otherwise the current root collection (default
        ``/default``) with ``_test`` appended.  The collection is then
        created in eXist.
        """
        self.stored_default_collection = getattr(settings, "EXISTDB_ROOT_COLLECTION", None)

        test_collection = getattr(settings, "EXISTDB_TEST_COLLECTION", None)
        if not test_collection:
            current_root = getattr(settings, "EXISTDB_ROOT_COLLECTION", "/default")
            test_collection = current_root + "_test"
        settings.EXISTDB_ROOT_COLLECTION = test_collection

        print >> sys.stderr, "Creating eXist Test Collection: %s" % \
            settings.EXISTDB_ROOT_COLLECTION
        # the connection is created only after the root collection has
        # been switched to the test collection
        exist = ExistDB()
        # second argument: don't complain if the collection already exists
        exist.createCollection(settings.EXISTDB_ROOT_COLLECTION, True)
Example #6
0
def delete_ead(request, id, archive=None):
    """ Delete a published EAD.

    On GET, display a form with information about the document to be removed.

    On POST, actually remove the specified EAD document from eXist and create (or
    update) a deleted record for that document in the relational DB.

    :param request: the current HTTP request
    :param id: eadid of the finding aid to be deleted
    :param archive: optional archive slug; when given, the finding aid
        lookup is restricted by the archive's name (an unknown slug
        results in a 404)
    """
    # retrieve the finding aid to be deleted with fields needed for
    # form display or actual deletion

    # optionally restrict the lookup to finding aids whose repository
    # matches the archive name (quoted, i.e. searched as a phrase)
    if archive is not None:
        arch = get_object_or_404(Archive, slug=archive)
        filter = {'repository__fulltext_terms': '"%s"' % arch.name}
    else:
        filter = {}

    # NOTE(review): the handler(s) for this try block are not visible in
    # this excerpt
    try:
        fa = FindingAid.objects.only('eadid', 'unittitle',
                            'document_name', 'collection_name').filter(**filter).get(eadid=id)

        # if this record has been deleted before, get that record and update it
        deleted_info, created = Deleted.objects.get_or_create(eadid=fa.eadid)
        deleted_info.title = unicode(fa.unittitle)   # update with title from current document

        # set to True when the delete form should be (re)displayed
        render_form = False

        # on GET, display delete form
        if request.method == 'GET':
            # pre-populate the form with info from the finding aid to be removed
            delete_form = DeleteForm(instance=deleted_info)
            render_form = True

        else:   # POST : actually delete the document
            delete_form = DeleteForm(request.POST, instance=deleted_info)
            if delete_form.is_valid():
                # NOTE(review): the deletion record is saved here, before
                # the document is removed from eXist, and saved again
                # below once removal succeeds - confirm both are intended
                delete_form.save()
                db = ExistDB()
                try:
                    success = db.removeDocument(fa.collection_name + '/' + fa.document_name)
                    if success:
                        DeleteForm(request.POST, instance=deleted_info).save()
                        messages.success(request, 'Successfully removed <b>%s</b>.' % id)
                    else:
                        # remove exited normally but was not successful
                        messages.error(request, 'Error: failed to removed <b>%s</b>.' % id)
                except ExistDBException, e:
                    # eXist reported an error; pass its message on to the user
                    messages.error(request, "Error: failed to remove <b>%s</b> - %s." \
                                % (id, e.message()))
            else:
Example #7
0
    def _fixture_teardown(self):
        """Clean the eXist fixtures of this test case out of the database.

        Does nothing eXist-related unless ``exist_fixtures`` is defined
        on the test case.  Handles the ``index``, ``directory`` (all
        ``*.xml`` files inside it) and ``files`` keys, then delegates to
        the parent class teardown and returns its result.
        """
        if hasattr(self, 'exist_fixtures'):
            config = self.exist_fixtures
            exist = ExistDB()

            if 'index' in config:
                exist.removeCollectionIndex(settings.EXISTDB_ROOT_COLLECTION)

            if 'directory' in config:
                xml_glob = path.join(config['directory'], '*.xml')
                for fixture_path in glob(xml_glob):
                    self._remove_file_from_exist(fixture_path)

            if 'files' in config:
                for fixture_path in config['files']:
                    self._remove_file_from_exist(fixture_path)

        return super(TestCase, self)._fixture_teardown()
Example #8
0
def preview(request, archive):
    """Load an EAD file to the eXist preview collection, or list previews.

    On POST, the file named by ``request.POST['filename']`` is run through
    the prepublication check for the archive identified by the ``archive``
    slug (404 if no such archive); a failed check returns the response the
    check produced.  A passing file is loaded to
    ``settings.EXISTDB_PREVIEW_COLLECTION`` and the user is redirected
    (303) to the document preview.  If the load fails, the
    ``publish-errors`` page is rendered.

    For any other request method, the list of preview finding aids is
    rendered.
    """
    if request.method == 'POST':

        archive = get_object_or_404(Archive, slug=archive)
        filename = request.POST['filename']

        errors = []
        err = None

        try:
            # only load to exist if document passes publication check
            ok, response, dbpath, fullpath = _prepublication_check(
                request, filename, archive, mode='preview')
            if ok is not True:
                return response

            db = ExistDB()
            # load the document to the *preview* collection in eXist
            # with the same filename
            preview_dbpath = settings.EXISTDB_PREVIEW_COLLECTION + "/" + filename
            # close the file handle as soon as the load has finished
            with open(fullpath, 'r') as ead_file:
                success = db.load(ead_file, preview_dbpath)
        except ExistDBException as exc:
            # copy to a separate name: on Python 3 the ``as`` target is
            # unbound again when the except block ends
            err = exc
            success = False
            errors.append(exc.message())

        if success:
            # load the file as a FindingAid object so we can generate the preview url
            ead = load_xmlobject_from_file(fullpath, FindingAid)
            messages.success(request, 'Successfully loaded <b>%s</b> for preview.' % filename)
            # redirect to document preview page with code 303 (See Other)
            return HttpResponseSeeOtherRedirect(
                reverse('fa-admin:preview:findingaid', kwargs={'id': ead.eadid}))

        # no exception but no success means the load failed;
        # *probably* due to insufficient permissions.  Any false result
        # gets the generic message so the error page is never empty.
        if not errors:
            errors.append('Failed to load the document to the preview collection')

        return render(request, 'fa_admin/publish-errors.html',
                      {'errors': errors, 'filename': filename,
                       'mode': 'preview', 'exception': err})

    # NOTE: preview list is not used anymore; functionality is handled
    # by main admin view; if we revisit preview list, to be more usable it
    # should be filterable by archive
    else:
        fa = get_findingaid(preview=True, only=['eadid', 'list_title', 'last_modified'],
                            order_by='last_modified')
        return render(request, 'fa_admin/preview_list.html',
                      {'findingaids': fa})
Example #9
0
    def _fixture_setup(self):
        """Load eXist fixtures, then run the parent fixture setup.

        When the test case defines ``exist_fixtures``, the following keys
        are honoured: ``index`` (path of a collection index file loaded
        for ``settings.EXISTDB_ROOT_COLLECTION``), ``directory`` (every
        ``*.xml`` file inside it is loaded) and ``files`` (an iterable of
        file paths to load).
        """
        if hasattr(self, 'exist_fixtures'):
            db = ExistDB()
            # load index; the file is closed again once it has been sent
            if 'index' in self.exist_fixtures:
                with open(self.exist_fixtures['index']) as index_file:
                    db.loadCollectionIndex(settings.EXISTDB_ROOT_COLLECTION,
                                           index_file)
            if 'directory' in self.exist_fixtures:
                for filename in glob(path.join(self.exist_fixtures['directory'], '*.xml')):
                    self._load_file_to_exist(filename)
            if 'files' in self.exist_fixtures:
                for filename in self.exist_fixtures['files']:
                    self._load_file_to_exist(filename)

        return super(TestCase, self)._fixture_setup()
Example #10
0
def index(request):
    """Render the landing page with text/author counts and wiki pages.

    Queries the first chapter of every TEI document in ``docker/texts/``
    (fields ``title``, ``title_en`` and ``author``, ordered by
    ``title_en``) and lists the entries of the wiki directory, each
    paired with a copy of its name in which spaces are replaced by
    ``%20``.
    """
    # XML and SPARQL numbers

    # Count texts and authors
    tei_documents = QuerySet(using=ExistDB(),
                             xpath='/tei:TEI',
                             collection='docker/texts/',
                             model=RocheTEI)
    tei_documents = tei_documents.filter(chapter='1')
    tei_documents = tei_documents.only('title', 'title_en', 'author')
    # TODO: order by title
    tei_documents = tei_documents.order_by('title_en')

    text_total = tei_documents.count()
    author_total = tei_documents.distinct().count()

    wiki_dir = "/docker/dublin-store/sinology/mainSpace"
    wiki_pages = [[name.replace(" ", "%20"), name]
                  for name in sorted(os.listdir(wiki_dir))]

    return render(request, 'roche/index.html', {
        'number_texts': text_total,
        'number_authors': author_total,
        'tei_documents': tei_documents,
        "wiki_pages": wiki_pages,
    })
Example #11
0
    def _fixture_setup(self):
        """Load the eXist fixtures declared on the test case.

        Nothing eXist-related happens unless ``exist_fixtures`` is
        defined.  Supported keys: ``index`` (collection index file for
        ``settings.EXISTDB_ROOT_COLLECTION``), ``directory`` (all
        ``*.xml`` files inside it) and ``files`` (explicit file paths).
        The parent class setup runs afterwards and its result is
        returned.
        """
        if hasattr(self, 'exist_fixtures'):
            db = ExistDB()
            # load index, closing the file once it has been sent
            if 'index' in self.exist_fixtures:
                with open(self.exist_fixtures['index']) as index_file:
                    db.loadCollectionIndex(settings.EXISTDB_ROOT_COLLECTION,
                                           index_file)
            if 'directory' in self.exist_fixtures:
                for file in glob(
                        path.join(self.exist_fixtures['directory'], '*.xml')):
                    self._load_file_to_exist(file)
            if 'files' in self.exist_fixtures:
                for file in self.exist_fixtures['files']:
                    self._load_file_to_exist(file)

        return super(TestCase, self)._fixture_setup()
Example #12
0
    def restore_root_collection(self):
        """Remove the eXist test collection and restore the root setting.

        Nothing happens when ``self.stored_default_collection`` is
        ``None``.  A failure to remove the test collection is reported on
        stderr and does not prevent the setting from being restored.
        """
        # if use_test_collection didn't run, don't change anything
        if self.stored_default_collection is None:
            return

        print >> sys.stderr, "Removing eXist Test Collection: %s" % settings.EXISTDB_ROOT_COLLECTION
        # the connection is created while the test collection is still
        # configured as root collection
        exist = ExistDB()
        try:
            # remove test collection
            exist.removeCollection(settings.EXISTDB_ROOT_COLLECTION)
        except ExistDBException as removal_error:
            print >> sys.stderr, "Error removing collection %s: %s" \
                % (settings.EXISTDB_ROOT_COLLECTION, removal_error)

        print >> sys.stderr, "Restoring eXist Root Collection: %s" \
            % self.stored_default_collection
        settings.EXISTDB_ROOT_COLLECTION = self.stored_default_collection
Example #13
0
def visual_places(request, title, juan):
    """Render the place-name visualisation for one chapter of a text.

    :param request: the current HTTP request
    :param title: title of the text to look up
    :param juan: chapter to look up
    :returns: the rendered ``r/visual_places.html`` template with the
        matching documents, their collected place names and ``juan``
    """
    qs = QuerySet(using=ExistDB(),
                  xpath='/tei:TEI',
                  collection='docker/texts/',
                  model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    places = []
    for q in qs:
        places.extend(q.place_names)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)

    # NOTE(review): the query result is not used by this view yet; the
    # call is kept so the view behaves as before.
    try:
        sparql_result = sparql.query()
    except Exception:
        # best effort: a failing SPARQL endpoint must not break the page,
        # but KeyboardInterrupt/SystemExit are no longer swallowed
        sparql_result = {}

    return render_to_response('r/visual_places.html', {
        'tei_documents': qs,
        'places': places,
        'juan': juan,
    },
                              context_instance=RequestContext(request))
Example #14
0
def index(request):
    """Render the browser index with one entry per text.

    Only first chapters are queried so that each title shows up once;
    the fields fetched are ``title``, ``title_en`` and ``author``.
    """
    all_texts = QuerySet(using=ExistDB(),
                         xpath='/tei:TEI',
                         collection='docker/texts/',
                         model=RocheTEI)

    # Make titles unique (maybe there is a better method?)
    first_chapters = all_texts.filter(chapter='1').only('title', 'title_en', 'author')

    return render_to_response('browser/index.html',
                              {'tei_documents': first_chapters},
                              context_instance=RequestContext(request))
Example #15
0
def index_title(request, letter):
    """Render the browser index restricted to titles starting with ``letter``."""
    all_texts = QuerySet(using=ExistDB(),
                         xpath='/tei:TEI',
                         collection='docker/texts/',
                         model=Tei)

    # filter by titles starting with letter
    matching = all_texts.filter(title__startswith=letter)

    context = {'tei_documents': matching}
    return render_to_response('browser/index.html', context,
                              context_instance=RequestContext(request))
Example #16
0
    def restore_root_collection(self):
        """Remove the eXist test collection and restore the root setting.

        Safe to call even when ``use_test_collection`` never ran: the
        ``EXISTDB_ROOT_COLLECTION_REAL`` setting is only deleted when
        present, and nothing else is changed unless a stored default
        collection exists.  A failure to remove the test collection is
        reported on stderr and does not prevent the restore.
        """
        # only drop the marker setting if it is actually there; an
        # unconditional delattr raises AttributeError otherwise
        if hasattr(settings, "EXISTDB_ROOT_COLLECTION_REAL"):
            delattr(settings, "EXISTDB_ROOT_COLLECTION_REAL")

        # if use_test_collection didn't run, don't change anything
        if getattr(self, "stored_default_collection", None) is None:
            return

        print >> sys.stderr, "Removing eXist Test Collection: %s" % settings.EXISTDB_ROOT_COLLECTION
        # before restoring existdb non-test root collection, init db connection
        db = ExistDB()
        try:
            # remove test collection
            db.removeCollection(settings.EXISTDB_ROOT_COLLECTION)
        except ExistDBException as e:
            print >> sys.stderr, "Error removing collection %s: %s" \
                % (settings.EXISTDB_ROOT_COLLECTION, e)

        print >> sys.stderr, "Restoring eXist Root Collection: %s" \
            % self.stored_default_collection
        settings.EXISTDB_ROOT_COLLECTION = self.stored_default_collection
Example #17
0
    def setUp(self):
        """Connect to eXist, load index and fixtures, and build ``self.qs``."""
        self.db = exist = ExistDB(server_url=EXISTDB_SERVER_URL)
        # the index is loaded before the fixtures so that it is applied
        # to the newly loaded files
        exist.loadCollectionIndex(COLLECTION, self.FIXTURE_INDEX)

        load_fixtures(exist)

        self.qs = QuerySet(
            using=exist,
            xpath='/root',
            collection=COLLECTION,
            model=QueryTestModel,
        )
Example #18
0
def index(request):
    """Render the search index with a fixed full-text query on the body.

    The query uses ``and`` as the default full-text operator.
    """
    search_options = {'default-operator': 'and'}
    hits = QuerySet(using=ExistDB(),
                    xpath='/tei:TEI',
                    collection='docker/texts/',
                    model=RocheTEI,
                    fulltext_options=search_options).filter(body__fulltext_terms='至')

    context = {'tei_documents': hits}
    return render_to_response('search/index.html', context)
    def setUp(self):
        """Create the test collection, load both fixtures, and make the
        test collection the eXist root collection.

        The previous root collection is kept in ``self._root_collection``.
        """
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL,
                          username=EXISTDB_SERVER_USER,
                          password=EXISTDB_SERVER_PASSWORD)
        self.db.createCollection(self.COLLECTION, True)

        test_dir = os.path.dirname(os.path.abspath(__file__))
        for fixture_name in ('goodbye-english.xml', 'goodbye-french.xml'):
            fixture = os.path.join(test_dir, 'exist_fixtures', fixture_name)
            # close each fixture file once it has been loaded
            with open(fixture) as fixture_file:
                self.db.load(fixture_file,
                             self.COLLECTION + '/' + fixture_name, True)

        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION
Example #20
0
def index_author(request, author, startswith):
    """Render the browser index filtered by author.

    :param author: author name, or its beginning when ``startswith`` is true
    :param startswith: when true, match authors starting with ``author``;
        otherwise require an exact match
    """
    all_texts = QuerySet(using=ExistDB(),
                         xpath='/tei:TEI',
                         collection='docker/texts/',
                         model=Tei)

    # prefix match or exact match, depending on the flag
    lookup = 'author__startswith' if startswith else 'author'
    by_author = all_texts.filter(**{lookup: author})

    return render_to_response('browser/index.html',
                              {'tei_documents': by_author},
                              context_instance=RequestContext(request))
Example #21
0
    def use_test_collection(self):
        """Switch ``settings.EXISTDB_ROOT_COLLECTION`` to a test collection.

        The current root collection (``None`` if unset) is stored both in
        ``self.stored_default_collection`` and in the
        ``EXISTDB_ROOT_COLLECTION_REAL`` setting.  The test collection is
        ``settings.EXISTDB_TEST_COLLECTION`` when that is set to a true
        value, otherwise the current root collection (default
        ``/default``) with ``_test`` appended; it is created in eXist.
        """
        original_root = getattr(settings, "EXISTDB_ROOT_COLLECTION", None)
        self.stored_default_collection = original_root
        setattr(settings, "EXISTDB_ROOT_COLLECTION_REAL", original_root)

        configured_test = getattr(settings, "EXISTDB_TEST_COLLECTION", None)
        if configured_test:
            settings.EXISTDB_ROOT_COLLECTION = configured_test
        else:
            fallback_root = getattr(settings, "EXISTDB_ROOT_COLLECTION", "/default")
            settings.EXISTDB_ROOT_COLLECTION = fallback_root + "_test"

        print >> sys.stderr, "Creating eXist Test Collection: %s" % \
            settings.EXISTDB_ROOT_COLLECTION
        # the connection is created only after the root collection has
        # been switched to the test collection
        exist = ExistDB()
        # second argument: don't complain if the collection already exists
        exist.createCollection(settings.EXISTDB_ROOT_COLLECTION, True)
Example #22
0
def text_info(request, title):
    """Render the information page for the text called ``title``.

    For every matching document a ``[chapter, chapter title, character
    count]`` entry is collected; the character count ignores everything
    matched by ``RE_INTERPUCTION`` as well as newlines.  Chapter titles
    are stripped of spaces and newlines and cut to 70 characters
    (``'XXX'`` when missing); a missing chapter counts as ``1``.
    """
    qs = QuerySet(using=ExistDB(),
                  xpath='/tei:TEI',
                  collection='docker/texts/',
                  model=RocheTEI).filter(title=title)

    chapter_titles = []
    for doc in qs:
        number_characters = sum(
            len(re.sub(RE_INTERPUCTION, '', section.text).replace("\n", ""))
            for section in doc.body.div)

        if doc.chapter_title:
            heading = doc.chapter_title.replace(" ", "").replace("\n", "")[:70]
        else:
            heading = 'XXX'

        chapter_titles.append([doc.chapter or 1, heading, number_characters])

    # collecting place names, persons and terms is currently disabled,
    # so these are always passed to the template empty
    place_names, persons, terms = [], [], []

    # Place names for leaflet (placeholder data)
    js_data = json.dumps([[[50.5, 30.5], "test"]])

    context = {
        'tei_documents': qs,
        'tei_transform': "",
        'place_names': place_names,
        'persons': persons,
        'terms': terms,
        'js_data': js_data,
        'chapter_titles': sorted(chapter_titles),
    }
    return render_to_response('browser/text_view_info.html', context,
                              context_instance=RequestContext(request))
Example #23
0
class ModelTest(unittest.TestCase):
    """Tests that query ``Parting`` objects from fixtures loaded into eXist."""
    COLLECTION = settings.EXISTDB_TEST_COLLECTION

    def setUp(self):
        """Create the test collection, load both fixtures, and make the
        test collection the eXist root collection for the test."""
        self.db = ExistDB()
        self.db.createCollection(self.COLLECTION, True)

        test_dir = os.path.dirname(os.path.abspath(__file__))
        for fixture_name in ('goodbye-english.xml', 'goodbye-french.xml'):
            fixture = os.path.join(test_dir, 'exist_fixtures', fixture_name)
            # close each fixture file once it has been loaded
            with open(fixture) as fixture_file:
                self.db.load(fixture_file,
                             self.COLLECTION + '/' + fixture_name, True)

        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        """Remove the test collection and restore the root collection."""
        self.db.removeCollection(self.COLLECTION)

        settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        """The default manager finds both loaded fixture documents."""
        partings = Parting.objects.all()
        self.assertEqual(2, partings.count())
Example #24
0
class ModelTest(unittest.TestCase):
    """Tests that query ``Parting`` and ``Exclamation`` objects from
    fixtures loaded into eXist."""
    COLLECTION = EXISTDB_TEST_COLLECTION

    def setUp(self):
        """Create the test collection, load both fixtures, and make the
        test collection the eXist root collection for the test."""
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL,
            username=EXISTDB_SERVER_USER, password=EXISTDB_SERVER_PASSWORD)
        self.db.createCollection(self.COLLECTION, True)

        test_dir = os.path.dirname(os.path.abspath(__file__))
        for fixture_name in ('goodbye-english.xml', 'goodbye-french.xml'):
            fixture = os.path.join(test_dir, 'exist_fixtures', fixture_name)
            # close each fixture file once it has been loaded
            with open(fixture) as fixture_file:
                self.db.load(fixture_file,
                             self.COLLECTION + '/' + fixture_name)

        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        """Remove the test collection and restore the root collection."""
        self.db.removeCollection(self.COLLECTION)

        settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        """The default manager finds both loaded fixture documents."""
        partings = Parting.objects.all()
        self.assertEqual(2, partings.count())

    def test_sibling_query(self):
        """A sibling node is accessible through ``also``."""
        # test sibling node access via 'also'
        exc = Exclamation.objects.filter(text='Au revoir').also('next').get()
        self.assertEqual('monde', exc.next)
Example #25
0
    def setUp(self):
        """Create the test collection, load both fixtures, and make the
        test collection the eXist root collection.

        The previous root collection is kept in ``self._root_collection``.
        """
        self.db = ExistDB()
        self.db.createCollection(self.COLLECTION, True)

        test_dir = os.path.dirname(os.path.abspath(__file__))
        for fixture_name in ('goodbye-english.xml', 'goodbye-french.xml'):
            fixture = os.path.join(test_dir, 'exist_fixtures', fixture_name)
            # close each fixture file once it has been loaded
            with open(fixture) as fixture_file:
                self.db.load(fixture_file,
                             self.COLLECTION + '/' + fixture_name, True)

        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION
Example #26
0
    def test_ead_lastmodified(self):
        """``ead_lastmodified`` returns a datetime for published and
        preview documents and raises ``Http404`` for an unknown eadid."""
        modified = ead_lastmodified('rqst', 'abbey244')
        self.assertTrue(isinstance(modified, datetime),
                        "ead_lastmodified should return a datetime object")
        date_format = '%Y-%m-%d'
        expected = datetime.now().strftime(date_format)
        value = modified.strftime(date_format)
        self.assertEqual(expected, value,
                     'ead lastmodified should be today, expected %s, got %s' % (expected, value))

        # invalid eadid
        self.assertRaises(Http404, ead_lastmodified, 'rqst', 'bogusid')

        db = ExistDB()
        # preview document - load fixture to preview collection
        fullpath = path.join(exist_fixture_path, 'raoul548.xml')
        preview_path = settings.EXISTDB_PREVIEW_COLLECTION + '/raoul548.xml'
        with open(fullpath, 'r') as fixture_file:
            db.load(fixture_file, preview_path)
        try:
            preview_modified = ead_lastmodified('rqst', 'raoul548', preview=True)
            self.assertTrue(isinstance(preview_modified, datetime),
                            "ead_lastmodified should return a datetime object")
        finally:
            # clean up even when the check above fails, so the preview
            # document does not linger for later tests
            db.removeDocument(preview_path)
Example #27
0
def visual_timeline(request, title, juan):
    """Render a timeline of the persons mentioned in one chapter of a text.

    Persons found in the matching documents are looked up in the result
    of ``SPARQL_TIMELINE_QUERY``; those with a birth and death year
    become ``[person, birth year, death year]`` rows, sorted by birth
    year and passed to the template as a JSON string.

    :param request: the current HTTP request
    :param title: title of the text to look up
    :param juan: chapter to look up
    """
    from operator import itemgetter

    qs = QuerySet(using=ExistDB(),
                  xpath='/tei:TEI',
                  collection='docker/texts/',
                  model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    persons = []
    for q in qs:
        persons.extend(q.persons)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)

    try:
        sparql_result = sparql.query()
    except Exception:
        # best effort: render an empty timeline when the SPARQL endpoint
        # fails, without swallowing KeyboardInterrupt/SystemExit
        sparql_result = {}

    # person -> [birth year, death year], as returned by the endpoint
    sparql_persons = {}
    if sparql_result:
        for binding in sparql_result.bindings:
            sparql_persons[binding[u"person"].value] = [
                binding[u"birthYear"].value, binding[u"deathYear"].value
            ]

    timeline_persons = []
    for p in set(persons):
        lifespan = sparql_persons.get(p)
        if lifespan:
            timeline_persons.append([p, int(lifespan[0]), int(lifespan[1])])

    # order by birth year
    timeline_persons.sort(key=itemgetter(1))
    timeline_persons = json.dumps(timeline_persons)

    return render_to_response('r/visual_timeline.html', {
        'tei_documents': qs,
        'timeline_persons': timeline_persons,
        'juan': juan
    },
                              context_instance=RequestContext(request))
Example #28
0
def text_download(request, title, file_format, juan=0):
    """
    Download a text or a single chapter as plain text file
    or as a (colored) pdf.

    :param request: the current HTTP request
    :param title: title of the text to download
    :param file_format: ``'txt'`` for plain text; any other value
        produces a PDF
    :param juan: chapter to restrict the download to; a false value
        (the default) selects the whole text
    :returns: an attachment response named after the pinyin form of
        ``title``
    """
    import pinyin

    pinyin_title = pinyin.get(title)

    qs = QuerySet(using=ExistDB(),
                  xpath='/tei:TEI',
                  collection='docker/texts/',
                  model=RocheTEI)

    qs = qs.filter(title=title)
    if juan:
        qs = qs.filter(chapter=juan)

    # strip whitespace and start a new paragraph after every full stop;
    # the pieces are joined once instead of growing a string in the loop
    pieces = [
        d.text.replace(" ", "").replace("\n", "").replace(
            "\t", "").replace(u"。", u"。\n\n")
        for q in qs
        for d in q.body.div
    ]
    result = "".join(pieces)

    if file_format == 'txt':
        extension = 'txt'
        response = HttpResponse(content_type='text/plain')
        response.write(result)
    else:
        from fpdf import FPDF

        extension = 'pdf'
        pdf = FPDF(unit='mm', format='A4')
        pdf.add_page()
        pdf.add_font('Droid', '', 'DroidSansFallbackFull.ttf', uni=True)
        pdf.set_font('Droid', '', 12)
        pdf.write(5, unicode(result))
        response = HttpResponse(pdf.output(dest='S'),
                                content_type='application/pdf')

    response['Content-Disposition'] = 'attachment; filename="{}.{}"'.format(
        pinyin_title, extension)

    return response
Example #29
0
def text_view(request, title):
    """Render all chapters of the text called ``title``.

    The chapters are ordered by ``chapter``; each document body is run
    through ``XSL_TRANSFORM_1`` and the serialized results are
    concatenated for the template.
    """
    chapters = QuerySet(using=ExistDB(),
                        xpath='/tei:TEI',
                        collection='docker/texts/',
                        model=RocheTEI)

    # filter by title
    chapters = chapters.filter(title=title).order_by('chapter')

    max_juan = chapters.count()

    rendered = ""
    for chapter_doc in chapters:
        transformed = chapter_doc.body.xsl_transform(xsl=XSL_TRANSFORM_1)
        rendered = rendered + transformed.serialize()

    # the title is taken from the first chapter
    text_title = chapters[0].title

    context = {
        'tei_documents': chapters,
        'tei_transform': rendered,
        'text_title': text_title,
        'max_juan': max_juan,
    }

    return render_to_response('browser/text_view.html', context,
                              context_instance=RequestContext(request))
Example #30
0
    def get_query_set(self):
        """
        Get the default :class:`eulexistdb.db.QuerySet` returned
        by this ``Manager``. Typically this returns a ``QuerySet`` based on
        the ``Manager``'s `xpath`, evaluated in the
        ``settings.EXISTDB_ROOT_COLLECTION`` on a default
        :class:`eulexistdb.db.ExistDB`.

        Full-text options are taken from
        ``settings.EXISTDB_FULLTEXT_OPTIONS`` when that setting exists,
        and are empty otherwise.

        This is a convenient point for developers to customize an object's
        managers. Deriving a child class from Manager and overriding or
        extending this method is a handy way to create custom queries
        accessible from an :class:`~eulexistdb.models.XmlModel`.
        """
        fulltext_opts = getattr(settings, 'EXISTDB_FULLTEXT_OPTIONS', {})

        query_args = dict(
            model=self.model,
            xpath=self.xpath,
            using=ExistDB(),
            collection=settings.EXISTDB_ROOT_COLLECTION,
            fulltext_options=fulltext_opts,
        )
        return QuerySet(**query_args)
Example #31
0
    def render(self, context):
        """Render the DDBC records matching the resolved place name.

        :param context: template context used to resolve ``self.place_name``
        :returns: one ``<p>`` block per matching ``DDBCPlaceName`` with
            its other names, district, notes and location; an empty
            string when the variable cannot be resolved
        """
        from browser.models import DDBCPlaceName

        # Resolve into a local: overwriting self.place_name would replace
        # the template variable with its value, so a second render of the
        # same node (e.g. inside a loop) would fail, and node state would
        # be shared between threads.
        try:
            place_name = self.place_name.resolve(context)
        except template.VariableDoesNotExist:
            return ''

        qs = QuerySet(using=ExistDB(),
                      xpath='/tei:TEI//tei:place',
                      collection='docker/resources/',
                      model=DDBCPlaceName)
        qs = qs.filter(place_names=place_name)

        ddbc_output = u''
        for q in qs:
            ddbc_output += '<p>'
            ddbc_output += 'Other names: ' + u', '.join(q.place_names) + '<br>'
            ddbc_output += 'District: ' + q.district + '<br>'
            ddbc_output += 'Notes: ' + u' '.join(q.notes) + '<br>'
            ddbc_output += 'Location: ' + q.geo + '<br>'
            ddbc_output += '</p>'

        return ddbc_output
class ModelTest(unittest.TestCase):
    """Tests that query ``Parting`` and ``Exclamation`` objects from
    fixtures loaded into eXist."""
    COLLECTION = settings.EXISTDB_TEST_COLLECTION

    def setUp(self):
        """Create the test collection, load both fixtures, and make the
        test collection the eXist root collection for the test."""
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL,
                          username=EXISTDB_SERVER_USER,
                          password=EXISTDB_SERVER_PASSWORD)
        self.db.createCollection(self.COLLECTION, True)

        test_dir = os.path.dirname(os.path.abspath(__file__))
        for fixture_name in ('goodbye-english.xml', 'goodbye-french.xml'):
            fixture = os.path.join(test_dir, 'exist_fixtures', fixture_name)
            # close each fixture file once it has been loaded
            with open(fixture) as fixture_file:
                self.db.load(fixture_file,
                             self.COLLECTION + '/' + fixture_name, True)

        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        """Remove the test collection and restore the root collection."""
        self.db.removeCollection(self.COLLECTION)

        settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        """The default manager finds both loaded fixture documents."""
        partings = Parting.objects.all()
        self.assertEqual(2, partings.count())

    def test_sibling_query(self):
        """A sibling node is accessible through ``also``."""
        # test sibling node access via 'also'
        exc = Exclamation.objects.filter(text='Au revoir').also('next').get()
        self.assertEqual('monde', exc.next)
Example #33
0
 def _load_file_to_exist(self, filename):
     """Load the fixture ``filename`` into the eXist root collection.

     The document is stored under ``settings.EXISTDB_ROOT_COLLECTION``
     using only the last path component of ``filename`` as its name.
     """
     db = ExistDB()
     fname = path.split(filename)[-1]
     exist_path = path.join(settings.EXISTDB_ROOT_COLLECTION, fname)
     # close the fixture file once it has been loaded
     with open(filename) as fixture_file:
         db.load(fixture_file, exist_path)
Example #34
0
def publish(request):
    """
    Admin publication form.  Allows publishing an EAD file by updating or adding
    it to the configured eXist database so it will be immediately visible on
    the public site.  Files can only be published if they pass an EAD sanity check,
    implemented in :meth:`~findingaids.fa_admin.utils.check_ead`.

    On POST, sanity-check the preview document identified by ``preview_id`` in
    the request and (if it passes all checks), publish it to make it
    immediately visible on the site.  If publish is successful, redirects the
    user to main admin page with a success message that links to the published
    document on the site.  If the sanity-check fails, displays a page with any
    problems found.

    Every failure detected before the publication check (missing
    ``preview_id``, document not found in the preview collection, archive not
    identifiable, user lacking access to the archive) adds an error message
    and redirects to the admin index.
    """
    # formerly supported publish from filename, but now only supports
    # publish from preview
    if 'preview_id' not in request.POST:
        messages.error(request, "No preview document specified for publication")
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    id = request.POST['preview_id']

    # retrieve info about the document from preview collection
    try:
        # because of the way eulcore.existdb.queryset constructs returns with 'also' fields,
        # it is simpler and better to retrieve document name separately
        ead = get_findingaid(id, preview=True)
        ead_docname = get_findingaid(id, preview=True, only=['document_name'])
        filename = ead_docname.document_name
    except Http404:     # not found in exist
        messages.error(request,
            '''Publish failed. Could not retrieve <b>%s</b> from preview collection.
            Please reload and try again.''' % id)

        # if ead could not be retrieved from preview mode, skip processing
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    # determine archive this ead is associated with

    # serialized document is handed to the publication check below
    xml = ead.serialize()
    # stays None unless the repository name resolves to a known Archive
    archive = None
    if not ead.repository:
        messages.error(request,
            '''Publish failed. Could not determine which archive <b>%s</b> belongs to.
            Please update subarea, reload, and try again.''' % id)
    else:
        archive_name = ead.repository[0]
        # NOTE: EAD supports multiple subarea tags, but in practice we only
        # use one, so it should be safe to assume the first should be used for permissions
        try:
            archive = Archive.objects.get(name=archive_name)
        except ObjectDoesNotExist:
            messages.error(request,
            '''Publish failed. Could not find archive <b>%s</b>.''' % archive_name)

    # bail out if archive could not be identified
    # (the specific error message was already queued above)
    if archive is None:
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    # check that user is allowed to publish this document
    if not archive_access(request.user, archive.slug):
        messages.error(request,
            '''You do not have permission to publish <b>%s</b> materials.''' \
            % archive.label)
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))


    # error messages collected from the eXist operations below
    errors = []
    try:
        # returns a status flag, a ready-made response to use on failure, and
        # the database / filesystem paths for the document
        ok, response, dbpath, fullpath = _prepublication_check(request, filename, archive, xml=xml)
        if ok is not True:
            # publication check failed - do not publish
            return response

        # only load to exist if there are no errors found
        db = ExistDB()
        # get information to determine if an existing file is being replaced
        replaced = db.describeDocument(dbpath)

        try:
            # move the document from preview collection to configured public collection
            success = db.moveDocument(settings.EXISTDB_PREVIEW_COLLECTION,
                    settings.EXISTDB_ROOT_COLLECTION, filename)
            # FindingAid instance ead already set above
        except ExistDBException, e:
            # special-case error message
            errors.append("Failed to move document %s from preview collection to main collection." \
                            % filename)
            # re-raise and let outer exception handling take care of it
            raise e

    except ExistDBException, e:
        # any eXist failure (including the re-raised move error) is recorded
        # and the publish is marked as unsuccessful
        errors.append(e.message())
        success = False
    # NOTE(review): this excerpt ends here; the code that turns `success`,
    # `errors` and `replaced` into a response is presumably below - confirm.
import os

from os import walk
from eulexistdb.db import ExistDB

#
# Timeout higher?
#

#
# http://username:password@hostname:8080/exist
#
# YOU NEED TO INSERT THE USER AND PASSWORD HERE
#xmldb = ExistDB('http://admin:@46.137.59.250:8080/exist')
xmldb = ExistDB('http://*****:*****@localhost:8080/exist')

xmldb.createCollection('docker', True)
xmldb.createCollection('docker/texts', True)

os.chdir('../dublin-store')

for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    xmldb.createCollection('docker/texts' + '/' + dirpath, True)
    if filenames:
        for filename in filenames:
            with open(dirpath + '/' + filename) as f:
                print "--" + dirpath + '/' + filename
                xmldb.load(f, 'docker/texts' + '/' + dirpath + '/' + filename,
                           True)
Example #36
0
from os import walk
from eulexistdb.db import ExistDB
from roche.settings import EXISTDB_SERVER_URL
from roche.settings import SOLR_SERVER_URL

import sunburnt
import libxslt
import libxml2

from browser.models import RocheTEI
from eulexistdb.query import QuerySet

#
# Timeout higher?
#
xmldb = ExistDB(timeout=60)

xmldb.createCollection('docker', True)
xmldb.createCollection('docker/texts', True)

os.chdir('../dublin-store')

for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    xmldb.createCollection('docker/texts' + '/' + dirpath, True)
    if filenames:
        for filename in sorted(filenames):
            with open(os.path.join(dirpath, filename)) as f:
                print "--" + os.path.join(dirpath, filename)
                try:
                    xmldb.load(
                        f, os.path.join('docker', 'texts', dirpath, filename),
Example #37
0
class Command(BaseCommand):    
    help = """Tasks for managing eXist-db index configuration file.

Available subcommands:
  load-index      - load index configuration file to eXist
  show-index      - show the contents of index configuration file currently in eXist
  index-info      - show information about index configuration file in eXist (owner, date modified, etc.)
  remove-index    - remove index configuration from eXist
  reindex         - reindex the configured eXist collection with the loaded index
  """

    arg_list = ['load-index', 'show-index', 'index-info', 'remove-index', 'reindex']

    args = ' | '. join(arg_list)

    # FIXME/TODO: possibly convert into a django LabelCommand 
    
    def handle(self, *args, **options):
        if not len(args) or args[0] == 'help':
            print self.help
            return

        cmd = args[0]
        if cmd not in self.arg_list:
            print "Command '%s' not recognized" % cmd
            print self.help
            return

        # check for required settings (used in all modes)
        if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION:
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
            return
        if not hasattr(settings, 'EXISTDB_INDEX_CONFIGFILE') or not settings.EXISTDB_INDEX_CONFIGFILE:
            raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing")
            return

        collection = settings.EXISTDB_ROOT_COLLECTION
        index = settings.EXISTDB_INDEX_CONFIGFILE

        try:
            # Explicitly request no timeout (even if one is configured
            # in django settings), since some tasks (such as
            # reindexing) could take a while.
            self.db = ExistDB(timeout=None)

            # check there is already an index config
            hasindex = self.db.hasCollectionIndex(collection)

            # for all commands but load, nothing to do if config collection does not exist
            if not hasindex and cmd != 'load-index':
                raise CommandError("Collection %s has no index configuration" % collection)

            if cmd == 'load-index':
                # load collection index to eXist

                # no easy way to check if index is different, but give some info to user to help indicate
                if hasindex:
                    index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                    print "Collection already has an index configuration; last modified %s\n" % index_desc['modified']
                else:
                    print "This appears to be a new index configuration\n"

                message =  "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index)

                success = self.db.loadCollectionIndex(collection, open(index))
                if success:
                    print "Succesfully updated %s" % message
                    print """
If your collection already contains data and the index configuration
is new or has changed, you should reindex the collection.
            """
                else:
                    raise CommandError("Failed to update %s" % message)

            elif cmd == 'show-index':
                # show the contents of the the collection index config file in exist
                print self.db.getDoc(self.db._collectionIndexPath(collection))

            elif cmd == 'index-info':
                # show information about the collection index config file in exist
                index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                for field, val in index_desc.items():
                    print "%s:\t%s" % (field, val)

            elif cmd == 'remove-index':
                # remove any collection index in eXist
                if self.db.removeCollectionIndex(collection):
                    print "Removed collection index configuration for %s" % collection
                else:
                    raise CommandError("Failed to remove collection index configuration for %s" % collection)


            elif cmd == 'reindex':
                # reindex the collection
                if not self.db.hasCollection(collection):
                    raise CommandError("Collection %s does not exist" % collection)

                print "Reindexing collection %s" % collection
                print "-- If you have a large collection, this may take a while."
                start_time = time.time()
                success = self.db.reindexCollection(collection)
                end_time = time.time()
                if success:
                    print "Successfully reindexed collection %s" % collection
                    print "Reindexing took %.2f seconds" % (end_time - start_time)
                else:
                    print "Failed to reindexed collection %s" % collection
                    print "-- Check that the configured exist user is in the exist DBA group."


        except Exception as err:
            # better error messages would be nice...
            raise CommandError(err)
Example #38
0
class ExistQueryTest(unittest.TestCase):
    """Tests for ``QuerySet`` run against the fixtures loaded into the eXist test collection."""

    def setUp(self):
        """Connect to eXist, load the fixtures and build the base queryset on ``/root``."""
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL)
        load_fixtures(self.db)
        self.qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=QueryTestModel)

    def tearDown(self):
        """Remove the test collection created for the fixtures."""
        self.db.removeCollection(COLLECTION)

    def test_count(self):
        # fixtures are loaded a second time here (setUp already loaded them)
        # NOTE(review): presumably checks that reloading does not duplicate
        # documents - confirm
        load_fixtures(self.db)
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "queryset count returns number of fixtures")

    def test_getitem(self):
        qs = self.qs.order_by('id')     # adding sort order to test reliably
        self.assertEqual("abc", qs[0].id)
        self.assertEqual("def", qs[1].id)
        self.assertEqual("one", qs[2].id)
        self.assertEqual("xyz", qs[3].id)

    def test_getitem_typeerror(self):
        # non-integer index is rejected
        self.assertRaises(TypeError, self.qs.__getitem__, "foo")

    def test_getitem_indexerror(self):
        # negative and out-of-range indexes are rejected
        self.assertRaises(IndexError, self.qs.__getitem__, -1)
        self.assertRaises(IndexError, self.qs.__getitem__, 23)

    def test_getslice(self):
        # local is named `sliced` to avoid shadowing the builtin `slice`
        sliced = self.qs.order_by('id')[0:2]
        self.assertTrue(isinstance(sliced, QuerySet))
        self.assertTrue(isinstance(sliced[0], QueryTestModel))
        self.assertEqual(2, sliced.count())
        self.assertEqual(2, len(sliced))
        self.assertEqual('abc', sliced[0].id)
        self.assertEqual('def', sliced[1].id)
        self.assertRaises(IndexError, sliced.__getitem__, 2)

        sliced = self.qs.order_by('id')[1:3]
        self.assertEqual('def', sliced[0].id)
        self.assertEqual('one', sliced[1].id)

        # slice end beyond the available results is truncated
        sliced = self.qs.order_by('id')[3:5]
        self.assertEqual(1, sliced.count())
        self.assertEqual('xyz', sliced[0].id)
        self.assertRaises(IndexError, sliced.__getitem__, 1)

        # test slicing with unspecified bounds
        sliced = self.qs.order_by('id')[:2]
        self.assertEqual(2, sliced.count())
        self.assertEqual('def', sliced[1].id)

        sliced = self.qs.order_by('id')[1:]
        self.assertEqual(3, sliced.count())
        self.assertEqual('one', sliced[1].id)
        self.assertEqual('xyz', sliced[2].id)

    def test_filter(self):
        fqs = self.qs.filter(contains="two")
        self.assertEqual(1, fqs.count(), "count returns 1 when filtered - contains 'two'")
        self.assertEqual("two", fqs[0].name, "name matches filter")
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_field(self):
        fqs = self.qs.filter(name="one")
        # report the filtered count (not the unfiltered total) on failure
        self.assertEqual(1, fqs.count(), "count returns 1 when filtered on name = 'one' (got %s)"
                         % fqs.count())
        self.assertEqual("one", fqs[0].name, "name matches filter")
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_field_xpath(self):
        fqs = self.qs.filter(id="abc")
        # report the filtered count (not the unfiltered total) on failure
        self.assertEqual(1, fqs.count(), "count returns 1 when filtered on @id = 'abc' (got %s)"
                         % fqs.count())
        self.assertEqual("two", fqs[0].name, "name returned is correct for id filter")
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_field_contains(self):
        fqs = self.qs.filter(name__contains="o")
        self.assertEqual(3, fqs.count(),
                         "should get 3 matches for filter on name contains 'o' (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_field_contains_special(self):
        # search terms containing quotes and other special characters
        fqs = self.qs.filter(description__contains=' "quote" ')
        self.assertEqual(1, fqs.count(),
                         "should get 1 match for filter on desc contains ' \"quote\" ' (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

        fqs = self.qs.filter(description__contains=' &!')
        self.assertEqual(1, fqs.count(),
                         "should get 1 match for filter on desc contains ' &!' (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_field_startswith(self):
        fqs = self.qs.filter(name__startswith="o")
        self.assertEqual(1, fqs.count(),
                         "should get 1 match for filter on name starts with 'o' (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_subobject_field(self):
        # filter on a field of a nested object via double-underscore syntax
        fqs = self.qs.filter(sub__subname="la")
        self.assertEqual(1, fqs.count(),
                         "should get 1 match for filter on sub_subname = 'la' (got %s)" % fqs.count())

    def test_filter_in(self):
        fqs = self.qs.filter(id__in=['abc', 'xyz', 'qrs'])
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for filter on id in list (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

        fqs = self.qs.filter(document_name__in=['f1.xml', 'f2.xml'])
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for filter on document name in list (got %s)" % fqs.count())
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

        # filtering on a special field - should still be able to return/access it via only
        fqs = self.qs.filter(document_name__in=['f1.xml', 'f2.xml']) \
                     .only('id', 'document_name').order_by('document_name')
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for filter on document name in list (got %s)" % fqs.count())
        self.assertEqual('f1.xml', fqs[0].document_name)

        # same check when the special field is requested via also
        fqs = self.qs.filter(document_name__in=['f1.xml', 'f2.xml']) \
                     .also('id', 'document_name').order_by('document_name')
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for filter on document name in list (got %s)" % fqs.count())
        self.assertEqual('f1.xml', fqs[0].document_name)

    def test_filter_exists(self):
        fqs = self.qs.filter(id__exists=True)
        self.assertEqual(4, fqs.count(),
                         "filter on id exists=true returns all documents")
        fqs = self.qs.filter(id__exists=False)
        self.assertEqual(0, fqs.count(),
                         "filter on id exists=false returns no documents")
        fqs = self.qs.filter(wnn__exists=False)
        self.assertEqual(3, fqs.count(),
                         "filter on wacky node name exists=false returns 3 documents")

    def test_or_filter(self):
        fqs = self.qs.or_filter(id='abc', name='four').only('id')
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for OR filter on id='abc' or name='four' (got %s)" % fqs.count())
        ids = [obj.id for obj in fqs.all()]
        self.assertTrue('abc' in ids, 'id "abc" in list of ids when OR filter includes id="abc"')
        self.assertTrue('def' in ids, 'id "def" in list of ids when OR filter includes name="four')

    def test_exclude(self):
        fqs = self.qs.exclude(id='abc', name='one').only('id')
        self.assertEqual(
            2, fqs.count(),
            "should get 2 matches for exclude filter on id='abc' or name='one' (got %s)" % fqs.count())
        ids = [obj.id for obj in fqs.all()]
        self.assertTrue('abc' not in ids, 'id "abc" should not be in list of ids when exclude id="abc"')

    def test_filter_gtelte(self):
        # < <= > >=

        # subclass to add a numeric field to test with
        class CountQueryTestModel(QueryTestModel):
            name_count = xmlmap.IntegerField('count(name)')

        qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION,
                      model=CountQueryTestModel)

        # each fixture has one and only one name
        self.assertEqual(0, qs.filter(name_count__gt=1).count())
        self.assertEqual(4, qs.filter(name_count__gte=1).count())
        self.assertEqual(4, qs.filter(name_count__lte=1).count())
        self.assertEqual(0, qs.filter(name_count__lt=1).count())

    def test_filter_document_path(self):
        # get full test path to first document
        item = self.qs.filter(name='one').only('document_name', 'collection_name').get()
        path = '%s/%s' % (item.collection_name, item.document_name)

        # document_path combined with a matching / non-matching name filter
        fqs = self.qs.filter(document_path=path, name='one')
        self.assertEqual(1, fqs.count())
        fqs = self.qs.filter(document_path=path, name='two')
        self.assertEqual(0, fqs.count())

    def test_get(self):
        result = self.qs.get(contains="two")
        self.assertTrue(isinstance(result, QueryTestModel), "get() with contains returns single result")
        self.assertEqual(result.name, "two", "result returned by get() has correct data")
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_get_toomany(self):
        # get() with more than one match raises ReturnedMultiple
        self.assertRaises(ReturnedMultiple, self.qs.get, contains="one")

    def test_get_nomatch(self):
        # get() with no match raises DoesNotExist
        self.assertRaises(DoesNotExist, self.qs.get, contains="fifty-four")

    def test_get_byname(self):
        result = self.qs.get(name="one")
        self.assertTrue(isinstance(result, QueryTestModel), "get() with contains returns single result")
        self.assertEqual(result.name, "one", "result returned by get() has correct data")
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_filter_get(self):
        # chained filters narrow the result down to a single object
        result = self.qs.filter(contains="one").filter(name="two").get()
        self.assertTrue(isinstance(result, QueryTestModel))
        self.assertEqual("two", result.name, "filtered get() returns correct data")
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_reset(self):
        self.qs.filter(contains="two")
        self.qs.reset()
        self.assertEqual(NUM_FIXTURES, self.qs.count(), "main queryset remains unchanged by filter")

    def test_order_by(self):
        # element
        fqs = self.qs.order_by('name')
        self.assertEqual('four', fqs[0].name)
        self.assertEqual('one', fqs[1].name)
        self.assertEqual('three', fqs[2].name)
        self.assertEqual('two', fqs[3].name)
        self.assertTrue('order by ' not in self.qs.query.getQuery(), "main queryset unchanged by order_by()")
        # attribute
        fqs = self.qs.order_by('id')
        self.assertEqual('abc', fqs[0].id)
        self.assertEqual('def', fqs[1].id)
        self.assertEqual('one', fqs[2].id)
        self.assertEqual('xyz', fqs[3].id)
        # reverse sorting
        fqs = self.qs.order_by('-name')
        self.assertEqual('four', fqs[3].name)
        self.assertEqual('two', fqs[0].name)
        fqs = self.qs.order_by('-id')
        self.assertEqual('abc', fqs[3].id)
        self.assertEqual('xyz', fqs[0].id)
        # case-insensitive sorting - upper-case description should not sort first
        fqs = self.qs.order_by('~description')
        self.assertTrue(fqs[0].description.startswith('third'))
        self.assertTrue(fqs[1].description.startswith('This one contains'))
        # reverse case-insensitive sorting - flags in either order
        fqs = self.qs.order_by('~-description')
        self.assertTrue(fqs[3].description.startswith('third'))
        fqs = self.qs.order_by('-~description')
        self.assertTrue(fqs[3].description.startswith('third'))

    def test_only(self):
        self.qs.only('name')
        self.assertTrue('element name {' not in self.qs.query.getQuery(), "main queryset unchanged by only()")

        fqs = self.qs.filter(id='one').only('name', 'id', 'sub', 'or_field')
        self.assertTrue(isinstance(fqs[0], QueryTestModel))  # actually a Partial type derived from this
        # attributes that should be present
        self.assertNotEqual(fqs[0].id, None)
        self.assertNotEqual(fqs[0].sub, None)
        self.assertNotEqual(fqs[0].sub.subname, None)
        self.assertNotEqual(fqs[0].or_field, None)
        # attribute not returned
        self.assertEqual(fqs[0].description, None)
        self.assertEqual('one', fqs[0].id)
        self.assertEqual('one', fqs[0].name)
        self.assertEqual('la', fqs[0].sub.subname)
        self.assertEqual('one', fqs[0].or_field)    # = name (first of ORed fields present)

        fqs = self.qs.filter(id='one').only('wnn')
        self.assertTrue(hasattr(fqs[0], "wnn"))
        self.assertEqual(42, fqs[0].wnn)

        # nested field return
        fqs = self.qs.filter(id='one').only('name', 'id', 'sub__subname')
        self.assertEqual('la', fqs[0].sub.subname)

        # xpath function return
        fqs = self.qs.filter(id='one').only('substring')
        self.assertEqual('o', fqs[0].substring)

        # sub-subclass
        fqs = self.qs.filter(id='one').only('sub__ssc')
        self.assertTrue(isinstance(fqs[0], QueryTestModel))

    def test_only_hash(self):
        fqs = self.qs.only('hash')
        # no filters, so every test object is returned
        for result in fqs:
            # each return object should have a 40-character SHA-1 hash checksum
            self.assertEqual(40, len(result.hash),
                             'xquery result should have 40-character checksum, got %s' % result.hash)

    def test_document_name(self):
        fqs = self.qs.filter(id='one').only('document_name')
        # document_name attribute should be present
        self.assertNotEqual(fqs[0].document_name, None)
        self.assertEqual(fqs[0].document_name, "f1.xml")

        fqs = self.qs.filter(id='one').also('document_name')
        self.assertNotEqual(fqs[0].document_name, None)
        self.assertEqual(fqs[0].document_name, "f1.xml")

    def test_collection_name(self):
        # collection name is reported with the '/db' prefix
        fqs = self.qs.filter(id='one').only('collection_name')
        self.assertEqual(fqs[0].collection_name, '/db' + COLLECTION)

        fqs = self.qs.filter(id='one').also('collection_name')
        self.assertEqual(fqs[0].collection_name, '/db' + COLLECTION)

    def test_only_lastmodified(self):
        fqs = self.qs.only('last_modified')
        # no filters, so every test object is returned
        for result in fqs:
            self.assertTrue(isinstance(result.last_modified, datetime))

    def test_iter(self):
        # iterating the queryset yields model instances
        for q in self.qs:
            self.assertTrue(isinstance(q, QueryTestModel))

    def test_slice_iter(self):
        # iterating a one-item slice yields exactly one result
        i = 0
        for q in self.qs[1:2]:
            i += 1
        self.assertEqual(1, i)

    def test_also(self):
        class SubqueryTestModel(xmlmap.XmlObject):
            name = xmlmap.StringField('.')
            parent_id = xmlmap.StringField('parent::root/@id')

        qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='//name')
        name = qs.also('parent_id').get(name__exact='two')
        # %-formatting (rather than concatenation) so that a missing parent_id
        # produces an assertion failure instead of a TypeError
        self.assertEqual('abc', name.parent_id,
                         "parent id set correctly when returning at name level with also parent_id specified; should be 'abc', got '%s'"
                         % name.parent_id)

    def test_also_subfield(self):
        class SubqueryTestModel(xmlmap.XmlObject):
            subname = xmlmap.StringField('subname')
            parent = xmlmap.NodeField('parent::root', QueryTestModel)

        qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='//sub')
        name = qs.also('parent__id', 'parent__wnn').get(subname__exact='la')
        self.assertEqual('la', name.subname)
        self.assertEqual('one', name.parent.id)
        self.assertEqual(42, name.parent.wnn)

    def test_also_raw(self):
        class SubqueryTestModel(QueryTestModel):
            myid = xmlmap.StringField('@id')

        qs = QuerySet(using=self.db, collection=COLLECTION, model=SubqueryTestModel, xpath='/root')
        qs = qs.filter(id='abc').also_raw(myid='string(%(xq_var)s//name/ancestor::root/@id)')
        self.assertEqual('abc', qs[0].myid)
        # filtered version of the queryset with raw
        obj = qs.filter(name='two').get()
        self.assertEqual('abc', obj.myid)

        # multiple parameters
        obj = qs.filter(id='abc').also_raw(id='string(%(xq_var)s/@id)',
            name='normalize-space(%(xq_var)s//name)').get(id='abc')
        self.assertEqual('abc', obj.id)
        self.assertEqual('two', obj.name)

    def test_only_raw(self):
        qs = self.qs.only_raw(id='xs:string(%(xq_var)s//name/ancestor::root/@id)').filter(name='two')
        self.assertEqual('abc', qs[0].id)
        # filtered version
        obj = qs.get()
        self.assertEqual('abc', obj.id)

        # when combined with regular only, other fields come back correctly
        qs = self.qs.only('name', 'description', 'substring')
        obj = qs.only_raw(id='xs:string(%(xq_var)s//name/ancestor::root/@id)').get(id='abc')
        self.assertEqual('two', obj.name)
        self.assertEqual('t', obj.substring)
        self.assertEqual('this one only has two', obj.description)
        self.assertEqual('abc', obj.id)

        # subfield
        obj = qs.only_raw(sub__subname='normalize-space(%(xq_var)s//subname)').get(id='one')
        self.assertEqual('la', obj.sub.subname)

        # multiple parameters
        obj = self.qs.filter(id='abc').only_raw(id='string(%(xq_var)s/@id)',
            name='normalize-space(%(xq_var)s//name)').get(id='abc')
        self.assertEqual('abc', obj.id)
        self.assertEqual('two', obj.name)

        # list field - multiple return values
        class MyQueryTest(QueryTestModel):
            name = xmlmap.StringListField('name')
        qs = QuerySet(using=self.db, xpath='/root', collection=COLLECTION, model=MyQueryTest)
        # return one object but find all the names in the test collection
        obj = qs.filter(id='abc').only_raw(name='collection("/db%s")//name' % COLLECTION).get(id='abc')
        # 4 names in test fixtures - should come back as a list of those 4 names
        self.assertEqual(4, len(obj.name))

    def test_getDocument(self):
        obj = self.qs.getDocument("f1.xml")
        self.assertTrue(isinstance(obj, QueryTestModel),
                        "object returned by getDocument is instance of QueryTestModel")
        self.assertEqual("one", obj.name)

    def test_distinct(self):
        # distinct values of //name include every fixture name but no ids
        qs = QuerySet(using=self.db, collection=COLLECTION, xpath='//name')
        vals = qs.distinct()
        self.assertTrue('one' in vals)
        self.assertTrue('two' in vals)
        self.assertTrue('three' in vals)
        self.assertTrue('four' in vals)
        self.assertTrue('abc' not in vals)

    def test_namespaces(self):
        # filter on a field with a namespace
        fqs = self.qs.filter(nsfield='namespaced').all()
        self.assertEqual('namespaced', fqs[0].nsfield)
    subprocess.call(["ssh-agent", "bash", "-c", "ssh-add /docker/github_rsa ; /usr/bin/git pull;"],
            stdout=devnull, stderr=devnull)


    # Call UIMA analysis engine
    if not juan == -1:
        file_name = os.path.join(collection_path, "%03d.xml" % (juan,))
        result = subprocess.call(["/usr/bin/java", "-Dfile.encoding=UTF-8",
                                  "-Djava.util.logging.config.file=/docker/bertie-uima/src/main/properties/Logger.properties",
                                  "-jar", BERTIE_JAR,
                                  "--tei",
                                  "--file", file_name,
                                  "--owl", f.name], stdout=devnull, stderr=devnull)

        # Reload single document for faster response
        xmldb = ExistDB(server_url="http://*****:*****@" + existdb_host + ":8080/exist", timeout=10)
        db_collection_path = 'docker/texts/' + \
             collection_path.replace('/docker/dublin-store/', '')
        with open(file_name) as newly_annotated_file:
            print " [ ] Reloading single document"
            try:
                xmldb.load(newly_annotated_file, os.path.join(db_collection_path, os.path.split(file_name)[1]), True)
            except:
                print "FAILED TO LOAD " + file_name

        # Send response early
        send_response("OK")

    start_uima = time.time()
    result = subprocess.call(["/usr/bin/java", "-Dfile.encoding=UTF-8",
                              "-Djava.util.logging.config.file=/docker/bertie-uima/src/main/properties/Logger.properties",
Example #40
0
class Command(BaseCommand):
    """Prepare codebook XML files and load them to the configured eXist collection.

    Each file is parsed as a ``CodeBook``, cleaned up by :meth:`prep`, and
    loaded to ``settings.EXISTDB_ROOT_COLLECTION``; the local copy is removed
    once it has been loaded successfully.
    """
    args = '<filename filename filename ...>'
    help = '''Loads XML files into the configured eXist collection.
The local copy will be *removed* after it is successfully loaded.'''

    option_list = BaseCommand.option_list + (
        make_option('--dry-run', '-n',
            dest='dryrun',
            action='store_true',
            help='''Report on what would be done, but don't delete any files'''
        ),
    )

    # verbosity level treated as "normal" output
    v_normal = 1

    def handle(self, *files, **options):
        """Prep each of the given files and load it to eXist.

        :param files: paths of the XML files to process
        :param options: command options; ``verbosity`` and ``dryrun`` are
            the ones read here
        :raises CommandError: if ``EXISTDB_ROOT_COLLECTION`` is not configured
        """
        verbosity = int(options.get('verbosity', self.v_normal))
        dryrun = options.get('dryrun', False)

        # check for required settings
        if not getattr(settings, 'EXISTDB_ROOT_COLLECTION', None):
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")

        self.db = ExistDB()
        self.cbgeocoder = CodebookGeocoder()

        # init progress bar if processing enough files, running on a terminal
        pbar = None
        total = len(files)
        if total >= 10 and os.isatty(sys.stderr.fileno()):
            widgets = [Percentage(), ' (', SimpleProgress(), ')',
                       Bar(), ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=total).start()

        errored = 0
        loaded = 0
        for index, f in enumerate(files):
            success = False

            if pbar:
                # report progress by position so the bar also advances for
                # dry runs and for loads that report failure without raising
                pbar.update(index)

            try:
                # full path location where file will be loaded in exist db collection
                dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(f)
                # TODO: any error checking? validation?

                start = time.time()
                cb = load_xmlobject_from_file(f, CodeBook)
                logger.debug('%s loaded as xml in %f sec', f, time.time() - start)

                start = time.time()
                self.prep(cb)
                logger.debug('%s prepped in %f sec', f, time.time() - start)
                # load to eXist from string since DDI documents aren't that large,
                # rather than reloading the file
                if not dryrun:
                    start = time.time()
                    success = self.db.load(cb.serialize(pretty=True), dbpath, overwrite=True)
                    logger.debug('%s loaded to eXist in %f sec', f, time.time() - start)

            except IOError as e:
                self.stdout.write("Error opening %s: %s" % (f, e))
                errored += 1

            except ExistDBException as e:
                self.stdout.write("Error: failed to load %s to eXist" % f)
                self.stdout.write(e.message())
                errored += 1

            if not dryrun and success:
                loaded += 1
                if verbosity > self.v_normal:
                    self.stdout.write("Loaded %s as %s" % (f, dbpath))

                # the local copy is only removed once it is safely in eXist
                try:
                    os.remove(f)
                except OSError as e:
                    self.stdout.write('Error removing %s: %s' % (f, e))

        if pbar:
            pbar.finish()

        # output a summary of what was done if more than one file was processed
        if verbosity >= self.v_normal:
            if loaded > 1:
                self.stdout.write("%d document%s loaded" % \
                                  (loaded, 's' if loaded != 1 else ''))
            if errored > 1:
                self.stdout.write("%d document%s with errors" % \
                                  (errored, 's' if errored != 1 else ''))

    # topic label pattern: organization code, a space or period, then an
    # outline-style id (roman numeral with optional dotted sub-levels)
    topic_id = re.compile(r'^(?P<org>[A-Z]+)[ .](?P<id>[IVX]+(\.[A-Z](\.[0-9]+(\.[a-z]+)?)?)?)')

    def prep(self, cb):
        """Run all prep and cleanup steps on a codebook before it is loaded to eXist.

        :param cb: codebook object; it is passed to each cleanup step in turn
        """
        self.local_topics(cb)
        self.clean_dates(cb)
        self.cbgeocoder.code_locations(cb)

    def icpsr_topic_id(self, topic):
        """Generate the ICPSR topic id in the format used for topic lookups.

        :param topic: topic text to inspect
        :returns: ``ICPSR.<id>`` when the text matches :attr:`topic_id` and
            its organization is ``ICPSR``; otherwise ``None``
        """
        m = self.topic_id.match(topic)
        if m is None:
            return None
        match_info = m.groupdict()
        if match_info['org'] != 'ICPSR':
            return None
        return '%(org)s.%(id)s' % match_info

    def local_topics(self, cb):
        """Add local-vocabulary topics derived from the codebook's ICPSR topics.

        For each topic recognized by :meth:`icpsr_topic_id`, the mapped topic
        from ``topic_mappings`` (if any) is appended; a conditional topic from
        ``conditional_topics['global']`` is also appended when the codebook's
        geographic coverage includes ``Global``.

        :param cb: codebook object whose ``topics`` are extended in place
        """
        # geographic coverage does not change inside the loop; compute once
        is_global = 'Global' in [unicode(gc) for gc in cb.geo_coverage]

        # iterate over a snapshot, since topics are appended inside the loop
        for t in list(cb.topics):
            topic_id = self.icpsr_topic_id(t.val)
            if topic_id is None:
                continue

            new_topic = topic_mappings.get(topic_id, None)
            if new_topic:
                cb.topics.append(Topic(val=new_topic,
                    vocab='local'))

            # conditional topics if the geographic coverage is global
            if is_global and topic_id in conditional_topics['global']:
                cb.topics.append(Topic(val=conditional_topics['global'][topic_id],
                                       vocab='local'))

    def clean_dates(self, cb):
        """Normalize time period dates so they can be searched as 4-digit years.

        Dates should end up as YYYY, YYYY-MM, or YYYY-MM-DD.

        :param cb: codebook object whose ``time_periods`` are updated in place
        """
        prev_date = None
        for d in cb.time_periods:
            # special case: two-digit date as second date in a cycle
            # interpret as month on the year that starts the cycle.
            # Guard on prev_date so an 'end' event that comes first does not
            # fail on a missing previous date.
            if prev_date is not None and d.event == 'end' and \
                    d.cycle == prev_date.cycle and len(d.date) == 2:
                d.date = '%04d-%02d' % (int(prev_date.date), int(d.date))

            elif len(d.date) < 4:
                # zero-pad short years to four digits
                d.date = '%04d' % int(d.date)

            # store current date as previous date for next loop, in case
            # we need to clean up an end date in a cycle
            prev_date = d
Example #41
0
    def handle(self, *args, **options):
        """Load EAD files into the configured eXist collection.

        Files named in ``args`` are loaded; with no arguments, every ``*.xml``
        file in each archive's subversion working copy is loaded, after the
        working copy has been updated.  A file is only loaded if ``check_ead``
        reports no errors, and a PDF cache reload task is queued for each
        loaded document unless ``skip_pdf_reload`` is set.

        :param args: optional paths of the EAD files to load
        :param options: command options; ``verbosity``, ``pdf_only`` and
            ``skip_pdf_reload`` are read here
        :raises CommandError: if ``pdf_only`` and ``skip_pdf_reload`` are both
            set, or if ``EXISTDB_ROOT_COLLECTION`` is not configured
        """
        verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        v_normal = 1
        v_all = 2

        if options['pdf_only'] and options['skip_pdf_reload']:
            raise CommandError("Options -s and -p are not compatible")

        # check for required settings
        if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION:
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")

        if args:
            files = args
        else:
            # Note: copied from prep_ead manage command; move somewhere common?
            files = set()
            svn = svn_client()
            for archive in Archive.objects.all():
                # update to make sure we have latest version of everything
                svn.update(str(archive.svn_local_path))   # apparently can't handle unicode
                files.update(set(glob.iglob(os.path.join(archive.svn_local_path, '*.xml'))))

        if verbosity == v_all:
            print('Documents will be loaded to configured eXist collection: %s'
                  % settings.EXISTDB_ROOT_COLLECTION)
            if options['skip_pdf_reload']:
                print("** Skipping PDFs cache reload")

        db = ExistDB()

        loaded = 0
        errored = 0
        # NOTE(review): pdf_tasks and start_time are populated but not read
        # within this function as shown; kept in case later code relies on them
        pdf_tasks = {}

        start_time = datetime.now()

        if not options['pdf_only']:
            # unless PDF reload only has been specified, load files

            for filename in files:
                try:
                    # full path location where file will be loaded in exist db collection
                    dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(filename)
                    errors = check_ead(filename, dbpath)
                    if errors:
                        # report errors, don't load
                        errored += 1
                        print("Error: %s does not pass publication checks; not loading to eXist." % filename)
                        if verbosity >= v_normal:
                            print("  Errors found:")
                            for err in errors:
                                print("    %s" % err)
                    else:
                        with open(filename, 'r') as eadfile:
                            success = db.load(eadfile, dbpath, overwrite=True)

                        if success:
                            loaded += 1
                            if verbosity >= v_normal:
                                print("Loaded %s" % filename)
                            # load the file as a FindingAid object to get the eadid for PDF reload
                            ead = load_xmlobject_from_file(filename, FindingAid)

                            # trigger PDF regeneration in the cache and store task result
                            # - unless user has requested PDF reload be skipped
                            if not options['skip_pdf_reload']:
                                pdf_tasks[ead.eadid.value] = reload_cached_pdf.delay(ead.eadid.value)
                                # NOTE: unlike the web admin publish, this does not
                                # generate TaskResult db records; task outcomes will be
                                # checked & reported before the script finishes
                        else:
                            errored += 1
                            print("Error: failed to load %s to eXist" % filename)
                except ExistDBException as e:
                    print("Error: failed to load %s to eXist" % filename)
                    print(e.message())
                    errored += 1

            # output a summary of what was done
            print("%d document%s loaded" % (loaded, 's' if loaded != 1 else ''))
            print("%d document%s with errors" % (errored, 's' if errored != 1 else ''))
Example #42
0
 def _load_file_to_exist(self, file):
     """Load a local file into the configured eXist root collection.

     The document is stored under ``settings.EXISTDB_ROOT_COLLECTION`` using
     the base name of ``file``; ``True`` is passed as the third argument to
     ``db.load`` (presumably the overwrite flag -- confirm against ExistDB).

     :param file: path of the local file to load
     """
     db = ExistDB()
     fname = path.split(file)[-1]
     exist_path = path.join(settings.EXISTDB_ROOT_COLLECTION, fname)
     # use a context manager so the file handle is closed even if the load fails
     with open(file) as xmlfile:
         db.load(xmlfile, exist_path, True)
Example #43
0
def publish(request):
    """
    Admin publication form.  Allows publishing an EAD file by updating or adding
    it to the configured eXist database so it will be immediately visible on
    the public site.  Files can only be published if they pass an EAD sanity check,
    implemented in :meth:`~findingaids.fa_admin.utils.check_ead`.

    On POST, looks up the preview document identified by ``preview_id``,
    checks that the user has access to the archive it belongs to, runs the
    pre-publication check and (if it passes) moves the document from the
    preview collection to the public collection.  If publish is successful,
    redirects the user to main admin page with a success message that links
    to the published document on the site.  If the document cannot be
    retrieved, its archive cannot be determined, or the user lacks
    permission, redirects to the main admin page with an error message.
    If moving the document fails, displays a page with any problems found.
    """
    # formerly supported publish from filename, but now only supports
    # publish from preview
    if 'preview_id' not in request.POST:
        messages.error(request, "No preview document specified for publication")
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    preview_id = request.POST['preview_id']

    # retrieve info about the document from preview collection
    try:
        # because of the way existdb.query.queryset constructs returns with 'also' fields,
        # it is simpler and better to retrieve document name separately
        ead = get_findingaid(preview_id, preview=True)
        ead_docname = get_findingaid(preview_id, preview=True, only=['document_name'])
        filename = ead_docname.document_name
    except (ExistDBException, Http404):     # not found in exist OR permission denied
        messages.error(request,
            '''Publish failed. Could not retrieve <b>%s</b> from preview collection.
            Please reload and try again.''' % preview_id)

        # if ead could not be retrieved from preview mode, skip processing
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    # determine archive this ead is associated with
    archive = None
    if not ead.repository:
        messages.error(request,
            '''Publish failed. Could not determine which archive <b>%s</b> belongs to.
            Please update subarea, reload, and try again.''' % preview_id)
    else:
        archive_name = ead.repository[0]
        # NOTE: EAD supports multiple subarea tags, but in practice we only
        # use one, so it should be safe to assume the first should be used for permissions
        try:
            archive = Archive.objects.get(name=archive_name)
        except ObjectDoesNotExist:
            messages.error(request,
            '''Publish failed. Could not find archive <b>%s</b>.''' % archive_name)

    # bail out if archive could not be identified
    if archive is None:
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    # check that user is allowed to publish this document
    if not archive_access(request.user, archive.slug):
        messages.error(request,
            '''You do not have permission to publish <b>%s</b> materials.''' \
            % archive.label)
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    errors = []
    # initialize up front so the error page can still be rendered when the
    # move reports failure without raising an exception
    err = None
    success = False
    try:
        # NOTE: *not* using serialized xml here, because it may introduce
        # whitespace errors not present in the original file.
        ok, response, dbpath, fullpath = _prepublication_check(request, filename, archive)
        if ok is not True:
            # publication check failed - do not publish
            return response

        # only load to exist if there are no errors found
        db = ExistDB()
        # get information to determine if an existing file is being replaced
        replaced = db.describeDocument(dbpath)

        try:
            # move the document from preview collection to configured public collection
            success = db.moveDocument(settings.EXISTDB_PREVIEW_COLLECTION,
                    settings.EXISTDB_ROOT_COLLECTION, filename)
            # FindingAid instance ead already set above
        except ExistDBException:
            # special-case error message
            errors.append("Failed to move document %s from preview collection to main collection." \
                            % filename)
            # re-raise (preserving the traceback) and let outer exception
            # handling take care of it
            raise

    except ExistDBException as exc:
        err = exc
        errors.append(err.message())
        success = False

    if success:
        # request the cache to reload the PDF - queue asynchronous task
        result = reload_cached_pdf.delay(ead.eadid.value)
        task = TaskResult(label='PDF reload', object_id=ead.eadid.value,
            url=reverse('fa:findingaid', kwargs={'id': ead.eadid.value}),
            task_id=result.task_id)
        task.save()

        ead_url = reverse('fa:findingaid', kwargs={'id': ead.eadid.value})
        change = "updated" if replaced else "added"
        messages.success(request, 'Successfully %s <b>%s</b>. View <a href="%s">%s</a>.'
                % (change, filename, ead_url, unicode(ead.unittitle)))

        # redirect to main admin page and display messages
        return HttpResponseSeeOtherRedirect(reverse('fa-admin:index'))

    if not errors:
        # the move reported failure without raising; make sure the error
        # page still has something to show
        errors.append("Failed to move document %s from preview collection to main collection." \
                        % filename)

    return render(request, 'fa_admin/publish-errors.html',
        {'errors': errors, 'filename': filename, 'mode': 'publish', 'exception': err})
Example #44
0
    def handle(self, *args, **options):
        if not len(args) or args[0] == 'help':
            print self.help
            return

        cmd = args[0]
        if cmd not in self.arg_list:
            print "Command '%s' not recognized" % cmd
            print self.help
            return

        # check for required settings (used in all modes)
        if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION:
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
            return
        if not hasattr(settings, 'EXISTDB_INDEX_CONFIGFILE') or not settings.EXISTDB_INDEX_CONFIGFILE:
            raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing")
            return

        collection = settings.EXISTDB_ROOT_COLLECTION
        index = settings.EXISTDB_INDEX_CONFIGFILE

        credentials = {}
        if options.get('username') is not None:
            credentials['EXISTDB_SERVER_USER'] = options.get('username')
        if options.get('password') is not None:
            credentials['EXISTDB_SERVER_PASSWORD'] = options.get('password')

        try:
            # Explicitly request no timeout (even if one is configured
            # in django settings), since some tasks (such as
            # reindexing) could take a while.

            if credentials:
                # NOTE: override_settings is a test utility, but this is currently
                # the simplest way to specify credentials, since by default existdb
                #
                with override_settings(**credentials):
                    self.db = ExistDB(timeout=None)
            else:
                self.db = ExistDB(timeout=None)

            # check there is already an index config
            hasindex = self.db.hasCollectionIndex(collection)

            # for all commands but load, nothing to do if config collection does not exist
            if not hasindex and cmd != 'load-index':
                raise CommandError("Collection %s has no index configuration" % collection)

            if cmd == 'load-index':
                # load collection index to eXist

                # no easy way to check if index is different, but give some info to user to help indicate
                if hasindex:
                    index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                    print "Collection already has an index configuration; last modified %s\n" % index_desc['modified']
                else:
                    print "This appears to be a new index configuration\n"

                message = "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index)

                success = self.db.loadCollectionIndex(collection, open(index))
                if success:
                    print "Succesfully updated %s" % message
                    print """
If your collection already contains data and the index configuration
is new or has changed, you should reindex the collection.
            """
                else:
                    raise CommandError("Failed to update %s" % message)

            elif cmd == 'show-index':
                # show the contents of the the collection index config file in exist
                print self.db.getDoc(self.db._collectionIndexPath(collection))

            elif cmd == 'index-info':
                # show information about the collection index config file in exist
                index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                for field, val in index_desc.items():
                    print "%s:\t%s" % (field, val)

            elif cmd == 'remove-index':
                # remove any collection index in eXist
                if self.db.removeCollectionIndex(collection):
                    print "Removed collection index configuration for %s" % collection
                else:
                    raise CommandError("Failed to remove collection index configuration for %s" % collection)


            elif cmd == 'reindex':
                # reindex the collection
                if not self.db.hasCollection(collection):
                    raise CommandError("Collection %s does not exist" % collection)

                print "Reindexing collection %s" % collection
                print "-- If you have a large collection, this may take a while."
                start_time = time.time()
                success = self.db.reindexCollection(collection)
                end_time = time.time()
                if success:
                    print "Successfully reindexed collection %s" % collection
                    print "Reindexing took %.2f seconds" % (end_time - start_time)
                else:
                    print "Failed to reindexed collection %s" % collection
                    print "-- Check that the configured exist user is in the exist DBA group or specify different credentials."


        except Exception as err:
            # better error messages would be nice...
            raise CommandError(err)
Example #45
0
 def setUp(self):
     """Connect to the test eXist server, load fixtures, and build the base queryset."""
     db = ExistDB(server_url=EXISTDB_SERVER_URL)
     self.db = db
     load_fixtures(db)
     # queryset rooted at /root in the test collection, returning QueryTestModel objects
     self.qs = QuerySet(using=db, xpath='/root', collection=COLLECTION,
                        model=QueryTestModel)
Example #46
0
class Command(BaseCommand):
    """Management command for the eXist-db index configuration of the root collection.

    The subcommand to run is given as the first positional argument; see
    :attr:`help` for the list of subcommands.
    """
    help = """Tasks for managing eXist-db index configuration file.

Available subcommands:
  load-index      - load index configuration file to eXist
  show-index      - show the contents of index configuration file currently in eXist
  index-info      - show information about index configuration file in eXist (owner, date modified, etc.)
  remove-index    - remove index configuration from eXist
  reindex         - reindex the configured eXist collection with the loaded index
  """

    # recognized subcommands
    arg_list = ['load-index', 'show-index', 'index-info', 'remove-index', 'reindex']

    args = ' | '.join(arg_list)

    def get_password_option(option, opt, value, parser):
        """Option callback: prompt for a password and store it on the parsed values.

        Defined without ``self`` because it is referenced directly as the
        ``callback`` of the ``--password`` option below.
        """
        setattr(parser.values, option.dest, getpass())

    option_list = BaseCommand.option_list + (
        make_option('--username', '-u',
            dest='username',
            action='store',
            help='''Username to use when connecting to eXist (overrides any in local settings)'''),
        make_option('--password', '-p',
            dest='password',
            action='callback', callback=get_password_option,
            help='''Prompt for password (required when --username is specified)'''),
        )

    # FIXME/TODO: possibly convert into a django LabelCommand

    def handle(self, *args, **options):
        """Run one index-management subcommand against the configured eXist collection.

        The first positional argument selects the subcommand (one of
        ``self.arg_list``); ``help`` or no argument prints the help text.
        ``username`` / ``password`` options, when given, are applied as
        overrides of the ``EXISTDB_SERVER_USER`` / ``EXISTDB_SERVER_PASSWORD``
        settings while the database connection is created.

        :param args: subcommand name as the first element
        :param options: command options; ``username`` and ``password`` are
            read here
        :raises CommandError: if a required setting is missing, the collection
            has no index configuration (for any subcommand other than
            ``load-index``), or the requested operation fails
        """
        if not args or args[0] == 'help':
            print(self.help)
            return

        cmd = args[0]
        if cmd not in self.arg_list:
            print("Command '%s' not recognized" % cmd)
            print(self.help)
            return

        # check for required settings (used in all modes)
        if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION:
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
        if not hasattr(settings, 'EXISTDB_INDEX_CONFIGFILE') or not settings.EXISTDB_INDEX_CONFIGFILE:
            raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing")

        collection = settings.EXISTDB_ROOT_COLLECTION
        index = settings.EXISTDB_INDEX_CONFIGFILE

        credentials = {}
        if options.get('username') is not None:
            credentials['EXISTDB_SERVER_USER'] = options.get('username')
        if options.get('password') is not None:
            credentials['EXISTDB_SERVER_PASSWORD'] = options.get('password')

        try:
            # Explicitly request no timeout (even if one is configured
            # in django settings), since some tasks (such as
            # reindexing) could take a while.

            if credentials:
                # NOTE: override_settings is a test utility, but this is currently
                # the simplest way to specify credentials (presumably because
                # ExistDB reads them from django settings -- confirm)
                with override_settings(**credentials):
                    self.db = ExistDB(timeout=None)
            else:
                self.db = ExistDB(timeout=None)

            # check there is already an index config
            hasindex = self.db.hasCollectionIndex(collection)

            # for all commands but load, nothing to do if config collection does not exist
            if not hasindex and cmd != 'load-index':
                raise CommandError("Collection %s has no index configuration" % collection)

            if cmd == 'load-index':
                # load collection index to eXist

                # no easy way to check if index is different, but give some info to user to help indicate
                if hasindex:
                    index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                    print("Collection already has an index configuration; last modified %s\n" % index_desc['modified'])
                else:
                    print("This appears to be a new index configuration\n")

                message = "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index)

                # close the config file once it has been sent to eXist
                with open(index) as index_file:
                    success = self.db.loadCollectionIndex(collection, index_file)
                if success:
                    print("Succesfully updated %s" % message)
                    print("""
If your collection already contains data and the index configuration
is new or has changed, you should reindex the collection.
            """)
                else:
                    raise CommandError("Failed to update %s" % message)

            elif cmd == 'show-index':
                # show the contents of the the collection index config file in exist
                print(self.db.getDoc(self.db._collectionIndexPath(collection)))

            elif cmd == 'index-info':
                # show information about the collection index config file in exist
                index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                for field, val in index_desc.items():
                    print("%s:\t%s" % (field, val))

            elif cmd == 'remove-index':
                # remove any collection index in eXist
                if self.db.removeCollectionIndex(collection):
                    print("Removed collection index configuration for %s" % collection)
                else:
                    raise CommandError("Failed to remove collection index configuration for %s" % collection)

            elif cmd == 'reindex':
                # reindex the collection
                if not self.db.hasCollection(collection):
                    raise CommandError("Collection %s does not exist" % collection)

                print("Reindexing collection %s" % collection)
                print("-- If you have a large collection, this may take a while.")
                start_time = time.time()
                success = self.db.reindexCollection(collection)
                end_time = time.time()
                if success:
                    print("Successfully reindexed collection %s" % collection)
                    print("Reindexing took %.2f seconds" % (end_time - start_time))
                else:
                    print("Failed to reindexed collection %s" % collection)
                    print("-- Check that the configured exist user is in the exist DBA group or specify different credentials.")

        except CommandError:
            # already a user-facing error; pass it through unchanged instead
            # of wrapping it in a second CommandError
            raise
        except Exception as err:
            # better error messages would be nice...
            raise CommandError(err)
Example #47
0
from os import walk
from eulexistdb.db import ExistDB
from roche.settings import EXISTDB_SERVER_URL
from roche.settings import SOLR_SERVER_URL

import sunburnt
import libxslt
import libxml2

from browser.models import RocheTEI
from eulexistdb.query import QuerySet

#
# Timeout higher?
#
xmldb = ExistDB(timeout=60)

xmldb.createCollection('docker', True)
xmldb.createCollection('docker/texts', True)

os.chdir('../dublin-store')

for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    xmldb.createCollection('docker/texts' + '/' + dirpath, True)
    if filenames:
        for filename in sorted(filenames):
            with open(os.path.join(dirpath, filename)) as f:
                print "--" + os.path.join(dirpath, filename)
                try:
                    xmldb.load(f, os.path.join('docker', 'texts', dirpath, filename), True)
                except:
Example #48
0
    def handle(self, *files, **options):
        """Prep each of the given files and load it to eXist.

        Each file is parsed as a ``CodeBook``, passed through ``self.prep``,
        and (unless ``dryrun`` is set) loaded to
        ``settings.EXISTDB_ROOT_COLLECTION``; the local copy is removed after
        a successful load.

        :param files: paths of the XML files to process
        :param options: command options; ``verbosity`` and ``dryrun`` are
            the ones read here
        :raises CommandError: if ``EXISTDB_ROOT_COLLECTION`` is not configured
        """
        verbosity = int(options.get('verbosity', self.v_normal))
        dryrun = options.get('dryrun', False)

        # check for required settings
        if not getattr(settings, 'EXISTDB_ROOT_COLLECTION', None):
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")

        self.db = ExistDB()
        self.cbgeocoder = CodebookGeocoder()

        # init progress bar if processing enough files, running on a terminal
        pbar = None
        total = len(files)
        if total >= 10 and os.isatty(sys.stderr.fileno()):
            widgets = [Percentage(), ' (', SimpleProgress(), ')',
                       Bar(), ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=total).start()

        errored = 0
        loaded = 0
        for index, f in enumerate(files):
            success = False

            if pbar:
                # report progress by position so the bar also advances for
                # dry runs and for loads that report failure without raising
                pbar.update(index)

            try:
                # full path location where file will be loaded in exist db collection
                dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(f)
                # TODO: any error checking? validation?

                start = time.time()
                cb = load_xmlobject_from_file(f, CodeBook)
                logger.debug('%s loaded as xml in %f sec', f, time.time() - start)

                start = time.time()
                self.prep(cb)
                logger.debug('%s prepped in %f sec', f, time.time() - start)
                # load to eXist from string since DDI documents aren't that large,
                # rather than reloading the file
                if not dryrun:
                    start = time.time()
                    success = self.db.load(cb.serialize(pretty=True), dbpath, overwrite=True)
                    logger.debug('%s loaded to eXist in %f sec', f, time.time() - start)

            except IOError as e:
                self.stdout.write("Error opening %s: %s" % (f, e))
                errored += 1

            except ExistDBException as e:
                self.stdout.write("Error: failed to load %s to eXist" % f)
                self.stdout.write(e.message())
                errored += 1

            if not dryrun and success:
                loaded += 1
                if verbosity > self.v_normal:
                    self.stdout.write("Loaded %s as %s" % (f, dbpath))

                # the local copy is only removed once it is safely in eXist
                try:
                    os.remove(f)
                except OSError as e:
                    self.stdout.write('Error removing %s: %s' % (f, e))

        if pbar:
            pbar.finish()

        # output a summary of what was done if more than one file was processed
        if verbosity >= self.v_normal:
            if loaded > 1:
                self.stdout.write("%d document%s loaded" % \
                                  (loaded, 's' if loaded != 1 else ''))
            if errored > 1:
                self.stdout.write("%d document%s with errors" % \
                                  (errored, 's' if errored != 1 else ''))
import os


from os import walk
from eulexistdb.db import ExistDB

#
# Timeout higher?
#

# NOTE: the server URL must embed the eXist username and password, in the form
#   http://<username>:<password>@<host>:8080/exist
# Insert real credentials below before running this script.
xmldb = ExistDB('http://*****:*****@localhost:8080/exist')

# make sure the parent collections exist before loading anything into them
for collection_name in ('docker', 'docker/texts'):
    xmldb.createCollection(collection_name, True)

os.chdir('../dublin-store')

# mirror the local directory tree under docker/texts, loading every file found
for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    xmldb.createCollection('docker/texts' + '/' + dirpath, True)
    for filename in filenames:
        local_path = dirpath + '/' + filename
        with open(local_path) as f:
            print("--" + local_path)
            xmldb.load(f, 'docker/texts' + '/' + local_path, True)

#
Example #50
0
class ExistQueryTest__FullText(unittest.TestCase):
    """QuerySet tests that need a full-text (lucene) index in eXist.

    Each test runs against a freshly configured collection: ``setUp`` loads
    the collection index in ``FIXTURE_INDEX`` and then the fixtures, and
    ``tearDown`` removes both the collection and its index again.
    """
    # when full-text indexing is enabled, eXist must index files when they are loaded to the db
    # this makes tests *significantly* slower
    # any tests that require full-text queries should be here

    # sample lucene configuration for testing full-text queries
    FIXTURE_INDEX = '''
    <collection xmlns="http://exist-db.org/collection-config/1.0">
        <index>
            <lucene>
                <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
                <text qname="description"/>
                <text qname="root"/>
            </lucene>
        </index>
    </collection>
    '''

    def setUp(self):
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL)
        # create index for collection - should be applied to newly loaded files
        # (the index has to be in place *before* the fixtures are loaded)
        self.db.loadCollectionIndex(COLLECTION, self.FIXTURE_INDEX)

        load_fixtures(self.db)

        # base queryset shared by the tests below
        self.qs = QuerySet(using=self.db, xpath='/root',
                           collection=COLLECTION, model=QueryTestModel)

    def tearDown(self):
        # remove the fixtures and the index configuration loaded in setUp
        self.db.removeCollection(COLLECTION)
        self.db.removeCollectionIndex(COLLECTION)

    def test_filter_fulltext_terms(self):
        fqs = self.qs.filter(description__fulltext_terms='only two')
        # run the count query once and reuse the value in the failure message
        match_total = fqs.count()
        self.assertEqual(1, match_total,
                         "should get 1 match for fulltext_terms search on = 'only two' (got %s)" % match_total)

    def test_filter_fulltext_options(self):
        qs = QuerySet(using=self.db, xpath='/root',
                      collection=COLLECTION, model=QueryTestModel,
                      fulltext_options={'default-operator': 'and'})
        # search for terms present in fixtures - but not both present in one doc
        fqs = qs.filter(description__fulltext_terms='only third')
        # for now, just confirm that the option is passed through to query
        self.assertIn('<default-operator>and</default-operator>', fqs.query.getQuery())
        # TODO: test this properly!
        # query options not supported in current version of eXist
        # self.assertEqual(0, fqs.count())

    def test_order_by__fulltext_score(self):
        fqs = self.qs.filter(description__fulltext_terms='one').order_by('-fulltext_score')
        self.assertEqual('one', fqs[0].name)    # one appears 3 times, should be first

    def test_only__fulltext_score(self):
        fqs = self.qs.filter(description__fulltext_terms='one').only('fulltext_score', 'name')
        first = fqs[0]
        self.assertIsInstance(first, QueryTestModel)  # actually a Partial type derived from this
        # fulltext score attribute should be present
        self.assertIsNotNone(first.fulltext_score)
        self.assertGreater(float(first.fulltext_score), 0.5)    # full-text score should be a float

    def test_fulltext_highlight(self):
        fqs = self.qs.filter(description__fulltext_terms='only two')
        # result from fulltext search - by default, xml should have exist:match tags
        self.assertIn('<exist:match', fqs[0].serialize())

        fqs = self.qs.filter(description__fulltext_terms='only two', highlight=False)
        # with highlighting disabled, should not have exist:match tags
        self.assertNotIn('<exist:match', fqs[0].serialize())

        # order of args in the same filter should not matter
        fqs = self.qs.filter(highlight=False, description__fulltext_terms='only two')
        # with highlighting disabled, should not have exist:match tags
        self.assertNotIn('<exist:match', fqs[0].serialize())

        # separate filters should also work
        fqs = self.qs.filter(description__fulltext_terms='only two').filter(highlight=False)
        # with highlighting disabled, should not have exist:match tags
        self.assertNotIn('<exist:match', fqs[0].serialize())

    def test_highlight(self):
        # highlight on its own must not restrict the result set
        fqs = self.qs.filter(highlight='supercalifragilistic')
        self.assertEqual(4, fqs.count(),
                         "highlight filter returns all documents even though search term is not present")

        # a term that does occur should be wrapped in exist:match tags
        fqs = self.qs.filter(highlight='one').order_by('id')
        self.assertIn('<exist:match', fqs[0].serialize())

    def test_match_count(self):
        fqs = self.qs.filter(id='one', highlight='one').only('match_count')
        self.assertEqual(fqs[0].match_count, 4, "4 matched words should be found")

    def test_using(self):
        fqs = self.qs.using('new-collection')
        # using should update the collection on the xquery object
        self.assertEqual('new-collection', fqs.query.collection)