Ejemplo n.º 1
0
def visual_places(request, title, juan):
    """Render the place-name view for one chapter of a text.

    Looks up the TEI documents whose ``title`` and ``chapter`` match the
    given ``title`` and ``juan`` and collects the ``place_names`` of every
    match into one flat list.

    The template ``r/visual_places.html`` receives the query set
    (``tei_documents``), the collected names (``places``) and ``juan``.
    """
    qs = QuerySet(using=ExistDB(),
                  xpath='/tei:TEI',
                  collection='docker/texts/',
                  model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    places = []
    for q in qs:
        places.extend(q.place_names)

    # An earlier revision also ran SPARQL_TIMELINE_QUERY here, but its result
    # was never read and every failure was swallowed by a bare ``except``.
    # The round-trip to the SPARQL endpoint was pure overhead, so it is gone.

    return render_to_response('r/visual_places.html', {
        'tei_documents': qs,
        'places': places,
        'juan': juan,
    },
                              context_instance=RequestContext(request))
Ejemplo n.º 2
0
def index(request):
    """Render the landing page with text/author counts and wiki links.

    The template ``roche/index.html`` receives ``number_texts``,
    ``number_authors``, the query set itself (``tei_documents``) and
    ``wiki_pages``, a list of ``[link, name]`` pairs.
    """
    # Restricting to chapter '1' leaves one document per text; only the
    # fields needed for the listing are fetched, ordered by English title.
    texts = QuerySet(using=ExistDB(),
                     xpath='/tei:TEI',
                     collection='docker/texts/',
                     model=RocheTEI)
    texts = (texts.filter(chapter='1')
                  .only('title', 'title_en', 'author')
                  .order_by('title_en'))

    number_texts = texts.count()
    # NOTE(review): distinct() is applied to the same result set that was
    # counted above - confirm that this really yields the number of authors.
    number_authors = texts.distinct().count()

    # Each wiki page is listed as [link, name]; the link is the file name
    # with spaces replaced by %20.
    wiki_dir = "/docker/dublin-store/sinology/mainSpace"
    wiki_pages = [[name.replace(" ", "%20"), name]
                  for name in sorted(os.listdir(wiki_dir))]

    return render(request, 'roche/index.html', {
        'number_texts': number_texts,
        'number_authors': number_authors,
        'tei_documents': texts,
        "wiki_pages": wiki_pages,
    })
Ejemplo n.º 3
0
def index_title(request, letter):
    """List the TEI documents whose title starts with ``letter``.

    Renders ``browser/index.html`` with the matches as ``tei_documents``.
    """
    documents = QuerySet(
        using=ExistDB(),
        xpath='/tei:TEI',
        collection='docker/texts/',
        model=Tei,
    ).filter(title__startswith=letter)

    context = {'tei_documents': documents}
    return render_to_response('browser/index.html', context,
                              context_instance=RequestContext(request))
Ejemplo n.º 4
0
def index(request):
    """List every text once on ``browser/index.html``.

    Only documents with chapter '1' are selected so that each title shows
    up a single time, and only the title, English title and author fields
    are requested.
    """
    documents = QuerySet(
        using=ExistDB(),
        xpath='/tei:TEI',
        collection='docker/texts/',
        model=RocheTEI,
    )
    documents = documents.filter(chapter='1')
    documents = documents.only('title', 'title_en', 'author')

    context = {'tei_documents': documents}
    return render_to_response('browser/index.html', context,
                              context_instance=RequestContext(request))
Ejemplo n.º 5
0
 def _remove_file_from_exist(self, file):
     """Remove the eXist document that corresponds to a local fixture file.

     The document path is the file's base name placed inside
     ``settings.EXISTDB_ROOT_COLLECTION``.  An ``ExistDBException`` raised
     by the removal is ignored.
     """
     connection = ExistDB()
     _, base_name = path.split(file)
     document_path = path.join(settings.EXISTDB_ROOT_COLLECTION, base_name)
     try:
         connection.removeDocument(document_path)
     except ExistDBException:
         # A test may already have removed the fixture itself, so a failed
         # removal is expected and deliberately not reported.
         pass
Ejemplo n.º 6
0
def index_author(request, author, startswith):
    """List the TEI documents written by ``author``.

    When ``startswith`` is truthy, ``author`` is treated as a prefix of the
    author name; otherwise the author must match exactly.  Renders
    ``browser/index.html`` with the matches as ``tei_documents``.
    """
    # Pick the lookup once instead of branching around two filter() calls.
    lookup = 'author__startswith' if startswith else 'author'

    documents = QuerySet(
        using=ExistDB(),
        xpath='/tei:TEI',
        collection='docker/texts/',
        model=Tei,
    ).filter(**{lookup: author})

    context = {'tei_documents': documents}
    return render_to_response('browser/index.html', context,
                              context_instance=RequestContext(request))
Ejemplo n.º 7
0
def index(request):
    """Render ``search/index.html`` with the result of a fixed full-text query.

    The search term is hard-coded, and the full-text option
    ``default-operator`` is set to ``and``.
    """
    documents = QuerySet(
        using=ExistDB(),
        xpath='/tei:TEI',
        collection='docker/texts/',
        model=RocheTEI,
        fulltext_options={'default-operator': 'and'},
    ).filter(body__fulltext_terms='至')

    context = {'tei_documents': documents}
    return render_to_response('search/index.html', context)
Ejemplo n.º 8
0
def text_info(request, title):
    """Render per-chapter statistics for the text called ``title``.

    For every matching TEI document a ``[chapter, heading, characters]``
    entry is built, where ``characters`` is the length of the body text
    after removing everything matched by ``RE_INTERPUCTION`` and all
    newlines.  The sorted entries are handed to
    ``browser/text_view_info.html`` as ``chapter_titles``.
    """
    documents = QuerySet(using=ExistDB(),
                         xpath='/tei:TEI',
                         collection='docker/texts/',
                         model=RocheTEI).filter(title=title)

    chapter_titles = []
    for doc in documents:
        number_characters = sum(
            len(re.sub(RE_INTERPUCTION, '', div.text).replace("\n", ""))
            for div in doc.body.div)

        # The heading is shown without spaces/newlines and capped at 70
        # characters; 'XXX' stands in for a missing heading.
        heading = doc.chapter_title
        if heading:
            content = heading.replace(" ", "").replace("\n", "")[:70]
        else:
            content = 'XXX'

        # Documents without a chapter value are listed as chapter 1.
        chapter = doc.chapter if doc.chapter else 1

        chapter_titles.append([chapter, content, number_characters])

    chapter_titles.sort()

    # Collecting place names, persons and terms from the documents is
    # currently switched off, so the template always gets empty lists.
    place_names, persons, terms = [], [], []

    # Placeholder marker data for the leaflet map.
    js_data = json.dumps([[[50.5, 30.5], "test"]])

    context = {
        'tei_documents': documents,
        'tei_transform': "",
        'place_names': place_names,
        'persons': persons,
        'terms': terms,
        'js_data': js_data,
        'chapter_titles': chapter_titles,
    }
    return render_to_response('browser/text_view_info.html', context,
                              context_instance=RequestContext(request))
Ejemplo n.º 9
0
    def _fixture_teardown(self):
        """Remove the eXist fixtures declared in ``self.exist_fixtures``.

        Recognised keys are ``index`` (the collection index is removed),
        ``directory`` (every ``*.xml`` file in it is removed from eXist) and
        ``files`` (each listed file is removed).  The parent class teardown
        always runs afterwards and its result is returned.
        """
        if not hasattr(self, 'exist_fixtures'):
            return super(TestCase, self)._fixture_teardown()

        fixtures = self.exist_fixtures
        connection = ExistDB()

        if 'index' in fixtures:
            connection.removeCollectionIndex(settings.EXISTDB_ROOT_COLLECTION)

        if 'directory' in fixtures:
            xml_pattern = path.join(fixtures['directory'], '*.xml')
            for xml_file in glob(xml_pattern):
                self._remove_file_from_exist(xml_file)

        if 'files' in fixtures:
            for xml_file in fixtures['files']:
                self._remove_file_from_exist(xml_file)

        return super(TestCase, self)._fixture_teardown()
Ejemplo n.º 10
0
def visual_timeline(request, title, juan):
    """Render a timeline of the persons mentioned in one chapter of a text.

    The persons of every TEI document matching ``title`` and ``juan`` are
    collected and matched against the result of ``SPARQL_TIMELINE_QUERY``,
    which supplies a birth and a death year per person.  Persons without a
    SPARQL match are left out.

    The template ``r/visual_timeline.html`` receives ``timeline_persons``,
    a JSON list of ``[person, birth_year, death_year]`` rows sorted by
    birth year, together with the query set and ``juan``.
    """
    from operator import itemgetter

    qs = QuerySet(using=ExistDB(),
                  xpath='/tei:TEI',
                  collection='docker/texts/',
                  model=RocheTEI)
    qs = qs.filter(title=title, chapter=juan)

    persons = []
    for q in qs:
        persons.extend(q.persons)

    sparql = SPARQLWrapper2(FUSEKI_QUERY_URL)
    sparql.setQuery(SPARQL_TIMELINE_QUERY)

    try:
        sparql_result = sparql.query()
    except Exception:
        # Best effort: if the SPARQL endpoint cannot be queried the page is
        # still rendered, just with an empty timeline.  ``Exception`` rather
        # than a bare ``except`` so that SystemExit and KeyboardInterrupt
        # are no longer swallowed.
        sparql_result = {}

    # person name -> (birth year, death year), as returned by the endpoint
    sparql_persons = {}
    if sparql_result:
        for binding in sparql_result.bindings:
            sparql_persons[binding[u"person"].value] = (
                binding[u"birthYear"].value,
                binding[u"deathYear"].value,
            )

    # One row per distinct person for whom both years are known.
    timeline_persons = [
        [p, int(sparql_persons[p][0]), int(sparql_persons[p][1])]
        for p in set(persons)
        if p in sparql_persons
    ]
    timeline_persons.sort(key=itemgetter(1))
    timeline_persons = json.dumps(timeline_persons)

    return render_to_response('r/visual_timeline.html', {
        'tei_documents': qs,
        'timeline_persons': timeline_persons,
        'juan': juan
    },
                              context_instance=RequestContext(request))
Ejemplo n.º 11
0
    def _fixture_setup(self):
        """Load the eXist fixtures declared in ``self.exist_fixtures``.

        Recognised keys are ``index`` (path of an index configuration file,
        loaded as the index of ``settings.EXISTDB_ROOT_COLLECTION``),
        ``directory`` (every ``*.xml`` file in it is loaded) and ``files``
        (each listed file is loaded).  The parent class setup always runs
        afterwards and its result is returned.
        """
        if hasattr(self, 'exist_fixtures'):
            db = ExistDB()
            # load index
            if 'index' in self.exist_fixtures:
                # Close the index file as soon as it has been loaded
                # instead of leaking the handle for the rest of the run.
                with open(self.exist_fixtures['index']) as index_file:
                    db.loadCollectionIndex(settings.EXISTDB_ROOT_COLLECTION,
                                           index_file)
            if 'directory' in self.exist_fixtures:
                for file in glob(
                        path.join(self.exist_fixtures['directory'], '*.xml')):
                    self._load_file_to_exist(file)
            if 'files' in self.exist_fixtures:
                for file in self.exist_fixtures['files']:
                    self._load_file_to_exist(file)

        return super(TestCase, self)._fixture_setup()
Ejemplo n.º 12
0
def text_download(request, title, file_format, juan=0):
    """
    Return a text, or a single chapter of it, as a file download.

    ``file_format`` 'txt' produces a plain text attachment; any other
    value produces a PDF.  A truthy ``juan`` restricts the download to
    that chapter.  The attachment is named after the pinyin form of
    ``title``.
    """
    import pinyin

    pinyin_title = pinyin.get(title)

    documents = QuerySet(using=ExistDB(),
                         xpath='/tei:TEI',
                         collection='docker/texts/',
                         model=RocheTEI).filter(title=title)
    if juan:
        documents = documents.filter(chapter=juan)

    # Strip all spaces, newlines and tabs, then start a new paragraph
    # after every full stop.
    pieces = []
    for doc in documents:
        for div in doc.body.div:
            text = div.text
            for whitespace in (" ", "\n", "\t"):
                text = text.replace(whitespace, "")
            pieces.append(text.replace(u"。", u"。\n\n"))
    result = "".join(pieces)

    if file_format == 'txt':
        response = HttpResponse(content_type='text/plain')
        disposition = 'attachment; filename="{}.txt"'.format(pinyin_title)
        response['Content-Disposition'] = disposition
        response.write(result)
        return response

    from fpdf import FPDF

    # The Droid fallback font is registered as a unicode font before the
    # text is written to a single A4 document.
    pdf = FPDF(unit='mm', format='A4')
    pdf.add_page()
    pdf.add_font('Droid', '', 'DroidSansFallbackFull.ttf', uni=True)
    pdf.set_font('Droid', '', 12)
    pdf.write(5, unicode(result))

    response = HttpResponse(pdf.output(dest='S'),
                            content_type='application/pdf')
    disposition = 'attachment; filename="{}.pdf"'.format(pinyin_title)
    response['Content-Disposition'] = disposition
    return response
Ejemplo n.º 13
0
    def restore_root_collection(self):
        """Drop the eXist test collection and restore the real root setting.

        Counterpart of ``use_test_collection``: removes the
        ``EXISTDB_ROOT_COLLECTION_REAL`` marker from the settings, removes
        the collection currently named by ``EXISTDB_ROOT_COLLECTION`` from
        eXist and puts the stored original value back.  A failure to remove
        the collection is reported on stderr but does not stop the restore.

        Calling this without a preceding ``use_test_collection`` is a no-op.
        """
        # The marker only exists once use_test_collection() has run; an
        # unconditional delattr would raise AttributeError otherwise.
        if hasattr(settings, "EXISTDB_ROOT_COLLECTION_REAL"):
            delattr(settings, "EXISTDB_ROOT_COLLECTION_REAL")

        # if use_test_collection didn't run, don't change anything
        if getattr(self, "stored_default_collection", None) is None:
            return

        print >> sys.stderr, "Removing eXist Test Collection: %s" % settings.EXISTDB_ROOT_COLLECTION
        # before restoring existdb non-test root collection, init db connection
        db = ExistDB()
        try:
            # remove test collection
            db.removeCollection(settings.EXISTDB_ROOT_COLLECTION)
        except ExistDBException as e:
            print >> sys.stderr, "Error removing collection %s: %s" \
                % (settings.EXISTDB_ROOT_COLLECTION, e)

        print >> sys.stderr, "Restoring eXist Root Collection: %s" \
            % self.stored_default_collection
        settings.EXISTDB_ROOT_COLLECTION = self.stored_default_collection
Ejemplo n.º 14
0
    def use_test_collection(self):
        """Point ``EXISTDB_ROOT_COLLECTION`` at a test collection and create it.

        The current root collection (or ``None`` when unset) is remembered
        both on ``self.stored_default_collection`` and in the
        ``EXISTDB_ROOT_COLLECTION_REAL`` setting.  The test collection is
        ``EXISTDB_TEST_COLLECTION`` when that setting has a truthy value,
        otherwise the current root (default ``/default``) with ``_test``
        appended.
        """
        original_root = getattr(settings, "EXISTDB_ROOT_COLLECTION", None)
        self.stored_default_collection = original_root
        settings.EXISTDB_ROOT_COLLECTION_REAL = original_root

        test_root = getattr(settings, "EXISTDB_TEST_COLLECTION", None)
        if not test_root:
            current_root = getattr(settings, "EXISTDB_ROOT_COLLECTION",
                                   "/default")
            test_root = current_root + "_test"
        settings.EXISTDB_ROOT_COLLECTION = test_root

        print >> sys.stderr, "Creating eXist Test Collection: %s" % test_root

        # The connection is only opened now that the root collection
        # setting already names the test collection.  The ``True`` flag
        # keeps createCollection from complaining when the collection
        # already exists.
        ExistDB().createCollection(test_root, True)
Ejemplo n.º 15
0
    def setUp(self):
        """Create the test collection, load both fixtures and switch roots.

        Connects to eXist with the configured server URL and credentials,
        creates ``self.COLLECTION``, loads the ``goodbye-english.xml`` and
        ``goodbye-french.xml`` fixtures into it and then makes it the
        ``EXISTDB_ROOT_COLLECTION``.  The previous root is kept in
        ``self._root_collection``.
        """
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL,
                          username=EXISTDB_SERVER_USER,
                          password=EXISTDB_SERVER_PASSWORD)
        self.db.createCollection(self.COLLECTION, True)

        test_dir = os.path.dirname(os.path.abspath(__file__))
        fixture_dir = os.path.join(test_dir, 'exist_fixtures')
        for fixture_name in ('goodbye-english.xml', 'goodbye-french.xml'):
            # ``with`` closes each fixture once it is loaded; the files used
            # to stay open for the rest of the test run.
            with open(os.path.join(fixture_dir, fixture_name)) as fixture:
                self.db.load(fixture,
                             self.COLLECTION + '/' + fixture_name, True)

        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION
Ejemplo n.º 16
0
def text_view(request, title):
    """Render the complete text called ``title``, all chapters in order.

    Every matching TEI document body is transformed with
    ``XSL_TRANSFORM_1`` and the serialized results are concatenated in
    chapter order.  The template ``browser/text_view.html`` receives the
    query set, the combined markup (``tei_transform``), the title taken
    from the first document (``text_title``) and the number of documents
    (``max_juan``).

    Raises ``Http404`` when no document has the requested title.
    """
    from django.http import Http404

    qs = QuerySet(using=ExistDB(), xpath='/tei:TEI', collection='docker/texts/', model=RocheTEI)

    # filter by title
    qs = qs.filter(title=title).order_by('chapter')

    max_juan = qs.count()
    if not max_juan:
        # Without this guard ``qs[0]`` below fails on an unknown title and
        # the visitor gets a server error instead of "not found".
        raise Http404("Text not found")

    result = "".join(
        q.body.xsl_transform(xsl=XSL_TRANSFORM_1).serialize() for q in qs)

    text_title = qs[0].title

    data = {'tei_documents': qs, 'tei_transform': result,
            'text_title': text_title, 'max_juan': max_juan, }

    return render_to_response('browser/text_view.html', data,
                              context_instance=RequestContext(request))
Ejemplo n.º 17
0
    def get_query_set(self):
        """
        Return the default :class:`eulexistdb.db.QuerySet` for this
        ``Manager``.

        The query set is built from the ``Manager``'s `model` and `xpath`
        and is evaluated in ``settings.EXISTDB_ROOT_COLLECTION`` on a
        default :class:`eulexistdb.db.ExistDB`.  When
        ``settings.EXISTDB_FULLTEXT_OPTIONS`` is defined it is passed along
        as the full-text options; otherwise an empty dict is used.

        Subclasses of Manager can override or extend this method to offer
        custom queries on an :class:`~eulexistdb.models.XmlModel`.
        """
        fulltext_opts = (settings.EXISTDB_FULLTEXT_OPTIONS
                         if hasattr(settings, 'EXISTDB_FULLTEXT_OPTIONS')
                         else {})

        queryset = QuerySet(model=self.model,
                            xpath=self.xpath,
                            using=ExistDB(),
                            collection=settings.EXISTDB_ROOT_COLLECTION,
                            fulltext_options=fulltext_opts)
        return queryset
Ejemplo n.º 18
0
    def render(self, context):
        """Render the DDBC entries for the place name held by this node.

        ``self.place_name`` is resolved against ``context``; when the
        variable does not exist an empty string is returned.  Every DDBC
        place whose ``place_names`` match the resolved name contributes one
        ``<p>`` paragraph listing its other names, district, notes and
        location.
        """
        from browser.models import DDBCPlaceName

        try:
            # Keep the resolved value local.  Writing it back to
            # ``self.place_name`` replaced the template variable with a
            # string, so the next render of this node (inside a loop, or on
            # any later request using the cached template) failed on
            # ``.resolve``.
            place_name = self.place_name.resolve(context)
        except template.VariableDoesNotExist:
            return ''

        qs = QuerySet(using=ExistDB(),
                      xpath='/tei:TEI//tei:place',
                      collection='docker/resources/',
                      model=DDBCPlaceName)
        qs = qs.filter(place_names=place_name)

        # NOTE(review): the values below go into the markup unescaped -
        # confirm that the DDBC resource data is trusted.
        parts = []
        for q in qs:
            parts.append('<p>')
            parts.append('Other names: ' + u', '.join(q.place_names) + '<br>')
            parts.append('District: ' + q.district + '<br>')
            parts.append('Notes: ' + u' '.join(q.notes) + '<br>')
            parts.append('Location: ' + q.geo + '<br>')
            parts.append('</p>')

        return u''.join(parts)
Ejemplo n.º 19
0
import os

from os import walk
from eulexistdb.db import ExistDB

#
# Timeout higher?
#

#
# http://username:password@hostname:8080/exist
#
# YOU NEED TO INSERT THE USER AND PASSWORD HERE
#xmldb = ExistDB('http://admin:@46.137.59.250:8080/exist')
xmldb = ExistDB('http://*****:*****@localhost:8080/exist')

# Make sure the collections that receive the texts exist.
for collection in ('docker', 'docker/texts'):
    xmldb.createCollection(collection, True)

os.chdir('../dublin-store')

# Mirror the library directory tree into eXist: one collection per
# directory, one document per file, each under 'docker/texts'.
for dirpath, dirnames, filenames in walk('浙江大學圖書館'):
    target_collection = 'docker/texts' + '/' + dirpath
    xmldb.createCollection(target_collection, True)
    for filename in filenames:
        source_path = dirpath + '/' + filename
        with open(source_path) as f:
            print("--" + source_path)
            xmldb.load(f, target_collection + '/' + filename, True)
Ejemplo n.º 20
0
 def _load_file_to_exist(self, file):
     """Load a local fixture file into the eXist root collection.

     The document is stored under the file's base name inside
     ``settings.EXISTDB_ROOT_COLLECTION``.
     """
     db = ExistDB()
     fname = path.split(file)[-1]
     exist_path = path.join(settings.EXISTDB_ROOT_COLLECTION, fname)
     # Close the fixture as soon as it is loaded; the handle used to be
     # left open for every fixture file.
     with open(file) as fixture:
         db.load(fixture, exist_path, True)
Ejemplo n.º 21
0
from os import walk
from eulexistdb.db import ExistDB
from roche.settings import EXISTDB_SERVER_URL
from roche.settings import SOLR_SERVER_URL

import sunburnt
import libxslt
import libxml2

from browser.models import RocheTEI
from eulexistdb.query import QuerySet

#
# Timeout higher?
#
xmldb = ExistDB(timeout=60)

xmldb.createCollection('docker', True)
xmldb.createCollection('docker/texts', True)

os.chdir('../dublin-store')

for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    xmldb.createCollection('docker/texts' + '/' + dirpath, True)
    if filenames:
        for filename in sorted(filenames):
            with open(os.path.join(dirpath, filename)) as f:
                print "--" + os.path.join(dirpath, filename)
                try:
                    xmldb.load(
                        f, os.path.join('docker', 'texts', dirpath, filename),
Ejemplo n.º 22
0
    def handle(self, *args, **options):
        if not len(args) or args[0] == 'help':
            print self.help
            return

        cmd = args[0]
        if cmd not in self.arg_list:
            print "Command '%s' not recognized" % cmd
            print self.help
            return

        # check for required settings (used in all modes)
        if not hasattr(settings, 'EXISTDB_ROOT_COLLECTION') or not settings.EXISTDB_ROOT_COLLECTION:
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
            return
        if not hasattr(settings, 'EXISTDB_INDEX_CONFIGFILE') or not settings.EXISTDB_INDEX_CONFIGFILE:
            raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing")
            return

        collection = settings.EXISTDB_ROOT_COLLECTION
        index = settings.EXISTDB_INDEX_CONFIGFILE

        credentials = {}
        if options.get('username') is not None:
            credentials['EXISTDB_SERVER_USER'] = options.get('username')
        if options.get('password') is not None:
            credentials['EXISTDB_SERVER_PASSWORD'] = options.get('password')

        try:
            # Explicitly request no timeout (even if one is configured
            # in django settings), since some tasks (such as
            # reindexing) could take a while.

            if credentials:
                # NOTE: override_settings is a test utility, but this is currently
                # the simplest way to specify credentials, since by default existdb
                #
                with override_settings(**credentials):
                    self.db = ExistDB(timeout=None)
            else:
                self.db = ExistDB(timeout=None)

            # check there is already an index config
            hasindex = self.db.hasCollectionIndex(collection)

            # for all commands but load, nothing to do if config collection does not exist
            if not hasindex and cmd != 'load-index':
                raise CommandError("Collection %s has no index configuration" % collection)

            if cmd == 'load-index':
                # load collection index to eXist

                # no easy way to check if index is different, but give some info to user to help indicate
                if hasindex:
                    index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                    print "Collection already has an index configuration; last modified %s\n" % index_desc['modified']
                else:
                    print "This appears to be a new index configuration\n"

                message = "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index)

                success = self.db.loadCollectionIndex(collection, open(index))
                if success:
                    print "Succesfully updated %s" % message
                    print """
If your collection already contains data and the index configuration
is new or has changed, you should reindex the collection.
            """
                else:
                    raise CommandError("Failed to update %s" % message)

            elif cmd == 'show-index':
                # show the contents of the the collection index config file in exist
                print self.db.getDoc(self.db._collectionIndexPath(collection))

            elif cmd == 'index-info':
                # show information about the collection index config file in exist
                index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                for field, val in index_desc.items():
                    print "%s:\t%s" % (field, val)

            elif cmd == 'remove-index':
                # remove any collection index in eXist
                if self.db.removeCollectionIndex(collection):
                    print "Removed collection index configuration for %s" % collection
                else:
                    raise CommandError("Failed to remove collection index configuration for %s" % collection)


            elif cmd == 'reindex':
                # reindex the collection
                if not self.db.hasCollection(collection):
                    raise CommandError("Collection %s does not exist" % collection)

                print "Reindexing collection %s" % collection
                print "-- If you have a large collection, this may take a while."
                start_time = time.time()
                success = self.db.reindexCollection(collection)
                end_time = time.time()
                if success:
                    print "Successfully reindexed collection %s" % collection
                    print "Reindexing took %.2f seconds" % (end_time - start_time)
                else:
                    print "Failed to reindexed collection %s" % collection
                    print "-- Check that the configured exist user is in the exist DBA group or specify different credentials."


        except Exception as err:
            # better error messages would be nice...
            raise CommandError(err)