Ejemplo n.º 1
0
class ModelTest(unittest.TestCase):
    """Exercise the ``Parting`` and ``Exclamation`` models through ExistDB.

    Each test runs against a scratch collection that is created and populated
    with two fixture documents in :meth:`setUp` and removed in
    :meth:`tearDown`.  While a test runs, ``settings.EXISTDB_ROOT_COLLECTION``
    is pointed at that scratch collection.
    """
    COLLECTION = EXISTDB_TEST_COLLECTION

    # fixture documents (under ./exist_fixtures) loaded before every test
    _FIXTURES = ('goodbye-english.xml', 'goodbye-french.xml')

    def setUp(self):
        """Create the test collection, load the fixtures, repoint settings."""
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL,
                          username=EXISTDB_SERVER_USER,
                          password=EXISTDB_SERVER_PASSWORD)
        self.db.createCollection(self.COLLECTION, True)

        fixture_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'exist_fixtures')
        for name in self._FIXTURES:
            # context manager so the fixture file handle is always closed
            with open(os.path.join(fixture_dir, name)) as xmlfile:
                self.db.load(xmlfile, self.COLLECTION + '/' + name)

        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        """Remove the test collection and restore the original root setting."""
        try:
            self.db.removeCollection(self.COLLECTION)
        finally:
            # restore settings even if the collection could not be removed,
            # so a failed cleanup does not leak into later tests
            settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        """The default manager should see both fixture documents."""
        partings = Parting.objects.all()
        self.assertEqual(2, partings.count())

    def test_sibling_query(self):
        """Sibling node access via ``also`` should return the next node."""
        exc = Exclamation.objects.filter(text='Au revoir').also('next').get()
        self.assertEqual('monde', exc.next)
Ejemplo n.º 2
0
class ModelTest(unittest.TestCase):
    """Exercise the ``Parting`` model manager through ExistDB.

    Each test runs against a scratch collection that is created and populated
    with two fixture documents in :meth:`setUp` and removed in
    :meth:`tearDown`.  While a test runs, ``settings.EXISTDB_ROOT_COLLECTION``
    is pointed at that scratch collection.
    """
    COLLECTION = settings.EXISTDB_TEST_COLLECTION

    # fixture documents (under ./exist_fixtures) loaded before every test
    _FIXTURES = ('goodbye-english.xml', 'goodbye-french.xml')

    def setUp(self):
        """Create the test collection, load the fixtures, repoint settings."""
        self.db = ExistDB()
        self.db.createCollection(self.COLLECTION, True)

        fixture_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'exist_fixtures')
        for name in self._FIXTURES:
            # context manager so the fixture file handle is always closed
            with open(os.path.join(fixture_dir, name)) as xmlfile:
                self.db.load(xmlfile, self.COLLECTION + '/' + name, True)

        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        """Remove the test collection and restore the original root setting."""
        try:
            self.db.removeCollection(self.COLLECTION)
        finally:
            # restore settings even if the collection could not be removed,
            # so a failed cleanup does not leak into later tests
            settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        """The default manager should see both fixture documents."""
        partings = Parting.objects.all()
        self.assertEqual(2, partings.count())
Ejemplo n.º 3
0
def preview(request, archive):
    """Load a posted EAD file to the eXist preview collection.

    On POST: looks up the ``Archive`` by slug, runs ``_prepublication_check``
    in ``'preview'`` mode on the posted ``filename`` and, when that passes,
    loads the file to ``settings.EXISTDB_PREVIEW_COLLECTION`` (overwriting any
    document of the same name).  A successful load redirects with a 303 to
    the finding aid preview page; a failed load renders
    ``fa_admin/publish-errors.html``.

    NOTE(review): no branch for non-POST requests is visible in this excerpt;
    as written they fall through and return ``None``.

    :param request: the current HTTP request
    :param archive: slug of the ``Archive`` the file belongs to
    """
    if request.method == 'POST':

        archive = get_object_or_404(Archive, slug=archive)
        filename = request.POST['filename']

        errors = []
        # bound up front: the error template is rendered with this value even
        # when the load fails without raising
        err = None

        try:
            # only load to exist if document passes publication check
            ok, response, dbpath, fullpath = _prepublication_check(request, filename,
                archive, mode='preview')
            if ok is not True:
                return response

            db = ExistDB()
            # load the document to the *preview* collection in eXist with the same filename
            preview_dbpath = settings.EXISTDB_PREVIEW_COLLECTION + "/" + filename
            # context manager so the file handle is closed whether or not the load succeeds
            with open(fullpath, 'r') as eadfile:
                success = db.load(eadfile, preview_dbpath, overwrite=True)
        except ExistDBException as exc:
            err = exc
            success = False
            errors.append(exc.message())

        if success:
            # load the file as a FindingAid object so we can generate the preview url
            ead = load_xmlobject_from_file(fullpath, FindingAid)
            messages.success(request, 'Successfully loaded <b>%s</b> for preview.' % filename)
            # redirect to document preview page with code 303 (See Other)
            return HttpResponseSeeOtherRedirect(reverse('fa-admin:preview:findingaid', kwargs={'id': ead.eadid}))
        else:
            # no exception but no success means the load itself failed;
            # give the user something to read instead of an empty error list
            if not errors:
                errors.append('Failed to load the document to the preview collection')

            return render(request, 'fa_admin/publish-errors.html',
                    {'errors': errors, 'filename': filename, 'mode': 'preview', 'exception': err})
Ejemplo n.º 4
0
    def test_ead_lastmodified(self):
        """Check ``ead_lastmodified`` for published, unknown and preview ids.

        NOTE(review): the first check expects the ``abbey244`` fixture to have
        been loaded today, presumably by test setup that is not visible here.
        """
        modified = ead_lastmodified('rqst', 'abbey244')
        self.assertTrue(isinstance(modified, datetime),
                        "ead_lastmodified should return a datetime object")
        date_format = '%Y-%m-%d'
        expected = datetime.now().strftime(date_format)
        value = modified.strftime(date_format)
        self.assertEqual(expected, value,
                     'ead lastmodified should be today, expected %s, got %s' % (expected, value))

        # invalid eadid
        self.assertRaises(Http404, ead_lastmodified, 'rqst', 'bogusid')

        db = ExistDB()
        # preview document - load fixture to preview collection
        preview_dbpath = settings.EXISTDB_PREVIEW_COLLECTION + '/raoul548.xml'
        fullpath = path.join(exist_fixture_path, 'raoul548.xml')
        with open(fullpath, 'r') as fixture:
            db.load(fixture, preview_dbpath)
        try:
            preview_modified = ead_lastmodified('rqst', 'raoul548', preview=True)
            self.assertTrue(isinstance(preview_modified, datetime),
                            "ead_lastmodified should return a datetime object")
        finally:
            # clean up even when the assertion above fails, so the preview
            # document does not linger in the collection
            db.removeDocument(preview_dbpath)
Ejemplo n.º 5
0
def preview(request, archive):
    """Load a posted EAD file to the eXist preview collection, or list previews.

    On POST: looks up the ``Archive`` by slug, runs ``_prepublication_check``
    in ``'preview'`` mode on the posted ``filename`` and, when that passes,
    loads the file to ``settings.EXISTDB_PREVIEW_COLLECTION``.  A successful
    load redirects with a 303 to the finding aid preview page; a failed load
    renders ``fa_admin/publish-errors.html``.

    On any other method: renders ``fa_admin/preview_list.html`` with the
    preview finding aids ordered by ``last_modified``.

    :param request: the current HTTP request
    :param archive: slug of the ``Archive`` the file belongs to
    """
    if request.method == 'POST':

        archive = get_object_or_404(Archive, slug=archive)
        filename = request.POST['filename']

        errors = []
        err = None

        try:
            # only load to exist if document passes publication check
            ok, response, dbpath, fullpath = _prepublication_check(request, filename,
                archive, mode='preview')
            if ok is not True:
                return response

            db = ExistDB()
            # load the document to the *preview* collection in eXist with the same filename
            preview_dbpath = settings.EXISTDB_PREVIEW_COLLECTION + "/" + filename
            # context manager so the file handle is closed whether or not the load succeeds
            with open(fullpath, 'r') as eadfile:
                success = db.load(eadfile, preview_dbpath)
        except ExistDBException as exc:
            # rebind to a separate name: on Python 3 the ``as`` target is
            # unbound when the handler exits, and ``err`` is needed below
            err = exc
            success = False
            errors.append(exc.message())

        if success:
            # load the file as a FindingAid object so we can generate the preview url
            ead = load_xmlobject_from_file(fullpath, FindingAid)
            messages.success(request, 'Successfully loaded <b>%s</b> for preview.' % filename)
            # redirect to document preview page with code 303 (See Other)
            return HttpResponseSeeOtherRedirect(reverse('fa-admin:preview:findingaid', kwargs={'id': ead.eadid}))
        else:
            # no exception but no success means the load failed;
            # *probably* due to insufficient permissions
            if not errors:
                errors.append('Failed to load the document to the preview collection')

            return render(request, 'fa_admin/publish-errors.html',
                    {'errors': errors, 'filename': filename, 'mode': 'preview', 'exception': err})

    # NOTE: preview list is not used anymore; functionality is handled
    # by main admin view; if we revisit preview list, to be more usable it
    # should be filterable by archive
    else:
        fa = get_findingaid(preview=True, only=['eadid', 'list_title', 'last_modified'],
                            order_by='last_modified')
        return render(request, 'fa_admin/preview_list.html',
                {'findingaids': fa})
Ejemplo n.º 6
0
class ModelTest(unittest.TestCase):
    """Exercise the ``Parting`` and ``Exclamation`` models through ExistDB.

    Each test runs against a scratch collection that is created and populated
    with two fixture documents in :meth:`setUp` and removed in
    :meth:`tearDown`.  While a test runs, ``settings.EXISTDB_ROOT_COLLECTION``
    is pointed at that scratch collection.
    """
    COLLECTION = settings.EXISTDB_TEST_COLLECTION

    # fixture documents (under ./exist_fixtures) loaded before every test
    _FIXTURES = ('goodbye-english.xml', 'goodbye-french.xml')

    def setUp(self):
        """Create the test collection, load the fixtures, repoint settings."""
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL,
                          username=EXISTDB_SERVER_USER,
                          password=EXISTDB_SERVER_PASSWORD)
        self.db.createCollection(self.COLLECTION, True)

        fixture_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'exist_fixtures')
        for name in self._FIXTURES:
            # context manager so the fixture file handle is always closed
            with open(os.path.join(fixture_dir, name)) as xmlfile:
                self.db.load(xmlfile, self.COLLECTION + '/' + name, True)

        # temporarily set test collection as root exist collection
        self._root_collection = settings.EXISTDB_ROOT_COLLECTION
        settings.EXISTDB_ROOT_COLLECTION = self.COLLECTION

    def tearDown(self):
        """Remove the test collection and restore the original root setting."""
        try:
            self.db.removeCollection(self.COLLECTION)
        finally:
            # restore settings even if the collection could not be removed,
            # so a failed cleanup does not leak into later tests
            settings.EXISTDB_ROOT_COLLECTION = self._root_collection

    def test_manager(self):
        """The default manager should see both fixture documents."""
        partings = Parting.objects.all()
        self.assertEqual(2, partings.count())

    def test_sibling_query(self):
        """Sibling node access via ``also`` should return the next node."""
        exc = Exclamation.objects.filter(text='Au revoir').also('next').get()
        self.assertEqual('monde', exc.next)
Ejemplo n.º 7
0
# Bulk-load the text corpus and the resource files into eXist.
#
# The connection URL carries the credentials in its userinfo part:
#
#     http://username:password@hostname:8080/exist
#
# YOU NEED TO INSERT THE USER AND PASSWORD HERE
#xmldb = ExistDB('http://admin:@46.137.59.250:8080/exist')
xmldb = ExistDB('http://*****:*****@localhost:8080/exist')

for collection in ('docker', 'docker/texts'):
    xmldb.createCollection(collection, True)

# all local paths below are relative to the document store
os.chdir('../dublin-store')

# Load texts: mirror the local directory tree under docker/texts
for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    xmldb.createCollection('/'.join(('docker/texts', dirpath)), True)
    for filename in filenames:
        with open('/'.join((dirpath, filename))) as f:
            print("--" + '/'.join((dirpath, filename)))
            xmldb.load(f, '/'.join(('docker/texts', dirpath, filename)), True)

# Load resources: mirror the local directory tree under docker
for (dirpath, dirnames, filenames) in walk('resources'):
    xmldb.createCollection('/'.join(('docker', dirpath)), True)
    for filename in filenames:
        with open('/'.join((dirpath, filename))) as f:
            xmldb.load(f, '/'.join(('docker', dirpath, filename)), True)
        file_name = os.path.join(collection_path, "%03d.xml" % (juan,))
        result = subprocess.call(["/usr/bin/java", "-Dfile.encoding=UTF-8",
                                  "-Djava.util.logging.config.file=/docker/bertie-uima/src/main/properties/Logger.properties",
                                  "-jar", BERTIE_JAR,
                                  "--tei",
                                  "--file", file_name,
                                  "--owl", f.name], stdout=devnull, stderr=devnull)

        # Reload single document for faster response
        xmldb = ExistDB(server_url="http://*****:*****@" + existdb_host + ":8080/exist", timeout=10)
        db_collection_path = 'docker/texts/' + \
             collection_path.replace('/docker/dublin-store/', '')
        with open(file_name) as newly_annotated_file:
            print " [ ] Reloading single document"
            try:
                xmldb.load(newly_annotated_file, os.path.join(db_collection_path, os.path.split(file_name)[1]), True)
            except:
                print "FAILED TO LOAD " + file_name

        # Send response early
        send_response("OK")

    start_uima = time.time()
    result = subprocess.call(["/usr/bin/java", "-Dfile.encoding=UTF-8",
                              "-Djava.util.logging.config.file=/docker/bertie-uima/src/main/properties/Logger.properties",
                              "-jar", BERTIE_JAR,
                              "--tei",
                              "--directory", collection_path,
                              "--owl", f.name], stdout=devnull, stderr=devnull)
    done_uima = time.time()
    print "RUNTIME"
Ejemplo n.º 9
0
 def _load_file_to_exist(self, filename):
     """Load a local file into the root eXist collection under its base name.

     :param filename: path of the local file to load; only its final path
         component is used to name the document in eXist
     """
     db = ExistDB()
     fname = path.basename(filename)
     exist_path = path.join(settings.EXISTDB_ROOT_COLLECTION, fname)
     # context manager so the file handle is closed even if the load fails
     with open(filename) as xmlfile:
         db.load(xmlfile, exist_path)
Ejemplo n.º 10
0
# Bulk-load the text corpus and the resource files into eXist.
#
# The connection URL carries the credentials in its userinfo part:
#
#     http://username:password@hostname:8080/exist
#
# YOU NEED TO INSERT THE USER AND PASSWORD HERE
#xmldb = ExistDB('http://admin:@46.137.59.250:8080/exist')
xmldb = ExistDB('http://*****:*****@localhost:8080/exist')

for collection in ('docker', 'docker/texts'):
    xmldb.createCollection(collection, True)

# all local paths below are relative to the document store
os.chdir('../dublin-store')

# Load texts: mirror the local directory tree under docker/texts
for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    xmldb.createCollection('/'.join(('docker/texts', dirpath)), True)
    for filename in filenames:
        with open('/'.join((dirpath, filename))) as f:
            print("--" + '/'.join((dirpath, filename)))
            xmldb.load(f, '/'.join(('docker/texts', dirpath, filename)), True)

# Load resources: mirror the local directory tree under docker
for (dirpath, dirnames, filenames) in walk('resources'):
    xmldb.createCollection('/'.join(('docker', dirpath)), True)
    for filename in filenames:
        with open('/'.join((dirpath, filename))) as f:
            xmldb.load(f, '/'.join(('docker', dirpath, filename)), True)
Ejemplo n.º 11
0
# Bulk-load the text corpus and the resource files into eXist.
xmldb = ExistDB(timeout=60)

xmldb.createCollection('docker', True)
xmldb.createCollection('docker/texts', True)

# all local paths below are relative to the document store
os.chdir('../dublin-store')

# Load texts: mirror the local directory tree under docker/texts
for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    xmldb.createCollection('docker/texts' + '/' + dirpath, True)
    if filenames:
        for filename in sorted(filenames):
            with open(os.path.join(dirpath, filename)) as f:
                print("--" + os.path.join(dirpath, filename))
                try:
                    xmldb.load(
                        f, os.path.join('docker', 'texts', dirpath, filename),
                        True)
                except Exception as err:
                    # best effort: report the failure and keep going with the
                    # remaining files.  ``Exception`` rather than a bare
                    # ``except`` so Ctrl-C / SystemExit still stop the run.
                    print("FAILED TO LOAD!!! " + filename)
                    print("    " + repr(err))

#
# Load resources
#
for (dirpath, dirnames, filenames) in walk('resources'):
    xmldb.createCollection('docker' + '/' + dirpath, True)
    if filenames:
        for filename in filenames:
            with open(os.path.join(dirpath, filename)) as f:
                xmldb.load(f, os.path.join('docker', dirpath, filename), True)

#
Ejemplo n.º 12
0
class Command(BaseCommand):
    """Prepare codebook XML files and load them into the eXist collection.

    Each file is parsed as a ``CodeBook``, cleaned up by :meth:`prep`
    (local topics, normalized dates, geocoded locations), serialized and
    loaded to ``settings.EXISTDB_ROOT_COLLECTION``.  The local file is
    removed after a successful load; with ``--dry-run`` nothing is loaded
    or removed.
    """
    args = '<filename filename filename ...>'
    help = '''Loads XML files into the configured eXist collection.
The local copy will be *removed* after it is successfully loaded.'''

    option_list = BaseCommand.option_list + (
        make_option('--dry-run', '-n',
            dest='dryrun',
            action='store_true',
            help='''Report on what would be done, but don't delete any files'''
        ),
    )

    # verbosity level treated as "normal" output
    v_normal = 1

    def handle(self, *files, **options):
        """Prepare and load every file named on the command line.

        :param files: paths of the local XML files to process
        :param options: command options; ``verbosity`` and ``dryrun`` are
            the ones read here
        :raises CommandError: if ``EXISTDB_ROOT_COLLECTION`` is unset or empty
        """
        verbosity = int(options.get('verbosity', self.v_normal))
        dry_run = options.get('dryrun', False)

        # check for required settings
        if not getattr(settings, 'EXISTDB_ROOT_COLLECTION', None):
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")

        self.db = ExistDB()
        self.cbgeocoder = CodebookGeocoder()

        # init progress bar if processing enough files, running on a terminal
        pbar = None
        total = len(files)
        if total >= 10 and os.isatty(sys.stderr.fileno()):
            widgets = [Percentage(), ' (', SimpleProgress(), ')',
                       Bar(), ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=total).start()

        errored = 0
        loaded = 0
        for index, f in enumerate(files):
            success = False

            if pbar:
                # number of files already handled; counting by index keeps
                # the bar moving in dry-run mode, where nothing is tallied
                pbar.update(index)

            try:
                # full path location where file will be loaded in exist db collection
                dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(f)
                # TODO: any error checking? validation?

                start = time.time()
                cb = load_xmlobject_from_file(f, CodeBook)
                logger.debug('%s loaded as xml in %f sec', f, time.time() - start)

                start = time.time()
                self.prep(cb)
                logger.debug('%s prepped in %f sec', f, time.time() - start)
                # load to eXist from string since DDI documents aren't that large,
                # rather than reloading the file
                if not dry_run:
                    start = time.time()
                    success = self.db.load(cb.serialize(pretty=True), dbpath, overwrite=True)
                    logger.debug('%s loaded to eXist in %f sec', f, time.time() - start)

            except IOError as e:
                self.stdout.write("Error opening %s: %s" % (f, e))
                errored += 1
                continue

            except ExistDBException as e:
                self.stdout.write("Error: failed to load %s to eXist" % f)
                self.stdout.write(e.message())
                errored += 1
                continue

            if dry_run:
                continue

            if not success:
                # load reported failure without raising; count it rather
                # than silently dropping the file from both tallies
                self.stdout.write("Error: failed to load %s to eXist" % f)
                errored += 1
                continue

            loaded += 1
            if verbosity > self.v_normal:
                self.stdout.write("Loaded %s as %s" % (f, dbpath))

            # the local copy is only removed once it is safely in eXist
            try:
                os.remove(f)
            except OSError as e:
                self.stdout.write('Error removing %s: %s' % (f, e))

        if pbar:
            pbar.finish()

        # output a summary of what was done if more than one file was processed
        if verbosity >= self.v_normal:
            if loaded > 1:
                self.stdout.write("%d document%s loaded" % \
                                  (loaded, 's' if loaded != 1 else ''))
            if errored > 1:
                self.stdout.write("%d document%s with errors" % \
                                  (errored, 's' if errored != 1 else ''))

    # organization prefix, then a roman-numeral outline id such as "IV.B.2.a"
    topic_id = re.compile(r'^(?P<org>[A-Z]+)[ .](?P<id>[IVX]+(\.[A-Z](\.[0-9]+(\.[a-z]+)?)?)?)')

    def prep(self, cb):
        """Run all cleanup steps on a codebook before it is loaded to eXist.

        :param cb: the ``CodeBook`` to modify in place
        """
        self.local_topics(cb)
        self.clean_dates(cb)
        self.cbgeocoder.code_locations(cb)

    def icpsr_topic_id(self, topic):
        """Return the lookup key for an ICPSR topic string.

        :param topic: topic text, expected to start with an organization
            name followed by a space or dot and an outline id
        :returns: ``'ICPSR.<id>'`` in the format used by the topic
            dictionaries, or ``None`` if ``topic`` is not an ICPSR topic
        """
        m = self.topic_id.match(topic)
        if m:
            match_info = m.groupdict()
            if match_info['org'] == 'ICPSR':
                return '%(org)s.%(id)s' % match_info
        return None

    def local_topics(self, cb):
        """Add local-vocabulary topics derived from the codebook's ICPSR topics.

        :param cb: the ``CodeBook`` whose ``topics`` are extended in place
        """
        # whether the codebook's geographic coverage includes 'Global';
        # the same for every topic, so work it out once
        is_global = 'Global' in [unicode(gc) for gc in cb.geo_coverage]

        # iterate over a snapshot: topics are appended to cb.topics below,
        # and the sequence should not grow underneath its own iteration
        for t in list(cb.topics):
            topic_id = self.icpsr_topic_id(t.val)
            if topic_id is None:
                continue

            new_topic = topic_mappings.get(topic_id, None)
            if new_topic:
                cb.topics.append(Topic(val=new_topic,
                    vocab='local'))

            # conditional topics if the geographic coverage is global
            if topic_id in conditional_topics['global'] and is_global:
                cb.topics.append(Topic(val=conditional_topics['global'][topic_id],
                                       vocab='local'))

    def clean_dates(self, cb):
        """Normalize time period dates so they are searchable by 4-digit year.

        Dates should end up as YYYY, YYYY-MM, or YYYY-MM-DD.

        :param cb: the ``CodeBook`` whose ``time_periods`` are fixed in place
        """
        prev_date = None
        for d in cb.time_periods:
            # special case: two-digit date as second date in a cycle
            # interpret as month on the year that starts the cycle.
            # There is no previous date on the first pass, so an 'end' event
            # in first position can never be the second date of a cycle.
            closes_cycle = (prev_date is not None and d.event == 'end'
                            and d.cycle == prev_date.cycle)
            if closes_cycle and len(d.date) == 2:
                d.date = '%04d-%02d' % (int(prev_date.date), int(d.date))

            elif len(d.date) < 4:
                d.date = '%04d' % int(d.date)

            # store current date as previous date for next loop, in case
            # we need to clean up an end date in a cycle
            prev_date = d
Ejemplo n.º 13
0
 def _load_file_to_exist(self, file):
     """Load a local file into the root eXist collection under its base name.

     The load is called with ``True`` as its third argument, as in the
     original code.

     :param file: path of the local file to load; only its final path
         component is used to name the document in eXist
     """
     db = ExistDB()
     fname = path.basename(file)
     exist_path = path.join(settings.EXISTDB_ROOT_COLLECTION, fname)
     # context manager so the file handle is closed even if the load fails
     with open(file) as xmlfile:
         db.load(xmlfile, exist_path, True)
Ejemplo n.º 14
0
    def handle(self, *args, **options):
        """Check EAD files and load the ones that pass into eXist.

        Files come from the command line, or, when none are given, from the
        ``*.xml`` files in every archive's svn working copy (updated first).
        Each file is run through ``check_ead``; files with errors are
        reported and skipped.  After a successful load a PDF cache reload
        task is queued unless ``skip_pdf_reload`` is set.

        NOTE(review): ``pdf_tasks`` and ``start_time`` are collected but not
        consumed in the portion of the method visible here.

        :param args: optional paths of EAD XML files to load
        :param options: command options; ``verbosity``, ``pdf_only`` and
            ``skip_pdf_reload`` are read here
        :raises CommandError: on incompatible options or when
            ``EXISTDB_ROOT_COLLECTION`` is unset or empty
        """
        verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
        v_normal = 1
        v_all = 2

        if options['pdf_only'] and options['skip_pdf_reload']:
            raise CommandError("Options -s and -p are not compatible")

        # check for required settings
        if not getattr(settings, 'EXISTDB_ROOT_COLLECTION', None):
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")

        if len(args):
            files = args
        else:
            # Note: copied from prep_ead manage command; move somewhere common?
            files = set()
            svn = svn_client()
            for archive in Archive.objects.all():
                # update to make sure we have latest version of everything
                svn.update(str(archive.svn_local_path))   # apparently can't handle unicode
                files.update(glob.iglob(os.path.join(archive.svn_local_path, '*.xml')))

        if verbosity == v_all:
            print('Documents will be loaded to configured eXist collection: %s'
                  % settings.EXISTDB_ROOT_COLLECTION)
            if options['skip_pdf_reload']:
                print("** Skipping PDFs cache reload")

        db = ExistDB()

        loaded = 0
        errored = 0
        pdf_tasks = {}

        start_time = datetime.now()

        if not options['pdf_only']:
            # unless PDF reload only has been specified, load files

            for ead_path in files:
                try:
                    # full path location where file will be loaded in exist db collection
                    dbpath = settings.EXISTDB_ROOT_COLLECTION + "/" + os.path.basename(ead_path)
                    errors = check_ead(ead_path, dbpath)
                    if errors:
                        # report errors, don't load
                        errored += 1
                        print("Error: %s does not pass publication checks; not loading to eXist." % ead_path)
                        if verbosity >= v_normal:
                            print("  Errors found:")
                            for err in errors:
                                print("    %s" % err)
                        continue

                    with open(ead_path, 'r') as eadfile:
                        success = db.load(eadfile, dbpath, overwrite=True)

                    if not success:
                        errored += 1
                        print("Error: failed to load %s to eXist" % ead_path)
                        continue

                    loaded += 1
                    if verbosity >= v_normal:
                        print("Loaded %s" % ead_path)
                    # load the file as a FindingAid object to get the eadid for PDF reload
                    ead = load_xmlobject_from_file(ead_path, FindingAid)

                    # trigger PDF regeneration in the cache and store task result
                    # - unless user has requested PDF reload be skipped
                    if not options['skip_pdf_reload']:
                        pdf_tasks[ead.eadid.value] = reload_cached_pdf.delay(ead.eadid.value)
                        # NOTE: unlike the web admin publish, this does not
                        # generate TaskResult db records; task outcomes will be
                        # checked & reported before the script finishes
                except ExistDBException as e:
                    print("Error: failed to load %s to eXist" % ead_path)
                    print(e.message())
                    errored += 1

            # output a summary of what was done
            print("%d document%s loaded" % (loaded, 's' if loaded != 1 else ''))
            print("%d document%s with errors" % (errored, 's' if errored != 1 else ''))
Ejemplo n.º 15
0
#
# Bulk-load the text corpus and the resource files into eXist.
xmldb = ExistDB(timeout=60)

xmldb.createCollection('docker', True)
xmldb.createCollection('docker/texts', True)

# all local paths below are relative to the document store
os.chdir('../dublin-store')

# Load texts: mirror the local directory tree under docker/texts
for (dirpath, dirnames, filenames) in walk('浙江大學圖書館'):
    xmldb.createCollection('docker/texts' + '/' + dirpath, True)
    if filenames:
        for filename in sorted(filenames):
            with open(os.path.join(dirpath, filename)) as f:
                print("--" + os.path.join(dirpath, filename))
                try:
                    xmldb.load(f, os.path.join('docker', 'texts', dirpath, filename), True)
                except Exception as err:
                    # best effort: report the failure and keep going with the
                    # remaining files.  ``Exception`` rather than a bare
                    # ``except`` so Ctrl-C / SystemExit still stop the run.
                    print("FAILED TO LOAD!!! " + filename)
                    print("    " + repr(err))

#
# Load resources
#
for (dirpath, dirnames, filenames) in walk('resources'):
    xmldb.createCollection('docker' + '/' + dirpath, True)
    if filenames:
        for filename in filenames:
            with open(os.path.join(dirpath, filename)) as f:
                xmldb.load(f, os.path.join('docker', dirpath, filename), True)

#
# Load TEI into solr