예제 #1
0
    def _fixture_setup(self):
        """Load eXist fixtures declared on the test case, then run the
        parent class's fixture setup.

        If the test case defines an ``exist_fixtures`` dictionary, the
        following optional keys are honored, in this order:

        - ``index``: path to a collection index configuration file, which
          is loaded for ``settings.EXISTDB_ROOT_COLLECTION``
        - ``directory``: path to a directory; every ``*.xml`` file directly
          inside it is passed to ``self._load_file_to_exist``
        - ``files``: iterable of file paths, each passed to
          ``self._load_file_to_exist``

        :returns: whatever the parent ``_fixture_setup`` returns
        """
        if hasattr(self, 'exist_fixtures'):
            db = ExistDB()
            # load index
            if 'index' in self.exist_fixtures:
                # context manager guarantees the index file handle is closed,
                # even if loading the index raises
                with open(self.exist_fixtures['index']) as index_file:
                    db.loadCollectionIndex(settings.EXISTDB_ROOT_COLLECTION,
                                           index_file)
            if 'directory' in self.exist_fixtures:
                xml_pattern = path.join(self.exist_fixtures['directory'], '*.xml')
                for filename in glob(xml_pattern):
                    self._load_file_to_exist(filename)
            if 'files' in self.exist_fixtures:
                for filename in self.exist_fixtures['files']:
                    self._load_file_to_exist(filename)

        return super(TestCase, self)._fixture_setup()
예제 #2
0
    def _fixture_setup(self):
        """Load eXist fixtures declared on the test case, then run the
        parent class's fixture setup.

        If the test case defines an ``exist_fixtures`` dictionary, the
        following optional keys are honored, in this order:

        - ``index``: path to a collection index configuration file, which
          is loaded for ``settings.EXISTDB_ROOT_COLLECTION``
        - ``directory``: path to a directory; every ``*.xml`` file directly
          inside it is passed to ``self._load_file_to_exist``
        - ``files``: iterable of file paths, each passed to
          ``self._load_file_to_exist``

        :returns: whatever the parent ``_fixture_setup`` returns
        """
        if hasattr(self, 'exist_fixtures'):
            db = ExistDB()
            # load index
            if 'index' in self.exist_fixtures:
                # context manager guarantees the index file handle is closed,
                # even if loading the index raises
                with open(self.exist_fixtures['index']) as index_file:
                    db.loadCollectionIndex(settings.EXISTDB_ROOT_COLLECTION,
                                           index_file)
            # loop variables are named ``filename`` rather than ``file`` so
            # the Python 2 builtin ``file`` is not shadowed
            if 'directory' in self.exist_fixtures:
                xml_pattern = path.join(self.exist_fixtures['directory'], '*.xml')
                for filename in glob(xml_pattern):
                    self._load_file_to_exist(filename)
            if 'files' in self.exist_fixtures:
                for filename in self.exist_fixtures['files']:
                    self._load_file_to_exist(filename)

        return super(TestCase, self)._fixture_setup()
예제 #3
0
class Command(BaseCommand):
    """Management command for the eXist-db collection index configuration.

    Takes one subcommand (see ``arg_list``) and operates on the collection
    named by ``settings.EXISTDB_ROOT_COLLECTION``, using the index
    configuration file named by ``settings.EXISTDB_INDEX_CONFIGFILE``.
    ``--username`` / ``--password`` may be given to override the eXist
    credentials for this run.
    """
    help = """Tasks for managing eXist-db index configuration file.

Available subcommands:
  load-index      - load index configuration file to eXist
  show-index      - show the contents of index configuration file currently in eXist
  index-info      - show information about index configuration file in eXist (owner, date modified, etc.)
  remove-index    - remove index configuration from eXist
  reindex         - reindex the configured eXist collection with the loaded index
  """

    # recognized subcommands
    arg_list = ['load-index', 'show-index', 'index-info', 'remove-index', 'reindex']

    args = ' | '. join(arg_list)

    def get_password_option(option, opt, value, parser):
        """optparse callback: prompt with ``getpass`` and store the result
        on the option's destination.

        Defined as a plain function in the class body (no ``self``) because
        it is referenced directly by ``option_list`` below.
        """
        setattr(parser.values, option.dest, getpass())

    option_list = BaseCommand.option_list + (
        make_option('--username', '-u',
            dest='username',
            action='store',
            help='''Username to use when connecting to eXist (overrides any in local settings)'''),
        make_option('--password', '-p',
            dest='password',
            action='callback', callback=get_password_option,
            help='''Prompt for password (required when --username is specified)'''),
        )

    # FIXME/TODO: possibly convert into a django LabelCommand

    def handle(self, *args, **options):
        """Run the requested index subcommand.

        With no arguments, ``help``, or an unrecognized subcommand, the
        help text is printed and nothing else happens.

        :param args: positional arguments; ``args[0]`` is the subcommand
        :param options: parsed options; ``username`` and ``password`` are
            used as eXist credentials when they are not None
        :raises CommandError: if a required setting is missing or empty, if
            the collection has no index configuration (for every subcommand
            except ``load-index``), if loading or removing the index fails,
            if the collection does not exist (``reindex``), or if any other
            exception is raised while talking to eXist

        Note that a failed ``reindex`` is only reported on stdout; it does
        not raise.
        """
        # NOTE: print is always called with a single parenthesized argument,
        # which produces the same output under Python 2 and Python 3.
        if not args or args[0] == 'help':
            print(self.help)
            return

        cmd = args[0]
        if cmd not in self.arg_list:
            print("Command '%s' not recognized" % cmd)
            print(self.help)
            return

        # check for required settings (used in all modes); a setting that is
        # absent and one that is present but empty are both treated as missing
        if not getattr(settings, 'EXISTDB_ROOT_COLLECTION', None):
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
        if not getattr(settings, 'EXISTDB_INDEX_CONFIGFILE', None):
            raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing")

        collection = settings.EXISTDB_ROOT_COLLECTION
        index = settings.EXISTDB_INDEX_CONFIGFILE

        # settings overrides for any credentials given on the command line
        credentials = {}
        if options.get('username') is not None:
            credentials['EXISTDB_SERVER_USER'] = options.get('username')
        if options.get('password') is not None:
            credentials['EXISTDB_SERVER_PASSWORD'] = options.get('password')

        try:
            # Explicitly request no timeout (even if one is configured
            # in django settings), since some tasks (such as
            # reindexing) could take a while.

            if credentials:
                # NOTE: override_settings is a test utility, but this is currently
                # the simplest way to specify credentials: ExistDB is constructed
                # here without explicit credentials (presumably it picks them up
                # from django settings -- confirm against ExistDB).
                with override_settings(**credentials):
                    self.db = ExistDB(timeout=None)
            else:
                self.db = ExistDB(timeout=None)

            # check there is already an index config
            hasindex = self.db.hasCollectionIndex(collection)

            # for all commands but load, nothing to do if config collection does not exist
            if not hasindex and cmd != 'load-index':
                raise CommandError("Collection %s has no index configuration" % collection)

            if cmd == 'load-index':
                # load collection index to eXist

                # no easy way to check if index is different, but give some info to user to help indicate
                if hasindex:
                    index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                    print("Collection already has an index configuration; last modified %s\n" % index_desc['modified'])
                else:
                    print("This appears to be a new index configuration\n")

                message = "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index)

                # context manager ensures the config file handle is closed
                # whether or not the load succeeds
                with open(index) as index_file:
                    success = self.db.loadCollectionIndex(collection, index_file)
                if success:
                    print("Succesfully updated %s" % message)
                    print("""
If your collection already contains data and the index configuration
is new or has changed, you should reindex the collection.
            """)
                else:
                    raise CommandError("Failed to update %s" % message)

            elif cmd == 'show-index':
                # show the contents of the collection index config file in exist
                print(self.db.getDoc(self.db._collectionIndexPath(collection)))

            elif cmd == 'index-info':
                # show information about the collection index config file in exist
                index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                for field, val in index_desc.items():
                    print("%s:\t%s" % (field, val))

            elif cmd == 'remove-index':
                # remove any collection index in eXist
                if self.db.removeCollectionIndex(collection):
                    print("Removed collection index configuration for %s" % collection)
                else:
                    raise CommandError("Failed to remove collection index configuration for %s" % collection)

            elif cmd == 'reindex':
                # reindex the collection
                if not self.db.hasCollection(collection):
                    raise CommandError("Collection %s does not exist" % collection)

                print("Reindexing collection %s" % collection)
                print("-- If you have a large collection, this may take a while.")
                start_time = time.time()
                success = self.db.reindexCollection(collection)
                end_time = time.time()
                if success:
                    print("Successfully reindexed collection %s" % collection)
                    print("Reindexing took %.2f seconds" % (end_time - start_time))
                else:
                    print("Failed to reindexed collection %s" % collection)
                    print("-- Check that the configured exist user is in the exist DBA group or specify different credentials.")

        except CommandError:
            # errors raised deliberately above already carry a user-facing
            # message; pass them through instead of re-wrapping them
            raise
        except Exception as err:
            # better error messages would be nice...
            raise CommandError(err)
예제 #4
0
class ExistQueryTest__FullText(unittest.TestCase):
    # when full-text indexing is enabled, eXist must index files when they are loaded to the db
    # this makes tests *significantly* slower
    # any tests that require full-text queries should be here

    # sample lucene configuration for testing full-text queries
    FIXTURE_INDEX = '''
    <collection xmlns="http://exist-db.org/collection-config/1.0">
        <index>
            <lucene>
                <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
                <text qname="description"/>
                <text qname="root"/>
            </lucene>
        </index>
    </collection>
    '''

    def setUp(self):
        self.db = ExistDB(server_url=EXISTDB_SERVER_URL)
        # create index for collection - should be applied to newly loaded files
        self.db.loadCollectionIndex(COLLECTION, self.FIXTURE_INDEX)

        load_fixtures(self.db)

        self.qs = QuerySet(using=self.db, xpath='/root',
                           collection=COLLECTION, model=QueryTestModel)

    def tearDown(self):
        self.db.removeCollection(COLLECTION)
        self.db.removeCollectionIndex(COLLECTION)

    def test_filter_fulltext_terms(self):
        fqs = self.qs.filter(description__fulltext_terms='only two')
        self.assertEqual(1, fqs.count(),
                         "should get 1 match for fulltext_terms search on = 'only two' (got %s)" % fqs.count())

    def test_filter_fulltext_options(self):
        qs = QuerySet(using=self.db, xpath='/root',
                      collection=COLLECTION, model=QueryTestModel,
                      fulltext_options={'default-operator': 'and'})
        # search for terms present in fixtures - but not both present in one doc
        fqs = qs.filter(description__fulltext_terms='only third')
        # for now, just confirm that the option is passed through to query
        self.assert_('<default-operator>and</default-operator>' in fqs.query.getQuery())
        # TODO: test this properly!
        # query options not supported in current version of eXist
        # self.assertEqual(0, fqs.count())

    def test_order_by__fulltext_score(self):
        fqs = self.qs.filter(description__fulltext_terms='one').order_by('-fulltext_score')
        self.assertEqual('one', fqs[0].name)    # one appears 3 times, should be first

    def test_only__fulltext_score(self):
        fqs = self.qs.filter(description__fulltext_terms='one').only('fulltext_score', 'name')
        self.assert_(isinstance(fqs[0], QueryTestModel))  # actually a Partial type derived from this
        # fulltext score attribute should be present
        self.assertNotEqual(fqs[0].fulltext_score, None)
        self.assert_(float(fqs[0].fulltext_score) > 0.5)    # full-text score should be a float

    def test_fulltext_highlight(self):
        fqs = self.qs.filter(description__fulltext_terms='only two')
        # result from fulltext search - by default, xml should have exist:match tags
        self.assert_('<exist:match' in fqs[0].serialize())

        fqs = self.qs.filter(description__fulltext_terms='only two', highlight=False)
        # with highlighting disabled, should not have exist:match tags
        self.assert_('<exist:match' not in fqs[0].serialize())

        # order of args in the same filter should not matter
        fqs = self.qs.filter(highlight=False, description__fulltext_terms='only two')
        # with highlighting disabled, should not have exist:match tags
        self.assert_('<exist:match' not in fqs[0].serialize())

        # separate filters should also work
        fqs = self.qs.filter(description__fulltext_terms='only two').filter(highlight=False)
        # with highlighting disabled, should not have exist:match tags
        self.assert_('<exist:match' not in fqs[0].serialize())

    def test_highlight(self):
        fqs = self.qs.filter(highlight='supercalifragilistic')
        self.assertEqual(4, fqs.count(),
                         "highlight filter returns all documents even though search term is not present")

        fqs = self.qs.filter(highlight='one').order_by('id')
        self.assert_('<exist:match' in fqs[0].serialize())

    def test_match_count(self):
        fqs = self.qs.filter(id='one', highlight='one').only('match_count')
        self.assertEqual(fqs[0].match_count, 4, "4 matched words should be found")

    def test_using(self):
        fqs = self.qs.using('new-collection')
        # using should update the collection on the xquery object
        self.assertEqual('new-collection', fqs.query.collection)
예제 #5
0
class Command(BaseCommand):
    """Management command for the eXist-db collection index configuration.

    Takes one subcommand (see ``arg_list``) and operates on the collection
    named by ``settings.EXISTDB_ROOT_COLLECTION``, using the index
    configuration file named by ``settings.EXISTDB_INDEX_CONFIGFILE``.
    """
    help = """Tasks for managing eXist-db index configuration file.

Available subcommands:
  load-index      - load index configuration file to eXist
  show-index      - show the contents of index configuration file currently in eXist
  index-info      - show information about index configuration file in eXist (owner, date modified, etc.)
  remove-index    - remove index configuration from eXist
  reindex         - reindex the configured eXist collection with the loaded index
  """

    # recognized subcommands
    arg_list = ['load-index', 'show-index', 'index-info', 'remove-index', 'reindex']

    args = ' | '. join(arg_list)

    # FIXME/TODO: possibly convert into a django LabelCommand

    def handle(self, *args, **options):
        """Run the requested index subcommand.

        With no arguments, ``help``, or an unrecognized subcommand, the
        help text is printed and nothing else happens.

        :param args: positional arguments; ``args[0]`` is the subcommand
        :param options: parsed command options (not used here)
        :raises CommandError: if a required setting is missing or empty, if
            the collection has no index configuration (for every subcommand
            except ``load-index``), if loading or removing the index fails,
            if the collection does not exist (``reindex``), or if any other
            exception is raised while talking to eXist

        Note that a failed ``reindex`` is only reported on stdout; it does
        not raise.
        """
        # NOTE: print is always called with a single parenthesized argument,
        # which produces the same output under Python 2 and Python 3.
        if not args or args[0] == 'help':
            print(self.help)
            return

        cmd = args[0]
        if cmd not in self.arg_list:
            print("Command '%s' not recognized" % cmd)
            print(self.help)
            return

        # check for required settings (used in all modes); a setting that is
        # absent and one that is present but empty are both treated as missing
        if not getattr(settings, 'EXISTDB_ROOT_COLLECTION', None):
            raise CommandError("EXISTDB_ROOT_COLLECTION setting is missing")
        if not getattr(settings, 'EXISTDB_INDEX_CONFIGFILE', None):
            raise CommandError("EXISTDB_INDEX_CONFIGFILE setting is missing")

        collection = settings.EXISTDB_ROOT_COLLECTION
        index = settings.EXISTDB_INDEX_CONFIGFILE

        try:
            # Explicitly request no timeout (even if one is configured
            # in django settings), since some tasks (such as
            # reindexing) could take a while.
            self.db = ExistDB(timeout=None)

            # check there is already an index config
            hasindex = self.db.hasCollectionIndex(collection)

            # for all commands but load, nothing to do if config collection does not exist
            if not hasindex and cmd != 'load-index':
                raise CommandError("Collection %s has no index configuration" % collection)

            if cmd == 'load-index':
                # load collection index to eXist

                # no easy way to check if index is different, but give some info to user to help indicate
                if hasindex:
                    index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                    print("Collection already has an index configuration; last modified %s\n" % index_desc['modified'])
                else:
                    print("This appears to be a new index configuration\n")

                message = "eXist index configuration \n collection:\t%s\n index file:\t%s" % (collection, index)

                # context manager ensures the config file handle is closed
                # whether or not the load succeeds
                with open(index) as index_file:
                    success = self.db.loadCollectionIndex(collection, index_file)
                if success:
                    print("Succesfully updated %s" % message)
                    print("""
If your collection already contains data and the index configuration
is new or has changed, you should reindex the collection.
            """)
                else:
                    raise CommandError("Failed to update %s" % message)

            elif cmd == 'show-index':
                # show the contents of the collection index config file in exist
                print(self.db.getDoc(self.db._collectionIndexPath(collection)))

            elif cmd == 'index-info':
                # show information about the collection index config file in exist
                index_desc = self.db.describeDocument(self.db._collectionIndexPath(collection))
                for field, val in index_desc.items():
                    print("%s:\t%s" % (field, val))

            elif cmd == 'remove-index':
                # remove any collection index in eXist
                if self.db.removeCollectionIndex(collection):
                    print("Removed collection index configuration for %s" % collection)
                else:
                    raise CommandError("Failed to remove collection index configuration for %s" % collection)

            elif cmd == 'reindex':
                # reindex the collection
                if not self.db.hasCollection(collection):
                    raise CommandError("Collection %s does not exist" % collection)

                print("Reindexing collection %s" % collection)
                print("-- If you have a large collection, this may take a while.")
                start_time = time.time()
                success = self.db.reindexCollection(collection)
                end_time = time.time()
                if success:
                    print("Successfully reindexed collection %s" % collection)
                    print("Reindexing took %.2f seconds" % (end_time - start_time))
                else:
                    print("Failed to reindexed collection %s" % collection)
                    print("-- Check that the configured exist user is in the exist DBA group.")

        except CommandError:
            # errors raised deliberately above already carry a user-facing
            # message; pass them through instead of re-wrapping them
            raise
        except Exception as err:
            # better error messages would be nice...
            raise CommandError(err)