def __call__(self, command, _options):
    """
    Create the collections BTW needs in eXist and give the BTW server
    user/group ownership of them.

    :param command: the management command being run; supplies
      ``server_user`` and ``btw_group``.
    :param _options: unused.
    """
    assert_running()
    db = get_admin_db()
    for collection in [settings.EXISTDB_ROOT_COLLECTION,
                       get_collection_path("chunks"),
                       get_collection_path("display")]:
        db.createCollection(collection)
        # BUG FIX: the original used the Python 2 octal literal ``0770``,
        # which is a syntax error in Python 3. ``0o770`` is the same
        # permission value (rwxrwx---).
        db.server.setPermissions(collection, command.server_user,
                                 command.btw_group, 0o770)
def test_exist_path(self):
    """
    ``exist_path`` returns good values.
    """
    chunk = Chunk(data="<div/>", is_normal=True)
    chunk.save()
    # Check both collection kinds against the expected collection/hash
    # paths.
    for kind in ("chunks", "display"):
        expected = "/".join([get_collection_path(kind), chunk.c_hash])
        self.assertEqual(chunk.exist_path(kind), expected)
def sync_with_exist(self):
    """
    Garbage-collect unreachable chunks, push every normal chunk to
    eXist, and drop eXist documents whose chunks no longer exist.
    """
    self.collect()
    db = ExistDB()
    synced_hashes = set()
    # Only normal chunks have data worth storing in eXist.
    for chunk in self.filter(is_normal=True):
        chunk.sync_with_exist(db)
        synced_hashes.add(chunk.c_hash)
    # Anything in the collection that we did not just sync is stale.
    self._remove_absent(db, synced_hashes, get_collection_path("chunks"))
def sync_with_exist(self):
    """
    Garbage-collect unreachable chunks, push every syncable chunk to
    eXist, and drop eXist documents whose chunks no longer exist.
    """
    self.collect()
    db = ExistDB()
    synced_hashes = set()
    for chunk in self.all_syncable_chunks():
        chunk.sync_with_exist(db)
        synced_hashes.add(chunk.c_hash)
    # Anything in the collection that we did not just sync is stale.
    self._remove_absent(db, synced_hashes, get_collection_path("chunks"))
def __call__(self, command, _options):
    """
    Load the utilities into the database. This is necessary for BTW
    to run.
    """
    util_path = get_collection_path("util")
    db = get_admin_db()
    # Nothing to do if the utility collection already exists.
    if db.hasCollection(util_path):
        return
    command.create_collections([util_path])
    db.query(xquery.format(
        "xmldb:store({db}, 'empty.xml', <doc/>)",
        db=util_path))
def __call__(self, command, _options):
    """
    Load initial data into a new database. This is necessary for BTW
    to run.
    """
    assert_running()
    from django.utils import translation
    translation.activate('en-us')
    db = ExistDB()
    # Start from a clean chunk collection, then resync all chunks.
    chunks_path = get_collection_path("chunks")
    if db.hasCollection(chunks_path):
        db.removeCollection(chunks_path)
    Chunk.objects.sync_with_exist()
    # Start from a clean display collection, then rebuild it.
    display_collection = get_collection_path("display")
    if db.hasCollection(display_collection):
        db.removeCollection(display_collection)
    Chunk.objects.prepare("xml", True)
def prepare(self, kind, synchronous):
    """
    Prepare the display data for every normal chunk, and remove stale
    display documents from eXist.

    :param kind: must be ``"xml"``; other kinds are not supported yet.
    :param synchronous: passed through to each chunk's ``prepare``.
    :raises ValueError: if ``kind`` is not ``"xml"``.
    """
    if kind != "xml":
        raise ValueError("the manager only supports preparing XML data; "
                         "future versions may support other kinds")
    self.collect()
    db = ExistDB()
    prepared_hashes = set()
    for chunk in self.filter(is_normal=True):
        chunk.prepare("xml", synchronous)
        prepared_hashes.add(chunk.c_hash)
    self._remove_absent(db, prepared_hashes, get_collection_path("display"))
def __call__(self, command, _options):
    """
    Load initial data into a new database. This is necessary for BTW
    to run.
    """
    assert_running()
    from django.utils import translation
    translation.activate('en-us')
    db = ExistDB()
    # Start from a clean chunk collection, then resync all chunks.
    chunks_path = get_collection_path("chunks")
    if db.hasCollection(chunks_path):
        db.removeCollection(chunks_path)
    Chunk.objects.sync_with_exist()
    # Start from a clean display collection; only published chunks are
    # prepared here.
    display_collection = get_collection_path("display")
    if db.hasCollection(display_collection):
        db.removeCollection(display_collection)
    Chunk.objects.prepare("xml", include_unpublished=False)
def prepare(self, kind, include_unpublished):
    """
    Prepare display data for syncable chunks and remove stale display
    documents from eXist.

    :param kind: must be ``"xml"``; other kinds are not supported yet.
    :param include_unpublished: when false, restrict preparation to
      chunks that have a published change record.
    :raises ValueError: if ``kind`` is not ``"xml"``.
    """
    if kind != "xml":
        raise ValueError("the manager only supports preparing XML data; "
                         "future versions may support other kinds")
    self.collect()
    db = ExistDB()
    prepared_hashes = set()
    candidates = self.all_syncable_chunks()
    if not include_unpublished:
        candidates = candidates.filter(changerecord__published=True)
    for chunk in candidates:
        chunk.prepare("xml", True)
        prepared_hashes.add(chunk.c_hash)
    self._remove_absent(db, prepared_hashes, get_collection_path("display"))
def test_no_exist_document(self):
    """
    When the exist document is missing, raise an error. We want an
    error because it indicates something really broken about our
    internal state. We should never have metadata without a
    corresponding XML file.
    """
    record = ChangeRecord.objects.get(pk=1)
    chunk = record.c_hash
    # The cached XML must exist before we wipe everything.
    self.assertIsNotNone(cache.get(chunk.display_key("xml")))
    cache.clear()
    db = ExistDB()
    db.removeCollection(get_collection_path("display"), True)
    with self.assertRaises(ExistDBException):
        tasks.fetch_xml(chunk.c_hash)
def hashes_with_semantic_field(self, sf):
    """
    Returns a set of chunk *hashes* that contain the semantic field
    requested.
    """
    db = ExistDB()
    hashes = set()
    query = xquery.format(
        """\
for $m in collection({db})//btw:sf[@ref = {path}]
return util:document-name($m)""",
        db=get_collection_path("display"),
        path=sf)
    # The iterator yields result batches; each batch's values are
    # document names, i.e. chunk hashes.
    for batch in query_iterator(db, query):
        hashes.update(batch.values)
    return hashes
def hashes_with_semantic_field(self, sf):
    """
    Returns a set of chunk *hashes* that contain the semantic field
    requested.
    """
    db = ExistDB()
    hashes = set()
    query = xquery.format(
        """\
for $m in collection({db})//btw:sf[@ref = {path}]
return util:document-name($m)""",
        db=get_collection_path("display"),
        path=sf)
    # Each batch's values are document names, i.e. chunk hashes.
    for batch in query_iterator(db, query):
        hashes.update(batch.values)
    return hashes
def __call__(self, command, _options):
    """
    Drop the collection index. The database must be running.
    """
    assert_running()
    admin_db = get_admin_db()
    admin_db.removeCollectionIndex(get_collection_path(None))
def __call__(self, command, _options):
    """
    Load the collection index from ``command.chunk_index`` and reindex
    the collection. The database must be running.
    """
    assert_running()
    db = get_admin_db()
    collection = get_collection_path(None)
    # BUG FIX: the original called ``open(...)`` without ever closing
    # the handle; a context manager guarantees closure even if loading
    # fails.
    with open(command.chunk_index, 'r') as index_file:
        db.loadCollectionIndex(collection, index_file)
    db.reindexCollection(collection)
def __call__(self, command, _options):
    """
    Create all the collections BTW needs, then load the utilities.
    """
    collections = [settings.EXISTDB_ROOT_COLLECTION,
                   get_collection_path("chunks"),
                   get_collection_path("display")]
    command.create_collections(collections)
    Loadutil()(command, _options)
def filter_queryset(self, qs):
    """
    Narrow ``qs`` according to the request's GET parameters.

    Reads ``search[value]`` (full-text search string),
    ``lemmata_only``, ``publication_status`` and ``search_all`` from
    the request. Authors may search published, unpublished or both;
    non-authors already see only the latest published versions. When a
    search value is present, an eXist full-text query against the
    "display" collection produces the matching chunk hashes, and the
    queryset is restricted to change records for those chunks.

    :param qs: the starting queryset of change records.
    :returns: the filtered queryset.
    :raises ValueError: if ``publication_status`` has an unknown value.
    """
    # pylint: disable=too-many-branches
    search_value = self.request.GET.get('search[value]', None)
    lemmata_only = self.request.GET.get('lemmata_only', "false") == \
        "true"
    if usermod.can_author(self.request.user):
        publication_status = self.request.GET.get('publication_status',
                                                  "published")
        search_all = self.request.GET.get('search_all', "false") == "true"
        if not search_all:
            # Remove deleted entries from the set.
            active = qs.filter(entry__in=Entry.objects.active_entries())
            if publication_status == "published":
                active = active.filter(entry__latest_published=F('pk'))
            elif publication_status == "unpublished":
                active = active.filter(entry__latest=F('pk')) \
                    .exclude(entry__latest_published=F('pk'))
            elif publication_status == "both":
                active = active.filter(entry__latest=F('pk'))
            else:
                raise ValueError("unknown value for publication_status: " +
                                 publication_status)
        else:
            # ``search_all``: include records from deleted entries too.
            if publication_status == "published":
                active = qs.filter(published=True)
            elif publication_status == "unpublished":
                active = qs.filter(published=False)
            elif publication_status == "both":
                active = qs
            else:
                raise ValueError("unknown value for publication_status: " +
                                 publication_status)
    else:
        # If the user cannot author, then our queryset is already
        # reduced to what the user can see: the latest version of
        # published articles.
        active = qs
    if search_value:
        db = ExistDB()
        chunks = []
        if lemmata_only:
            scope = "//btw:lemma"
            # When we do a lemma search, hits are not useful.
            hit = ""
        else:
            scope = "//btw:entry"
            hit = "{kwic:summarize($m, <config width='80'/>)}"
        for query_chunk in query_iterator(
                db,
                xquery.format(
                    """\
import module namespace kwic="http://exist-db.org/xquery/kwic";
for $m in collection({db}){scope}[ft:query(., {search_text})]
order by ft:score($m) descending
return <result><doc>{doc}</doc><hit>{hit}</hit></result>""",
                    db=get_collection_path("display"),
                    scope=xquery.Verbatim(scope),
                    doc=xquery.Verbatim("{util:document-name($m)}"),
                    hit=xquery.Verbatim(hit),
                    search_text=search_value)):
            for result in query_chunk.results:
                chunk = result[0].text  # Content of <doc>.
                self.chunk_to_hits[chunk] = result[1]
                chunks.append(chunk)
        # We need to get the changerecords that pertain to these chunks.
        qs = active.filter(c_hash__in=set(chunks))
    else:
        qs = active
    return qs
class ChunkTransactionTestCase(util.DisableMigrationsTransactionMixin,
                               TransactionTestCase):
    """
    Tests of ``Chunk`` behavior that must run inside a
    ``TransactionTestCase`` (they depend on post-commit effects such as
    cache and eXist synchronization).
    """

    # eXist collection holding the raw chunk documents.
    chunk_collection_path = get_collection_path("chunks")
    # eXist collection holding the prepared display documents.
    display_collection_path = get_collection_path("display")
    # All the kinds of cached display data a Chunk supports.
    prepare_kinds = Chunk.key_kinds

    def setUp(self):
        # Credentials here are sanitized placeholders.
        self.foo = foo = user_model.objects.create(username="******",
                                                   password="******")
        scribe = Group.objects.get(name='scribe')
        foo.groups.add(scribe)
        # Start each test with an empty cache and empty eXist
        # collections.
        cache.clear()
        db = ExistDB()
        db.removeCollection(self.chunk_collection_path, True)
        db.removeCollection(self.display_collection_path, True)
        return super(ChunkTransactionTestCase, self).setUp()

    def test_when_chunk_becomes_hidden_cached_data_is_cleared(self):
        """
        When a ``Chunk`` becomes hidden, then its cached data is deleted.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        db = ExistDB()
        # Shown chunk: cached XML and one document in each collection.
        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 1)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 1)
        # Hide the only change record, which hides the chunk.
        e.latest.hidden = True
        e.latest.save()
        for key in keys:
            self.assertIsNone(cache.get(key))
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)

    def test_when_chunk_becomes_shown_cached_data_is_created(self):
        """
        When a ``Chunk`` becomes shown, then its cached data is created.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        db = ExistDB()
        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 1)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 1)
        # First hide the chunk, emptying cache and collections...
        e.latest.hidden = True
        e.latest.save()
        for key in keys:
            self.assertIsNone(cache.get(key))
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)
        # ... then add a new (shown) change record; the cached data and
        # eXist documents must be recreated.
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 1)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 1)
class ChunkManagerSimpleTestCase(util.DisableMigrationsMixin, TestCase):
    """
    Tests of the ``Chunk`` manager: collection of unreachable chunks,
    synchronization with eXist, and preparation of display data.
    """

    # eXist collection holding the raw chunk documents.
    chunk_collection_path = get_collection_path("chunks")
    # eXist collection holding the prepared display documents.
    display_collection_path = get_collection_path("display")

    @classmethod
    def setUpTestData(cls):
        super(ChunkManagerSimpleTestCase, cls).setUpTestData()
        # Credentials here are sanitized placeholders.
        cls.foo = user_model.objects.create(username="******",
                                            password="******")

    def setUp(self):
        self.manager = Chunk.objects
        return super(ChunkManagerSimpleTestCase, self).setUp()

    def make_reachable(self, chunk):
        # Make the chunk reachable by recording it in a new entry.
        e = Entry()
        e.update(self.foo, "q", chunk, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        return e

    def list_chunk_collection(self):
        db = ExistDB()
        return list_collection(db, self.chunk_collection_path)

    def list_display_collection(self):
        db = ExistDB()
        # BUG FIX: this helper previously listed the *chunk* collection
        # (copy-paste of list_chunk_collection), so it never reflected
        # the display collection it is named after.
        return list_collection(db, self.display_collection_path)

    def check_collects(self, op, *args):
        """Check that manager operation ``op`` collects unreachable chunks."""
        self.assertEqual(self.manager.count(), 0)
        c = Chunk(data="", is_normal=False)
        c.save()
        self.assertEqual(self.manager.count(), 1)
        getattr(self.manager, op)(*args)
        self.assertEqual(self.manager.count(), 0)

    def test_collect_collects_unreachable(self):
        """
        ``collect`` collects unreachable chunks.
        """
        self.check_collects("collect")

    def test_collect_does_not_collect_reachable(self):
        """
        Does not collect reachable chunks.
        """
        self.assertEqual(self.manager.count(), 0)
        c = Chunk(data="", is_normal=False)
        c.save()
        self.make_reachable(c)
        self.assertEqual(self.manager.count(), 1)
        self.manager.collect()
        # Not collected!
        self.assertEqual(self.manager.count(), 1)

    def test_sync_collects(self):
        """
        ``sync_with_exist`` causes a collection of unreachable chunks.
        """
        self.check_collects("sync_with_exist")

    def check_skip_abnormal_chunks(self, op, collection, *args):
        """Check that ``op`` neither syncs nor collects an abnormal chunk."""
        c = Chunk(data="", is_normal=False)
        c.save()
        self.make_reachable(c)
        db = ExistDB()
        self.assertEqual(len(list_collection(db, collection)), 0)
        getattr(self.manager, op)(*args)
        self.assertEqual(len(list_collection(db, collection)), 0)
        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)

    def check_syncs_normal_chunks(self, op, collection, *args):
        """Check that ``op`` stores a normal chunk in ``collection``."""
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        self.make_reachable(c)
        # If it does not have metadata yet, that's fine.
        try:
            c.chunkmetadata.delete()
        except ChunkMetadata.DoesNotExist:
            pass
        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # sync_with_exist will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)
        getattr(self.manager, op)(*args)
        self.assertEqual(len(list_collection(db, collection)), 1)
        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)

    def test_sync_skips_abnormal_chunks(self):
        """
        ``sync_with_exist`` does not sync abnormal chunks.
        """
        self.check_skip_abnormal_chunks("sync_with_exist",
                                        self.chunk_collection_path)

    def test_sync_syncs_normal_chunks(self):
        """
        ``sync_with_exist`` syncs normal chunks.
        """
        self.check_syncs_normal_chunks("sync_with_exist",
                                       self.chunk_collection_path)

    def check_deletes_documents(self, op, collection, *args):
        """Check that ``op`` removes documents of chunks deleted in SQL."""
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        entry = self.make_reachable(c)
        # If it does not have metadata yet, that's fine.
        try:
            c.chunkmetadata.delete()
        except ChunkMetadata.DoesNotExist:
            pass
        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # op will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)
        op = getattr(self.manager, op)
        op(*args)
        self.assertEqual(len(list_collection(db, collection)), 1)
        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)
        # Now we delete the chunk in SQL because we do not want the
        # ``delete`` method to be called, as it would take care of
        # removing the document itself. (And yes, we do interpolate
        # the table name. This is safe as ``Entry._meta.db_table`` is
        # a value under our control.)
        with connection.cursor() as cursor:
            cr = entry.latest
            cursor.execute(
                "DELETE FROM {} WHERE id = %s".format(entry._meta.db_table),
                [entry.pk])
            # We have to do this ourselves because Django's cascading
            # delete is implemented at the ORM level, not the database
            # level.
            cursor.execute(
                "DELETE FROM {} WHERE id = %s".format(cr._meta.db_table),
                [cr.pk])
        # Check that no collection or syncing has occurred.
        self.assertEqual(self.manager.count(), 1)
        self.assertEqual(len(list_collection(db, collection)), 1)
        op(*args)
        # Make sure our chunk was collected.
        self.assertEqual(self.manager.count(), 0)
        self.assertEqual(len(list_collection(db, collection)), 0)

    def test_sync_deletes_exist_documents(self):
        """
        ``sync_with_exist`` deletes those eXist documents that belong to
        chunks that no longer exist.
        """
        self.check_deletes_documents("sync_with_exist",
                                     self.chunk_collection_path)

    def test_prepare_collects(self):
        """
        ``prepare`` causes a collection of unreachable chunks.
        """
        self.check_collects("prepare", "xml", True)

    def test_prepare_skips_abnormal_chunks(self):
        """
        ``prepare`` does not sync abnormal chunks.
        """
        self.check_skip_abnormal_chunks("prepare",
                                        self.display_collection_path,
                                        "xml", True)

    def test_prepare_syncs_normal_chunks(self):
        """
        ``prepare`` syncs normal chunks.
        """
        self.check_syncs_normal_chunks("prepare",
                                       self.display_collection_path,
                                       "xml", True)

    def test_prepare_deletes_exist_documents(self):
        """
        ``prepare`` deletes those eXist documents that belong to chunks
        that no longer exist.
        """
        self.check_deletes_documents("prepare",
                                     self.display_collection_path,
                                     "xml", True)
class ChunkTestCase(util.DisableMigrationsMixin, TestCase):
    """
    Tests of individual ``Chunk`` behavior: validity, publication,
    visibility, cache keys, cached-value retrieval, and eXist
    synchronization.
    """

    # eXist collection holding the raw chunk documents.
    chunk_collection_path = get_collection_path("chunks")
    # eXist collection holding the prepared display documents.
    display_collection_path = get_collection_path("display")
    # All the kinds of cached display data a Chunk supports.
    prepare_kinds = Chunk.key_kinds

    @classmethod
    def setUpTestData(cls):
        super(ChunkTestCase, cls).setUpTestData()
        # Credentials here are sanitized placeholders.
        cls.foo = foo = user_model.objects.create(username="******",
                                                  password="******")
        scribe = Group.objects.get(name='scribe')
        cls.foo.groups.add(scribe)

    def setUp(self):
        cache.clear()
        return super(ChunkTestCase, self).setUp()

    def assertLogRegexp(self, handler, stream, regexp):
        """Assert that what was logged to ``stream`` matches ``regexp``."""
        handler.flush()
        self.assertRegex(stream.getvalue(), regexp)

    def test_abnormal_is_invalid(self):
        """
        Checks that an abnormal chunk is invalid, and that its validity
        is saved after being computed.
        """
        c = Chunk(data="", is_normal=False)
        c.save()
        self.assertIsNone(c._valid)
        self.assertFalse(c.valid)
        self.assertFalse(
            Chunk.objects.get(pk=c.pk)._valid,
            "_valid was saved.")

    # BUG FIX: the original defined ``test_valid`` twice, verbatim; the
    # second definition silently shadowed the first. Only one copy is
    # kept.
    def test_valid(self):
        """
        Checks that a normal chunk can be valid, and that its validity
        is saved after being computed.
        """
        c = Chunk(data=valid_editable.decode('utf-8'),
                  schema_version=schema_version)
        c.save()
        self.assertIsNone(c._valid)
        self.assertTrue(c.valid)
        self.assertTrue(Chunk.objects.get(pk=c.pk)._valid,
                        "_valid was saved.")

    def test_invalid(self):
        """
        Checks that data that is invalid is recognized as invalid, and
        that the validity is saved after being computed.
        """
        # This data is just flat out invalid...
        data = """
<btw:entry xmlns="http://www.tei-c.org/ns/1.0" version="0.10"\
 xmlns:btw="http://mangalamresearch.org/ns/btw-storage">
<btw:lemma></btw:lemma>
</btw:entry>
"""
        c = Chunk(data=data, schema_version=schema_version)
        c.save()
        self.assertIsNone(c._valid)
        self.assertFalse(c.valid)
        self.assertFalse(
            Chunk.objects.get(pk=c.pk)._valid,
            "_valid was saved.")

    def test_invalid_schematron(self):
        """
        Checks that data that is invalid only due to the schematron
        check is recognized as invalid, and that the validity is saved
        after being computed.
        """
        # Strip the semantic-fields elements from otherwise-valid data:
        # the result still passes the RNG schema but fails schematron.
        tree = lxml.etree.fromstring(valid_editable)
        sfs = tree.xpath(
            "//btw:example/btw:semantic-fields | "
            "//btw:example-explained/btw:semantic-fields",
            namespaces=xml.default_namespace_mapping)
        for el in sfs:
            el.getparent().remove(el)
        data = lxml.etree.tostring(
            tree, xml_declaration=True, encoding='utf-8').decode('utf-8')
        self.assertTrue(
            util.validate_with_rng(xml.schema_for_version(schema_version),
                                   data),
            "the data should validate")
        self.assertFalse(
            util.schematron(xml.schematron_for_version(schema_version),
                            data),
            "the data should not pass the schematron check")
        c = Chunk(data=data, schema_version=schema_version)
        c.save()
        self.assertIsNone(c._valid)
        self.assertFalse(c.valid)
        self.assertFalse(
            Chunk.objects.get(pk=c.pk)._valid,
            "_valid was saved.")

    def test_published_false(self):
        """
        ``published`` is false for chunks that have not been published.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        self.assertFalse(c.published)

    def test_published_true(self):
        """
        ``published`` is true for chunks that have been published.
        """
        c = Chunk(data=valid_editable.decode('utf-8'),
                  schema_version=schema_version)
        c.save()
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        e.latest.publish(self.foo)
        self.assertTrue(c.published)

    def test_chunks_start_hidden(self):
        """
        ``hidden`` is ``True`` for new chunks.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        self.assertTrue(c.hidden)

    def test_chunks_with_all_hidden_records_are_hidden(self):
        """
        ``hidden`` is ``True`` if all ``ChangeRecord``s are hidden.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        e.latest.hidden = True
        e.latest.save()
        e = Entry()
        e.update(self.foo, "q", c, "foo2", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        e.latest.hidden = True
        e.latest.save()
        self.assertTrue(c.hidden)

    def test_chunks_with_one_shown_record_are_shown(self):
        """
        ``hidden`` is ``False`` if one ``ChangeRecord`` is shown.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        e.latest.hidden = True
        e.latest.save()
        # Second record is left shown.
        e = Entry()
        e.update(self.foo, "q", c, "foo2", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        self.assertFalse(c.hidden)

    def test_exist_path(self):
        """
        ``exist_path`` returns good values.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        self.assertEqual(c.exist_path("chunks"),
                         "/".join([get_collection_path("chunks"),
                                   c.c_hash]))
        self.assertEqual(c.exist_path("display"),
                         "/".join([get_collection_path("display"),
                                   c.c_hash]))

    def test_exist_path_raises(self):
        """
        ``exist_path`` raises an error if the kind is wrong.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        with self.assertRaisesRegex(ValueError, "unknown value"):
            c.exist_path("invalid")

    def test_display_key(self):
        """
        ``display_key`` returns good values.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        for kind in self.prepare_kinds:
            self.assertEqual(c.display_key(kind),
                             "{}_{}".format(c.c_hash, kind).encode("utf8"))

    def test_display_key_raises(self):
        """
        ``display_key`` raises an error if the kind is wrong.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        with self.assertRaisesRegex(ValueError, "unknown display key kind"):
            c.display_key("invalid")

    def test_get_cached_value_starts_task(self):
        """
        Check that ``get_cached_value`` starts an actual task if the
        value is missing, and returns ``None``.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        for kind in self.prepare_kinds:
            cache.clear()
            with util.WithStringIO(models.logger) as (stream, handler):
                self.assertIsNone(c.get_cached_value(kind))
                self.assertLogRegexp(
                    handler,
                    stream,
                    "^{0} is missing from article_display, launching task$".
                    format(c.display_key(kind)))

    def test_get_cached_value_knows_about_tasks(self):
        """
        Check that ``get_cached_value`` will log if a task is already
        computing the value and will return ``None``.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        for kind in self.prepare_kinds:
            cache.clear()
            # Simulate a task in flight.
            cache.set(c.display_key(kind), {"task": "foo"})
            with util.WithStringIO(models.logger) as (stream, handler):
                self.assertIsNone(c.get_cached_value(kind))
                self.assertLogRegexp(
                    handler,
                    stream,
                    "^{0} is being computed by task foo$".format(
                        c.display_key(kind)))

    def test_get_cached_value_returns_available_data(self):
        """
        Check that ``get_cached_value`` returns the data already
        available in the cache. (The original docstring was a
        copy-paste of the previous test's; fixed to describe what the
        assertions actually check.)
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        c._create_cached_data()
        # We have to launch the bibl data preparation ourselves.
        c.prepare("bibl", True)
        for kind in self.prepare_kinds:
            expected = cache.get(c.display_key(kind))
            self.assertIsNotNone(expected)
            self.assertEqual(c.get_cached_value(kind), expected)

    def check_skip_abnormal_chunks(self, op, collection, *args):
        """Check that chunk method ``op`` skips abnormal chunks."""
        c = Chunk(data="", is_normal=False)
        c.save()
        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # sync_with_exist will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)
        getattr(c, op)(*args)
        self.assertEqual(len(list_collection(db, collection)), 0)

    def test_sync_skips_abnormal_chunks(self):
        """
        ``sync_with_exist`` skips abnormal chunks.
        """
        self.check_skip_abnormal_chunks("sync_with_exist",
                                        self.chunk_collection_path)

    def check_sync_normal_chunks(self, op, collection, *args):
        """Check that chunk method ``op`` stores a normal chunk."""
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # sync_with_exist will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)
        ret = getattr(c, op)(*args)
        self.assertEqual(len(list_collection(db, collection)), 1)
        return ret

    def test_sync_syncs_normal_chunks(self):
        """
        ``sync_with_exist`` syncs normal chunks.
        """
        self.check_sync_normal_chunks("sync_with_exist",
                                      self.chunk_collection_path)

    def test_sync_handles_overwrites(self):
        """
        ``sync_with_exist`` will not overwrite documents already in
        eXist.
        """
        db = ExistDB()
        db.removeCollection(self.chunk_collection_path, True)
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        c.sync_with_exist()
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 1)
        with mock.patch('lexicography.models.ExistDB.load') as load_mock:
            c.sync_with_exist()
            self.assertEqual(load_mock.call_count, 0,
                             "load should not have been called!")
            self.assertEqual(
                len(list_collection(db, self.chunk_collection_path)), 1)

    def test_prepare_xml_skips_abnormal_chunks(self):
        """
        ``prepare`` with the "xml" kind skips abnormal chunks.
        """
        self.check_skip_abnormal_chunks("prepare",
                                        self.display_collection_path,
                                        "xml", True)

    def test_prepare_xml_syncs_normal_chunks(self):
        """
        ``prepare`` with the "xml" kind syncs normal chunks.
        """
        self.check_sync_normal_chunks("prepare",
                                      self.display_collection_path,
                                      "xml", True)

    def test_prepare_can_run_asynchronously(self):
        """
        ``prepare`` can run asynchronously
        """
        c = Chunk(data="<doc/>", is_normal=True)
        c.save()
        for kind in self.prepare_kinds:
            # NOTE(review): the loop variable ``kind`` is unused; every
            # iteration prepares "xml". Possibly ``c.prepare(kind)`` was
            # intended — confirm before changing.
            ret = c.prepare("xml")
            # When run asynchronously, we get an AsyncResult on which we
            # can call ``get``.
            ret.get()

    def check_remove_data_from_exist_and_cache(self, op):
        """
        Check that invoking ``op`` will remove the data from the eXist
        database and the cache.
        """
        db = ExistDB()
        c = Chunk(data="<div/>", is_normal=True)
        c.clean()
        method = op if isinstance(op, Callable) else getattr(c, op)
        cache.delete(c.c_hash)
        db.removeCollection(self.chunk_collection_path, True)
        db.removeCollection(self.display_collection_path, True)
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)
        for kind in self.prepare_kinds:
            self.assertIsNone(cache.get(c.display_key(kind)))
        c.save()
        c._create_cached_data()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]
        # Only the "xml" data is created on save.
        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 1)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 1)
        method()
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)
        for key in keys:
            self.assertIsNone(cache.get(key))

    def test_delete_removes_data_from_exist_and_cache(self):
        """
        Deleting a chunk removes its associated data from eXist and
        from the cache.
        """
        self.check_remove_data_from_exist_and_cache("delete")

    def test_delete_cached_data_removes_data_from_exist_and_cache(self):
        """
        _delete_cached_data removes data from eXist and the cache.
        """
        self.check_remove_data_from_exist_and_cache("_delete_cached_data")

    def check_abnormal_remove_data_from_exist_and_cache(self, op):
        """Check that ``op`` on an abnormal chunk touches nothing."""
        db = ExistDB()
        c = Chunk(data="<div/>", is_normal=False)
        c.clean()
        cache.delete(c.c_hash)
        method = getattr(c, op)
        db.removeCollection(self.chunk_collection_path, True)
        db.removeCollection(self.display_collection_path, True)
        c.save()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]
        for kind in self.prepare_kinds:
            self.assertIsNone(cache.get(c.display_key(kind)))
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)
        with mock.patch('lexicography.models.ExistDB.removeDocument') as \
                remove_mock:
            method()
        self.assertEqual(remove_mock.call_count, 0)
        self.assertEqual(len(list_collection(db,
                                             self.chunk_collection_path)), 0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)
        for key in keys:
            self.assertIsNone(cache.get(key))

    def test_delete_abnormal_does_not_touch_exist_or_cache(self):
        """
        Deleting an abnormal chunk does not touch the cache or the
        database.
        """
        self.check_abnormal_remove_data_from_exist_and_cache("delete")

    def test_delete_cached_data_abnormal_does_not_touch_exist_or_cache(self):
        """
        ``_delete_cached_data`` on an abnormal chunk does not touch the
        cache or the database.
        """
        self.check_abnormal_remove_data_from_exist_and_cache(
            "_delete_cached_data")
def filter_queryset(self, qs):
    """
    Narrow ``qs`` according to the request's GET parameters.

    Reads ``search[value]`` (full-text search string),
    ``lemmata_only``, ``publication_status`` and ``search_all`` from
    the request. Authors may search published, unpublished or both;
    non-authors already see only the latest published versions. When a
    search value is present, an eXist full-text query against the
    "display" collection produces the matching chunk hashes, and the
    queryset is restricted to change records for those chunks.

    :param qs: the starting queryset of change records.
    :returns: the filtered queryset.
    :raises ValueError: if ``publication_status`` has an unknown value.
    """
    # pylint: disable=too-many-branches
    search_value = self.request.GET.get('search[value]', None)
    if search_value is not None:
        # NOTE(review): this makes ``search_value`` a ``bytes`` object
        # before it is handed to ``xquery.format`` below — presumably
        # the query builder expects UTF-8 bytes; confirm against
        # ``xquery.format``.
        search_value = search_value.encode("utf-8")
    lemmata_only = self.request.GET.get('lemmata_only', "false") == \
        "true"
    if usermod.can_author(self.request.user):
        publication_status = self.request.GET.get('publication_status',
                                                  "published")
        search_all = self.request.GET.get('search_all', "false") == "true"
        if not search_all:
            # Remove deleted entries from the set.
            active = qs.filter(entry__in=Entry.objects.active_entries())
            if publication_status == "published":
                active = active.filter(entry__latest_published=F('pk'))
            elif publication_status == "unpublished":
                active = active.filter(entry__latest=F('pk')) \
                    .exclude(entry__latest_published=F('pk'))
            elif publication_status == "both":
                active = active.filter(entry__latest=F('pk'))
            else:
                raise ValueError("unknown value for publication_status: " +
                                 publication_status)
        else:
            # ``search_all``: include records from deleted entries too.
            if publication_status == "published":
                active = qs.filter(published=True)
            elif publication_status == "unpublished":
                active = qs.filter(published=False)
            elif publication_status == "both":
                active = qs
            else:
                raise ValueError("unknown value for publication_status: " +
                                 publication_status)
    else:
        # If the user cannot author, then our queryset is already
        # reduced to what the user can see: the latest version of
        # published articles.
        active = qs
    if search_value:
        db = ExistDB()
        chunks = []
        if lemmata_only:
            scope = "//btw:lemma"
            # When we do a lemma search, hits are not useful.
            hit = ""
        else:
            scope = "//btw:entry"
            hit = "{kwic:summarize($m, <config width='80'/>)}"
        for query_chunk in query_iterator(db, xquery.format(
                """\
import module namespace kwic="http://exist-db.org/xquery/kwic";
for $m in collection({db}){scope}[ft:query(., {search_text})]
order by ft:score($m) descending
return <result><doc>{doc}</doc><hit>{hit}</hit></result>""",
                db=get_collection_path("display"),
                scope=xquery.Verbatim(scope),
                doc=xquery.Verbatim("{util:document-name($m)}"),
                hit=xquery.Verbatim(hit),
                search_text=search_value)):
            for result in query_chunk.results:
                chunk = result[0].text  # Content of <doc>.
                self.chunk_to_hits[chunk] = result[1]
                chunks.append(chunk)
        # We need to get the changerecords that pertain to these chunks.
        qs = active.filter(c_hash__in=set(chunks))
    else:
        qs = active
    return qs