Example 1
    def __call__(self, command, _options):
        assert_running()

        db = get_admin_db()
        for collection in [settings.EXISTDB_ROOT_COLLECTION,
                           get_collection_path("chunks"),
                           get_collection_path("display")]:
            db.createCollection(collection)
            db.server.setPermissions(collection, command.server_user,
                                     command.btw_group, 0o770)
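
A note on the helper these examples revolve around: ``get_collection_path`` itself is not shown in this listing. Judging from the call sites (``None`` for the base collection in the index commands, and names such as "chunks", "display" and "util" for subcollections created alongside ``settings.EXISTDB_ROOT_COLLECTION``), a minimal sketch of what it presumably does could look like the following; treat it as an inference, not the project's actual code.

# Hypothetical sketch of get_collection_path, inferred from the call
# sites in this listing; the real BTW helper may differ.
from django.conf import settings

def get_collection_path(name):
    # Assumption: the base collection is settings.EXISTDB_ROOT_COLLECTION,
    # with named subcollections ("chunks", "display", "util") under it.
    base = settings.EXISTDB_ROOT_COLLECTION
    return base if name is None else "/".join([base, name])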
Example 2
 def test_exist_path(self):
     """
     ``exist_path`` returns good values.
     """
     c = Chunk(data="<div/>", is_normal=True)
     c.save()
     self.assertEqual(c.exist_path("chunks"),
                      "/".join([get_collection_path("chunks"), c.c_hash]))
     self.assertEqual(c.exist_path("display"),
                      "/".join([get_collection_path("display"), c.c_hash]))
Example 3
 def test_exist_path(self):
     """
     ``exist_path`` returns good values.
     """
     c = Chunk(data="<div/>", is_normal=True)
     c.save()
     self.assertEqual(c.exist_path("chunks"),
                      "/".join([get_collection_path("chunks"),
                                c.c_hash]))
     self.assertEqual(c.exist_path("display"),
                      "/".join([get_collection_path("display"),
                                c.c_hash]))
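
Examples 2 and 3 (the same test, wrapped differently) pin down the contract of ``Chunk.exist_path``: a chunk's eXist document lives directly under the matching collection and is named after the chunk's hash. A sketch consistent with these tests and with ``test_exist_path_raises`` in Example 21 follows; the exact set of accepted kinds is an assumption.

# Hypothetical sketch of Chunk.exist_path, consistent with the tests in
# Examples 2, 3 and 21; not the actual BTW implementation.
def exist_path(self, kind):
    if kind not in ("chunks", "display"):  # assumed list of valid kinds
        raise ValueError("unknown value for kind: " + kind)
    return "/".join([get_collection_path(kind), self.c_hash])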
Example 4
    def sync_with_exist(self):
        self.collect()
        db = ExistDB()
        present = set()
        for chunk in self.filter(is_normal=True):
            chunk.sync_with_exist(db)
            present.add(chunk.c_hash)

        self._remove_absent(db, present, get_collection_path("chunks"))
Example 5
    def sync_with_exist(self):
        self.collect()
        db = ExistDB()
        present = set()
        for chunk in self.all_syncable_chunks():
            chunk.sync_with_exist(db)
            present.add(chunk.c_hash)

        self._remove_absent(db, present, get_collection_path("chunks"))
Example 6
 def __call__(self, command, _options):
     """
     Load the utilities into the database. This is necessary for BTW to run.
     """
     util_path = get_collection_path("util")
     db = get_admin_db()
     if not db.hasCollection(util_path):
         command.create_collections([util_path])
     db.query(xquery.format(
         "xmldb:store({db}, 'empty.xml', <doc/>)", db=util_path))
Example 7
    def __call__(self, command, _options):
        """
        Load initial data into a new database. This is necessary for BTW
        to run.
        """
        assert_running()

        from django.utils import translation
        translation.activate('en-us')

        db = ExistDB()
        chunk_collection_path = get_collection_path("chunks")

        if db.hasCollection(chunk_collection_path):
            db.removeCollection(chunk_collection_path)

        Chunk.objects.sync_with_exist()

        display_path = get_collection_path("display")
        if db.hasCollection(display_path):
            db.removeCollection(display_path)
        Chunk.objects.prepare("xml", True)
Example 8
    def prepare(self, kind, synchronous):
        if kind != "xml":
            raise ValueError("the manager only supports preparing XML data; "
                             "future versions may support other kinds")

        self.collect()
        db = ExistDB()
        present = set()
        for chunk in self.filter(is_normal=True):
            chunk.prepare("xml", synchronous)
            present.add(chunk.c_hash)

        self._remove_absent(db, present, get_collection_path("display"))
Example 9
    def __call__(self, command, _options):
        """
        Load initial data into a new database. This is necessary for BTW
        to run.
        """
        assert_running()

        from django.utils import translation
        translation.activate('en-us')

        db = ExistDB()
        chunk_collection_path = get_collection_path("chunks")

        if db.hasCollection(chunk_collection_path):
            db.removeCollection(chunk_collection_path)

        Chunk.objects.sync_with_exist()

        display_path = get_collection_path("display")
        if db.hasCollection(display_path):
            db.removeCollection(display_path)
        Chunk.objects.prepare("xml", include_unpublished=False)
Example 10
    def prepare(self, kind, include_unpublished):
        if kind != "xml":
            raise ValueError("the manager only supports preparing XML data; "
                             "future versions may support other kinds")

        self.collect()
        db = ExistDB()
        present = set()
        chunks = self.all_syncable_chunks()
        if not include_unpublished:
            chunks = chunks.filter(changerecord__published=True)
        for chunk in chunks:
            chunk.prepare("xml", True)
            present.add(chunk.c_hash)

        self._remove_absent(db, present, get_collection_path("display"))
Example 11
    def test_no_exist_document(self):
        """
        When the eXist document is missing, raise an error. We want an
        error because it indicates something really broken about our
        internal state. We should never have metadata without a
        corresponding XML file.
        """
        cr = ChangeRecord.objects.get(pk=1)
        chunk = cr.c_hash
        self.assertIsNotNone(cache.get(chunk.display_key("xml")))

        cache.clear()
        db = ExistDB()
        db.removeCollection(get_collection_path("display"), True)

        with self.assertRaises(ExistDBException):
            tasks.fetch_xml(chunk.c_hash)
Example 12
    def hashes_with_semantic_field(self, sf):
        """
        Returns a set of chunk *hashes* that contain the semantic field
        requested.
        """
        db = ExistDB()
        chunks = set()

        for query_chunk in query_iterator(db, xquery.format(
                """\
for $m in collection({db})//btw:sf[@ref = {path}]
return util:document-name($m)""",
                db=get_collection_path("display"),
                path=sf)):

            for result in query_chunk.values:
                chunks.add(result)

        return chunks
Example 13
    def hashes_with_semantic_field(self, sf):
        """
        Returns a set of chunk *hashes* that contain the semantic field
        requested.
        """
        db = ExistDB()
        chunks = set()

        for query_chunk in query_iterator(
                db,
                xquery.format("""\
for $m in collection({db})//btw:sf[@ref = {path}]
return util:document-name($m)""",
                              db=get_collection_path("display"),
                              path=sf)):

            for result in query_chunk.values:
                chunks.add(result)

        return chunks
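
Since the documents in the display collection are named after chunk hashes (see ``exist_path``), the set returned by ``hashes_with_semantic_field`` is typically fed back into the ORM by filtering on ``c_hash``, the same pattern the DataTables view in Example 17 uses for its full-text search results. A hypothetical usage sketch, assuming the method lives on the ``Chunk`` manager:

# Hypothetical usage; "01.02.11n" is an invented semantic-field reference.
hashes = Chunk.objects.hashes_with_semantic_field("01.02.11n")
matching_records = ChangeRecord.objects.filter(c_hash__in=hashes)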
Example 14
 def __call__(self, command, _options):
     assert_running()
     db = get_admin_db()
     collection = get_collection_path(None)
     db.removeCollectionIndex(collection)
Example 15
 def __call__(self, command, _options):
     assert_running()
     db = get_admin_db()
     collection = get_collection_path(None)
     db.loadCollectionIndex(collection, open(command.chunk_index, 'r'))
     db.reindexCollection(collection)
Example 16
 def __call__(self, command, _options):
     command.create_collections([settings.EXISTDB_ROOT_COLLECTION,
                                 get_collection_path("chunks"),
                                 get_collection_path("display")])
     Loadutil()(command, _options)
Example 17
    def filter_queryset(self, qs):  # pylint: disable=too-many-branches
        search_value = self.request.GET.get('search[value]', None)

        lemmata_only = self.request.GET.get('lemmata_only', "false") == \
            "true"

        if usermod.can_author(self.request.user):
            publication_status = self.request.GET.get('publication_status',
                                                      "published")
            search_all = self.request.GET.get('search_all', "false") == "true"
            if not search_all:
                # Remove deleted entries from the set.
                active = qs.filter(entry__in=Entry.objects.active_entries())
                if publication_status == "published":
                    active = active.filter(entry__latest_published=F('pk'))
                elif publication_status == "unpublished":
                    active = active.filter(entry__latest=F('pk')) \
                                   .exclude(entry__latest_published=F('pk'))
                elif publication_status == "both":
                    active = active.filter(entry__latest=F('pk'))
                else:
                    raise ValueError("unknown value for publication_status: " +
                                     publication_status)
            else:
                if publication_status == "published":
                    active = qs.filter(published=True)
                elif publication_status == "unpublished":
                    active = qs.filter(published=False)
                elif publication_status == "both":
                    active = qs
                else:
                    raise ValueError("unknown value for publication_status: " +
                                     publication_status)
        else:
            # If the user cannot author, then our queryset is already
            # reduced to what the user can see: the latest version of
            # published articles.
            active = qs

        if search_value:
            db = ExistDB()
            chunks = []
            if lemmata_only:
                scope = "//btw:lemma"
                # When we do a lemma search, hits are not useful.
                hit = ""
            else:
                scope = "//btw:entry"
                hit = "{kwic:summarize($m, <config width='80'/>)}"

            for query_chunk in query_iterator(
                    db,
                    xquery.format(
                        """\
import module namespace kwic="http://exist-db.org/xquery/kwic";
for $m in collection({db}){scope}[ft:query(., {search_text})]
order by ft:score($m) descending
return <result><doc>{doc}</doc><hit>{hit}</hit></result>""",
                        db=get_collection_path("display"),
                        scope=xquery.Verbatim(scope),
                        doc=xquery.Verbatim("{util:document-name($m)}"),
                        hit=xquery.Verbatim(hit),
                        search_text=search_value)):

                for result in query_chunk.results:
                    chunk = result[0].text  # Content of <doc>.
                    self.chunk_to_hits[chunk] = result[1]
                    chunks.append(chunk)

            # We need to get the changerecords that pertain to these chunks.
            qs = active.filter(c_hash__in=set(chunks))
        else:
            qs = active

        return qs
Example 18
class ChunkTransactionTestCase(util.DisableMigrationsTransactionMixin,
                               TransactionTestCase):
    chunk_collection_path = get_collection_path("chunks")
    display_collection_path = get_collection_path("display")

    prepare_kinds = Chunk.key_kinds

    def setUp(self):
        self.foo = foo = user_model.objects.create(username="******",
                                                   password="******")
        scribe = Group.objects.get(name='scribe')
        foo.groups.add(scribe)
        cache.clear()
        db = ExistDB()
        db.removeCollection(self.chunk_collection_path, True)
        db.removeCollection(self.display_collection_path, True)
        return super(ChunkTransactionTestCase, self).setUp()

    def test_when_chunk_becomes_hidden_cached_data_is_cleared(self):
        """
        When a ``Chunk`` becomes hidden, then its cached data is deleted.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)

        db = ExistDB()
        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 1)

        e.latest.hidden = True
        e.latest.save()

        for key in keys:
            self.assertIsNone(cache.get(key))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)

    def test_when_chunk_becomes_shown_cached_data_is_created(self):
        """
        When a ``Chunk`` becomes shown, then its cached data is created.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)

        db = ExistDB()
        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 1)

        e.latest.hidden = True
        e.latest.save()

        for key in keys:
            self.assertIsNone(cache.get(key))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)

        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)

        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 1)
Example 19
 def __call__(self, command, _options):
     assert_running()
     db = get_admin_db()
     collection = get_collection_path(None)
     db.loadCollectionIndex(collection, open(command.chunk_index, 'r'))
     db.reindexCollection(collection)
Example 20
class ChunkManagerSimpleTestCase(util.DisableMigrationsMixin, TestCase):
    chunk_collection_path = get_collection_path("chunks")
    display_collection_path = get_collection_path("display")

    @classmethod
    def setUpTestData(cls):
        super(ChunkManagerSimpleTestCase, cls).setUpTestData()
        cls.foo = user_model.objects.create(username="******", password="******")

    def setUp(self):
        self.manager = Chunk.objects
        return super(ChunkManagerSimpleTestCase, self).setUp()

    def make_reachable(self, chunk):
        # Make the chunk reachable
        e = Entry()
        e.update(self.foo, "q", chunk, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        return e

    def list_chunk_collection(self):
        db = ExistDB()
        return list_collection(db, self.chunk_collection_path)

    def list_display_collection(self):
        db = ExistDB()
        return list_collection(db, self.display_collection_path)

    def check_collects(self, op, *args):
        self.assertEqual(self.manager.count(), 0)
        c = Chunk(data="", is_normal=False)
        c.save()
        self.assertEqual(self.manager.count(), 1)
        getattr(self.manager, op)(*args)
        self.assertEqual(self.manager.count(), 0)

    def test_collect_collects_unreachable(self):
        """
        ``collect`` collects unreachable chunks.
        """
        self.check_collects("collect")

    def test_collect_does_not_collect_reachable(self):
        """
        Does not collect reachable chunks.
        """
        self.assertEqual(self.manager.count(), 0)
        c = Chunk(data="", is_normal=False)
        c.save()

        self.make_reachable(c)
        self.assertEqual(self.manager.count(), 1)

        self.manager.collect()
        # Not collected!
        self.assertEqual(self.manager.count(), 1)

    def test_sync_collects(self):
        """
        ``sync_with_exist`` causes a collection of unreachable chunks.
        """
        self.check_collects("sync_with_exist")

    def check_skip_abnormal_chunks(self, op, collection, *args):
        c = Chunk(data="", is_normal=False)
        c.save()
        self.make_reachable(c)
        db = ExistDB()
        self.assertEqual(len(list_collection(db, collection)), 0)

        getattr(self.manager, op)(*args)

        self.assertEqual(len(list_collection(db, collection)), 0)

        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)

    def check_syncs_normal_chunks(self, op, collection, *args):
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        self.make_reachable(c)
        # If it does not have metadata yet, that's fine.
        try:
            c.chunkmetadata.delete()
        except ChunkMetadata.DoesNotExist:
            pass

        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # sync_with_exist will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)

        getattr(self.manager, op)(*args)

        self.assertEqual(len(list_collection(db, collection)), 1)

        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)

    def test_sync_skips_abnormal_chunks(self):
        """
        ``sync_with_exist`` does not sync abnormal chunks.
        """

        self.check_skip_abnormal_chunks("sync_with_exist",
                                        self.chunk_collection_path)

    def test_sync_syncs_normal_chunks(self):
        """
        ``sync_with_exist`` syncs normal chunks.
        """

        self.check_syncs_normal_chunks("sync_with_exist",
                                       self.chunk_collection_path)

    def check_deletes_documents(self, op, collection, *args):
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        entry = self.make_reachable(c)

        # If it does not have metadata yet, that's fine.
        try:
            c.chunkmetadata.delete()
        except ChunkMetadata.DoesNotExist:
            pass

        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # op will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)

        op = getattr(self.manager, op)
        op(*args)

        self.assertEqual(len(list_collection(db, collection)), 1)

        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)

        # Now we delete the chunk in SQL because we do not want the
        # ``delete`` method to be called, as it would take care of
        # removing the document itself. (And yes, we do interpolate
        # the table name. This is safe as ``Entry._meta.db_table`` is
        # a value under our control.)
        with connection.cursor() as cursor:
            cr = entry.latest
            cursor.execute(
                "DELETE FROM {} WHERE id = %s".format(entry._meta.db_table),
                [entry.pk])
            # We have to do this ourselves because Django's cascading
            # delete is implemented at the ORM level, not the database
            # level.
            cursor.execute(
                "DELETE FROM {} WHERE id = %s".format(cr._meta.db_table),
                [cr.pk])

        # Check that no collection or syncing has occurred.
        self.assertEqual(self.manager.count(), 1)
        self.assertEqual(len(list_collection(db, collection)), 1)

        op(*args)

        # Make sure our chunk was collected.
        self.assertEqual(self.manager.count(), 0)
        self.assertEqual(len(list_collection(db, collection)), 0)

    def test_sync_deletes_exist_documents(self):
        """
        ``sync_with_exist`` deletes those eXist documents that belong to
        chunks that no longer exist.
        """

        self.check_deletes_documents("sync_with_exist",
                                     self.chunk_collection_path)

    def test_prepare_collects(self):
        """
        ``prepare`` causes a collection of unreachable chunks.
        """
        self.check_collects("prepare", "xml", True)

    def test_prepare_skips_abnormal_chunks(self):
        """
        ``prepare`` does not sync abnormal chunks.
        """
        self.check_skip_abnormal_chunks("prepare",
                                        self.display_collection_path, "xml",
                                        True)

    def test_prepare_syncs_normal_chunks(self):
        """
        ``prepare`` syncs normal chunks.
        """
        self.check_syncs_normal_chunks("prepare", self.display_collection_path,
                                       "xml", True)

    def test_prepare_deletes_exist_documents(self):
        """
        ``prepare`` deletes those eXist documents that belong to
        chunks that no longer exist.
        """

        self.check_deletes_documents("prepare", self.display_collection_path,
                                     "xml", True)
Example 21
class ChunkTestCase(util.DisableMigrationsMixin, TestCase):
    chunk_collection_path = get_collection_path("chunks")
    display_collection_path = get_collection_path("display")

    prepare_kinds = Chunk.key_kinds

    @classmethod
    def setUpTestData(cls):
        super(ChunkTestCase, cls).setUpTestData()
        cls.foo = foo = user_model.objects.create(username="******",
                                                  password="******")
        scribe = Group.objects.get(name='scribe')
        cls.foo.groups.add(scribe)

    def setUp(self):
        cache.clear()
        return super(ChunkTestCase, self).setUp()

    def assertLogRegexp(self, handler, stream, regexp):
        handler.flush()
        self.assertRegex(stream.getvalue(), regexp)

    def test_abnormal_is_invalid(self):
        """
        Checks that an abnormal chunk is invalid, and that its validity is
        saved after being computed.
        """
        c = Chunk(data="", is_normal=False)
        c.save()
        self.assertIsNone(c._valid)
        self.assertFalse(c.valid)
        self.assertFalse(
            Chunk.objects.get(pk=c.pk)._valid, "_valid was saved.")

    def test_valid(self):
        """
        Checks that a normal chunk can be valid, and that its validity is
        saved after being computed.
        """
        c = Chunk(data=valid_editable.decode('utf-8'),
                  schema_version=schema_version)
        c.save()
        self.assertIsNone(c._valid)
        self.assertTrue(c.valid)
        self.assertTrue(Chunk.objects.get(pk=c.pk)._valid, "_valid was saved.")

    def test_invalid(self):
        """
        Checks that data that is invalid is recognized as invalid, and that
        the validity is saved after being computed.
        """

        # This data is just flat out invalid...
        data = """
<btw:entry xmlns="http://www.tei-c.org/ns/1.0" version="0.10"\
  xmlns:btw="http://mangalamresearch.org/ns/btw-storage">
  <btw:lemma></btw:lemma>
</btw:entry>
        """

        c = Chunk(data=data, schema_version=schema_version)
        c.save()
        self.assertIsNone(c._valid)
        self.assertFalse(c.valid)
        self.assertFalse(
            Chunk.objects.get(pk=c.pk)._valid, "_valid was saved.")

    def test_invalid_schematron(self):
        """
        Checks that data that is invalid only due to the schematron check
        is recognized as invalid, and that the validity is saved after
        being computed.
        """
        tree = lxml.etree.fromstring(valid_editable)
        sfs = tree.xpath(
            "//btw:example/btw:semantic-fields | "
            "//btw:example-explained/btw:semantic-fields",
            namespaces=xml.default_namespace_mapping)

        for el in sfs:
            el.getparent().remove(el)
        data = lxml.etree.tostring(tree,
                                   xml_declaration=True,
                                   encoding='utf-8').decode('utf-8')
        self.assertTrue(
            util.validate_with_rng(xml.schema_for_version(schema_version),
                                   data), "the data should validate")
        self.assertFalse(
            util.schematron(xml.schematron_for_version(schema_version), data),
            "the data should not pass the schematron check")
        c = Chunk(data=data, schema_version=schema_version)
        c.save()
        self.assertIsNone(c._valid)
        self.assertFalse(c.valid)
        self.assertFalse(
            Chunk.objects.get(pk=c.pk)._valid, "_valid was saved.")

    def test_published_false(self):
        """
        ``published`` is false for chunks that have not been published.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        self.assertFalse(c.published)

    def test_published_true(self):
        """
        ``published`` is true for chunks that have been published.
        """
        c = Chunk(data=valid_editable.decode('utf-8'),
                  schema_version=schema_version)
        c.save()
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        e.latest.publish(self.foo)
        self.assertTrue(c.published)

    def test_chunks_start_hidden(self):
        """
        ``hidden`` is ``True`` for new chunks.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        self.assertTrue(c.hidden)

    def test_chunks_with_all_hidden_records_are_hidden(self):
        """
        ``hidden`` is ``True`` if all ``ChangeRecord``s are hidden.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        e.latest.hidden = True
        e.latest.save()

        e = Entry()
        e.update(self.foo, "q", c, "foo2", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        e.latest.hidden = True
        e.latest.save()
        self.assertTrue(c.hidden)

    def test_chunks_with_one_shown_record_are_shown(self):
        """
        ``hidden`` is ``False`` if one ``ChangeRecord`` is shown.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        e.latest.hidden = True
        e.latest.save()

        e = Entry()
        e.update(self.foo, "q", c, "foo2", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)
        self.assertFalse(c.hidden)

    def test_exist_path(self):
        """
        ``exist_path`` returns good values.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        self.assertEqual(c.exist_path("chunks"),
                         "/".join([get_collection_path("chunks"), c.c_hash]))
        self.assertEqual(c.exist_path("display"),
                         "/".join([get_collection_path("display"), c.c_hash]))

    def test_exist_path_raises(self):
        """
        ``exist_path`` raises an error if the kind is wrong.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        with self.assertRaisesRegex(ValueError, "unknown value"):
            c.exist_path("invalid")

    def test_display_key(self):
        """
        ``display_key`` returns good values.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        for kind in self.prepare_kinds:
            self.assertEqual(c.display_key(kind),
                             "{}_{}".format(c.c_hash, kind).encode("utf8"))

    def test_display_key_raises(self):
        """
        ``display_key`` raises an error if the kind is wrong.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        with self.assertRaisesRegex(ValueError, "unknown display key kind"):
            c.display_key("invalid")

    def test_get_cached_value_starts_task(self):
        """
        Check that ``get_cached_value`` starts an actual task if the value
        is missing, and returns ``None``.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()

        for kind in self.prepare_kinds:
            cache.clear()
            with util.WithStringIO(models.logger) as (stream, handler):
                self.assertIsNone(c.get_cached_value(kind))
                self.assertLogRegexp(
                    handler, stream,
                    "^{0} is missing from article_display, launching task$".
                    format(c.display_key(kind)))

    def test_get_cached_value_knows_about_tasks(self):
        """
        Check that ``get_cached_value`` will log if a task is already
        computing the value and will return ``None``.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()

        for kind in self.prepare_kinds:
            cache.clear()
            cache.set(c.display_key(kind), {"task": "foo"})
            with util.WithStringIO(models.logger) as (stream, handler):
                self.assertIsNone(c.get_cached_value(kind))
                self.assertLogRegexp(
                    handler, stream,
                    "^{0} is being computed by task foo$".format(
                        c.display_key(kind)))

    def test_get_cached_value_returns_available_data(self):
        """
        Check that ``get_cached_value`` returns the data when it is
        already available in the cache.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        c._create_cached_data()
        # We have to launch the bibl data preparation ourselves.
        c.prepare("bibl", True)

        for kind in self.prepare_kinds:
            expected = cache.get(c.display_key(kind))
            self.assertIsNotNone(expected)
            self.assertEqual(c.get_cached_value(kind), expected)

    def check_skip_abnormal_chunks(self, op, collection, *args):
        c = Chunk(data="", is_normal=False)
        c.save()

        # We start from an empty collection so that we can verify that
        # calling the operation leaves it empty: abnormal chunks must
        # not be synced.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)

        getattr(c, op)(*args)

        self.assertEqual(len(list_collection(db, collection)), 0)

    def test_sync_skips_abnormal_chunks(self):
        """
        ``sync_with_exist`` skips abnormal chunks.
        """
        self.check_skip_abnormal_chunks("sync_with_exist",
                                        self.chunk_collection_path)

    def check_sync_normal_chunks(self, op, collection, *args):
        c = Chunk(data="<div/>", is_normal=True)
        c.save()

        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # sync_with_exist will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)

        ret = getattr(c, op)(*args)

        self.assertEqual(len(list_collection(db, collection)), 1)
        return ret

    def test_sync_syncs_normal_chunks(self):
        """
        ``sync_with_exist`` syncs normal chunks.
        """
        self.check_sync_normal_chunks("sync_with_exist",
                                      self.chunk_collection_path)

    def test_sync_handles_overwrites(self):
        """
        ``sync_with_exist`` will not overwrite documents already in eXist.
        """
        db = ExistDB()
        db.removeCollection(self.chunk_collection_path, True)
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        c.sync_with_exist()

        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)

        with mock.patch('lexicography.models.ExistDB.load') as load_mock:
            c.sync_with_exist()
            self.assertEqual(load_mock.call_count, 0,
                             "load should not have been called!")

        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)

    def test_prepare_xml_skips_abnormal_chunks(self):
        """
        ``prepare`` with the "xml" kind skips abnormal chunks.
        """
        self.check_skip_abnormal_chunks("prepare",
                                        self.display_collection_path, "xml",
                                        True)

    def test_prepare_xml_syncs_normal_chunks(self):
        """
        ``prepare`` with the "xml" kind syncs normal chunks.
        """
        self.check_sync_normal_chunks("prepare", self.display_collection_path,
                                      "xml", True)

    def test_prepare_can_run_asynchronously(self):
        """
        ``prepare`` can run asynchronously
        """
        c = Chunk(data="<doc/>", is_normal=True)
        c.save()

        for kind in self.prepare_kinds:
            ret = c.prepare("xml")
            # When run asynchronously, we get an AsyncResult on which we
            # can call ``get``.
            ret.get()

    def check_remove_data_from_exist_and_cache(self, op):
        """
        Check that invoking ``op`` will remove the data from the eXist
        database and the cache.
        """
        db = ExistDB()
        c = Chunk(data="<div/>", is_normal=True)
        c.clean()
        method = op if isinstance(op, Callable) else getattr(c, op)
        cache.delete(c.c_hash)
        db.removeCollection(self.chunk_collection_path, True)
        db.removeCollection(self.display_collection_path, True)
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)

        for kind in self.prepare_kinds:
            self.assertIsNone(cache.get(c.display_key(kind)))

        c.save()
        c._create_cached_data()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]

        # Only the "xml" data is created on save.
        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 1)

        method()
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)
        for key in keys:
            self.assertIsNone(cache.get(key))

    def test_delete_removes_data_from_exist_and_cache(self):
        """
        Deleting a chunk removes its associated data from eXist and from
        the cache.
        """
        self.check_remove_data_from_exist_and_cache("delete")

    def test_delete_cached_data_removes_data_from_exist_and_cache(self):
        """
        _delete_cached_data removes data from eXist and the cache.
        """
        self.check_remove_data_from_exist_and_cache("_delete_cached_data")

    def check_abnormal_remove_data_from_exist_and_cache(self, op):
        db = ExistDB()
        c = Chunk(data="<div/>", is_normal=False)
        c.clean()
        cache.delete(c.c_hash)
        method = getattr(c, op)
        db.removeCollection(self.chunk_collection_path, True)
        db.removeCollection(self.display_collection_path, True)

        c.save()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]

        for kind in self.prepare_kinds:
            self.assertIsNone(cache.get(c.display_key(kind)))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)

        with mock.patch('lexicography.models.ExistDB.removeDocument') as \
                remove_mock:
            method()
            self.assertEqual(remove_mock.call_count, 0)

        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)
        for key in keys:
            self.assertIsNone(cache.get(key))

    def test_delete_abnormal_does_not_touch_exist_or_cache(self):
        """
        Deleting an abnormal chunk does not touch the cache or the database.
        """
        self.check_abnormal_remove_data_from_exist_and_cache("delete")

    def test_delete_cached_data_abnormal_does_not_touch_exist_or_cache(self):
        """
        ``_delete_cached_data`` on an abnormal chunk does not touch the
        cache or the database.
        """
        self.check_abnormal_remove_data_from_exist_and_cache(
            "_delete_cached_data")
Example 22
 def __call__(self, command, _options):
     assert_running()
     db = get_admin_db()
     collection = get_collection_path(None)
     db.removeCollectionIndex(collection)
Example 23
    def filter_queryset(self, qs):  # pylint: disable=too-many-branches
        search_value = self.request.GET.get('search[value]', None)

        if search_value is not None:
            search_value = search_value.encode("utf-8")

        lemmata_only = self.request.GET.get('lemmata_only', "false") == \
            "true"

        if usermod.can_author(self.request.user):
            publication_status = self.request.GET.get('publication_status',
                                                      "published")
            search_all = self.request.GET.get('search_all', "false") == "true"
            if not search_all:
                # Remove deleted entries from the set.
                active = qs.filter(entry__in=Entry.objects.active_entries())
                if publication_status == "published":
                    active = active.filter(entry__latest_published=F('pk'))
                elif publication_status == "unpublished":
                    active = active.filter(entry__latest=F('pk')) \
                                   .exclude(entry__latest_published=F('pk'))
                elif publication_status == "both":
                    active = active.filter(entry__latest=F('pk'))
                else:
                    raise ValueError("unknown value for publication_status: " +
                                     publication_status)
            else:
                if publication_status == "published":
                    active = qs.filter(published=True)
                elif publication_status == "unpublished":
                    active = qs.filter(published=False)
                elif publication_status == "both":
                    active = qs
                else:
                    raise ValueError("unknown value for publication_status: " +
                                     publication_status)
        else:
            # If the user cannot author, then our queryset is already
            # reduced to what the user can see: the latest version of
            # published articles.
            active = qs

        if search_value:
            db = ExistDB()
            chunks = []
            if lemmata_only:
                scope = "//btw:lemma"
                # When we do a lemma search, hits are not useful.
                hit = ""
            else:
                scope = "//btw:entry"
                hit = "{kwic:summarize($m, <config width='80'/>)}"

            for query_chunk in query_iterator(db, xquery.format(
                    """\
import module namespace kwic="http://exist-db.org/xquery/kwic";
for $m in collection({db}){scope}[ft:query(., {search_text})]
order by ft:score($m) descending
return <result><doc>{doc}</doc><hit>{hit}</hit></result>""",
                    db=get_collection_path("display"),
                    scope=xquery.Verbatim(scope),
                    doc=xquery.Verbatim("{util:document-name($m)}"),
                    hit=xquery.Verbatim(hit),
                    search_text=search_value)):

                for result in query_chunk.results:
                    chunk = result[0].text  # Content of <doc>.
                    self.chunk_to_hits[chunk] = result[1]
                    chunks.append(chunk)

            # We need to get the changerecords that pertain to these chunks.
            qs = active.filter(c_hash__in=set(chunks))
        else:
            qs = active

        return qs