Exemple #1
0
    def check_abnormal_remove_data_from_exist_and_cache(self, op):
        """
        Check that invoking ``op`` on an *abnormal* chunk does not try
        to remove anything from eXist, and that nothing for the chunk
        ever reaches eXist or the cache in the first place.

        :param op: Name of the method to invoke on the chunk.
        """
        db = ExistDB()
        c = Chunk(data="<div/>", is_normal=False)
        c.clean()
        cache.delete(c.c_hash)
        method = getattr(c, op)
        # Start from empty collections so any document found later
        # would have to come from saving the abnormal chunk.
        db.removeCollection(self.chunk_collection_path, True)
        db.removeCollection(self.display_collection_path, True)

        c.save()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]

        # Saving an abnormal chunk must not populate the cache...
        for kind in self.prepare_kinds:
            self.assertIsNone(cache.get(c.display_key(kind)))
        # ...nor either eXist collection.
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(len(list_collection(db,
                                             self.display_collection_path)),
                         0)

        with mock.patch('lexicography.models.ExistDB.removeDocument') as \
                remove_mock:
            method()
            # The operation must skip eXist entirely for abnormal chunks.
            self.assertEqual(remove_mock.call_count, 0)

        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(len(list_collection(db,
                                             self.display_collection_path)),
                         0)
        for key in keys:
            self.assertIsNone(cache.get(key))
Exemple #2
0
def fetch_xml(pk):
    """
    Retrieve the displayable XML for a chunk, preferring the cache.

    A cache hit is returned immediately; on a miss the document is
    loaded from eXist and put back in the cache. This is not actually a
    task but it is so tightly related to the ``process_xml`` task that
    it is included among the other tasks.
    """
    key = make_display_key("xml", pk)
    cached = cache.get(key)
    if cached:
        return cached

    # We make this atomic and use select_for_update so that anything
    # else that might want to mess with our chunks is blocked from
    # doing so until we are done.
    with transaction.atomic():
        try:
            # Acquiring the row lock is the point here; the metadata's
            # contents are not otherwise needed.
            ChunkMetadata.objects.select_for_update().get(chunk_id=pk)
        except ChunkMetadata.DoesNotExist:
            # No metadata record: the chunk was never prepared.
            return None

        document_path = get_path_for_chunk_hash("display", pk)
        xml = ExistDB().getDocument(document_path).decode("utf-8")

        if xml:
            cache.set(key, xml,
                      timeout=settings.LEXICOGRAPHY_XML_TIMEOUT)

        return xml
Exemple #3
0
def fetch_xml(pk):
    """
    This function will check in the cache first and if the xml is not
    present there it will load it from eXist and put it back in the
    cache. This is not actually a task but it is so tightly related to
    the ``process_xml`` task that it is included among the other
    tasks.

    :param pk: The key identifying the chunk whose XML to fetch.
    :returns: The XML as a string, or ``None`` if the chunk has no
              metadata record.
    """
    key = make_display_key("xml", pk)
    xml = cache.get(key)
    if xml:
        return xml

    # We make this atomic and use select_for_update so that anything
    # else that might want to mess with our chunks is blocked from
    # doing so until we are done.
    with transaction.atomic():
        try:
            meta = ChunkMetadata.objects \
                                .select_for_update().get(chunk_id=pk)
        except ChunkMetadata.DoesNotExist:
            # No metadata: the chunk was never prepared; we fall
            # through and return None.
            meta = None

        xml = None
        if meta:
            path = get_path_for_chunk_hash("display", pk)
            db = ExistDB()
            # NOTE(review): presumably getDocument raises when the
            # document is absent from eXist — confirm; .decode would
            # also fail on a None return.
            xml = db.getDocument(path).decode("utf-8")

            if xml:
                cache.set(key, xml, timeout=settings.LEXICOGRAPHY_XML_TIMEOUT)

        return xml
Exemple #4
0
    def _delete_cached_data(self):
        """Drop this chunk's eXist documents and its cached display keys."""
        # Abnormal chunks are never stored in eXist or the cache, so
        # there is nothing to delete for them.
        if not self.is_normal:
            return

        db = ExistDB()
        for collection in ("chunks", "display"):
            db.removeDocument(self.exist_path(collection), True)

        cache.delete_many(self.display_key(kind)
                          for kind in self.key_kinds)
Exemple #5
0
 def setUp(self):
     """
     Prepare a fresh fixture: a user in the ``scribe`` group, a
     cleared cache and empty eXist collections.
     """
     self.foo = foo = user_model.objects.create(
         username="******", password="******")
     scribe = Group.objects.get(name='scribe')
     foo.groups.add(scribe)
     cache.clear()
     db = ExistDB()
     # Wipe both eXist collections so each test starts from a known
     # empty state.
     db.removeCollection(self.chunk_collection_path, True)
     db.removeCollection(self.display_collection_path, True)
     return super(ChunkTransactionTestCase, self).setUp()
Exemple #6
0
    def check_deletes_documents(self, op, collection, *args):
        """
        Check that the manager-level ``op`` syncs reachable chunks to
        eXist and removes the documents of chunks that have become
        unreachable (collected).
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        entry = self.make_reachable(c)

        # If it does not have metadata yet, that's fine.
        try:
            c.chunkmetadata.delete()
        except ChunkMetadata.DoesNotExist:
            pass

        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # op will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)

        op = getattr(self.manager, op)
        op(*args)

        # The chunk is reachable, so op must have synced its document.
        self.assertEqual(len(list_collection(db, collection)), 1)

        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)

        # Now we delete the chunk in SQL because we do not want the
        # ``delete`` method to be called, as it would take care of
        # removing the document itself. (And yes, we do interpolate
        # the table name. This is safe as ``Entry._meta.db_table`` is
        # a value under our control.)
        with connection.cursor() as cursor:
            # Grab the latest change record before the entry row goes away.
            cr = entry.latest
            cursor.execute(
                "DELETE FROM {} WHERE id = %s".format(entry._meta.db_table),
                [entry.pk])
            # We have to do this ourselves because Django's cascading
            # delete is implemented at the ORM level, not the database
            # level.
            cursor.execute(
                "DELETE FROM {} WHERE id = %s".format(cr._meta.db_table),
                [cr.pk])

        # Check that no collection or syncing has occurred.
        self.assertEqual(self.manager.count(), 1)
        self.assertEqual(len(list_collection(db, collection)), 1)

        op(*args)

        # Make sure our chunk was collected.
        self.assertEqual(self.manager.count(), 0)
        self.assertEqual(len(list_collection(db, collection)), 0)
Exemple #7
0
    def check_deletes_documents(self, op, collection, *args):
        """
        Verify that ``op`` on the manager syncs documents for reachable
        chunks and deletes the documents of collected (unreachable)
        chunks.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        entry = self.make_reachable(c)

        # If it does not have metadata yet, that's fine.
        try:
            c.chunkmetadata.delete()
        except ChunkMetadata.DoesNotExist:
            pass

        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # op will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)

        op = getattr(self.manager, op)
        op(*args)

        # Invoking op must have synced the reachable chunk's document.
        self.assertEqual(len(list_collection(db, collection)), 1)

        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)

        # Now we delete the chunk in SQL because we do not want the
        # ``delete`` method to be called, as it would take care of
        # removing the document itself. (And yes, we do interpolate
        # the table name. This is safe as ``Entry._meta.db_table`` is
        # a value under our control.)
        with connection.cursor() as cursor:
            # Read the latest change record before deleting the entry row.
            cr = entry.latest
            cursor.execute(
                "DELETE FROM {} WHERE id = %s".format(entry._meta.db_table),
                [entry.pk])
            # We have to do this ourselves because Django's cascading
            # delete is implemented at the ORM level, not the database
            # level.
            cursor.execute(
                "DELETE FROM {} WHERE id = %s".format(cr._meta.db_table),
                [cr.pk])

        # Check that no collection or syncing has occurred.
        self.assertEqual(self.manager.count(), 1)
        self.assertEqual(len(list_collection(db, collection)), 1)

        op(*args)

        # Make sure our chunk was collected.
        self.assertEqual(self.manager.count(), 0)
        self.assertEqual(len(list_collection(db, collection)), 0)
Exemple #8
0
def prepare_xml(pk):
    """
    This function prepares a chunk for display and caches the
    result of the prepared XML.

    :param pk: The primary key of the chunk to prepare.
    :type pk: :class:`int`
    """

    # By using atomicity and using select_for_update we are
    # effectively preventing other prepare_xml tasks from working on
    # the same chunk at the same time.
    with transaction.atomic():
        chunk = Chunk.objects.get(pk=pk)
        key = chunk.display_key("xml")
        logger.debug("%s processing...", key)
        meta, _ = ChunkMetadata.objects \
            .select_for_update() \
            .get_or_create(chunk=chunk)

        # Transform the raw chunk data into its display form, plus the
        # semantic-field records extracted from it.
        data = chunk.data
        xml, sf_records = prepare_article_data(data)

        cache.set(key, xml, timeout=settings.LEXICOGRAPHY_XML_TIMEOUT)

        logger.debug("%s is set", key)

        # Hash the prepared XML so we can detect whether eXist already
        # holds an up-to-date copy.
        sha1 = hashlib.sha1()
        sha1.update(xml.encode('utf-8'))
        xml_hash = sha1.hexdigest()
        db = ExistDB()
        path = get_path_for_chunk_hash("display", pk)
        absent = not db.hasDocument(path)
        # Resync when the prepared XML changed or the document has
        # vanished from eXist.
        if meta.xml_hash != xml_hash or absent:
            # This is something that should not happen ever. It has
            # happened once in development but it is unclear what could
            # have been the cause.
            if meta.xml_hash == xml_hash and absent:
                logger.error(
                    "%s was missing from eXist but had a value "
                    "already set and equal to the new hash; this "
                    "should not happen!", path)

            meta.semantic_fields.set(sf_records)
            # Technically, if it was created then xml_hash is already
            # set, but putting this in an conditional block does not
            # provide for better performance.
            meta.xml_hash = xml_hash
            meta.save()
            if not db.load(xml.encode("utf-8"), path):
                raise Exception("could not sync with eXist database")
Exemple #9
0
def prepare_xml(pk):
    """
    This function prepares a chunk for display and caches the
    result of the prepared XML.

    :param pk: The primary key of the chunk to prepare.
    :type pk: :class:`int`
    """

    # By using atomicity and using select_for_update we are
    # effectively preventing other prepare_xml tasks from working on
    # the same chunk at the same time.
    with transaction.atomic():
        chunk = Chunk.objects.get(pk=pk)
        key = chunk.display_key("xml")
        logger.debug("%s processing...", key)
        meta, _ = ChunkMetadata.objects \
            .select_for_update() \
            .get_or_create(chunk=chunk)

        # Produce the display XML and the semantic-field records
        # extracted from the chunk's raw data.
        data = chunk.data
        xml, sf_records = prepare_article_data(data)

        cache.set(key, xml, timeout=settings.LEXICOGRAPHY_XML_TIMEOUT)

        logger.debug("%s is set", key)

        # Hash the prepared XML to decide whether eXist needs a resync.
        sha1 = hashlib.sha1()
        sha1.update(xml.encode('utf-8'))
        xml_hash = sha1.hexdigest()
        db = ExistDB()
        path = get_path_for_chunk_hash("display", pk)
        absent = not db.hasDocument(path)
        if meta.xml_hash != xml_hash or absent:
            # This is something that should not happen ever. It has
            # happened once in development but it is unclear what could
            # have been the cause.
            if meta.xml_hash == xml_hash and absent:
                logger.error("%s was missing from eXist but had a value "
                             "already set and equal to the new hash; this "
                             "should not happen!", path)

            meta.semantic_fields.set(sf_records)
            # Technically, if it was created then xml_hash is already
            # set, but putting this in an conditional block does not
            # provide for better performance.
            meta.xml_hash = xml_hash
            meta.save()
            if not db.load(xml.encode("utf-8"), path):
                raise Exception("could not sync with eXist database")
Exemple #10
0
    def check_skip_abnormal_chunks(self, op, collection, *args):
        """
        Check that invoking ``op`` on an abnormal chunk does not sync
        anything to the eXist ``collection``.
        """
        chunk = Chunk(data="", is_normal=False)
        chunk.save()

        # Merely saving the chunk would sync it, which is not what we
        # are testing here: we want to confirm what invoking op does.
        # So wipe the collection first and verify it is empty.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)

        getattr(chunk, op)(*args)

        # Abnormal chunks must never be synced to eXist.
        self.assertEqual(len(list_collection(db, collection)), 0)
Exemple #11
0
    def check_skip_abnormal_chunks(self, op, collection, *args):
        """
        Check that invoking ``op`` on an abnormal chunk syncs nothing
        to the eXist ``collection``.
        """
        c = Chunk(data="", is_normal=False)
        c.save()

        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # sync_with_exist will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)

        getattr(c, op)(*args)

        # Nothing may have been synced for an abnormal chunk.
        self.assertEqual(len(list_collection(db, collection)), 0)
Exemple #12
0
    def test_when_chunk_becomes_hidden_cached_data_is_cleared(self):
        """
        When a ``Chunk`` becomes hidden, then its cached data is deleted.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]
        e = Entry()
        e.update(self.foo, "q", c, "foo", ChangeRecord.CREATE,
                 ChangeRecord.MANUAL)

        # Sanity check: saving and making the chunk reachable put its
        # data into the cache and both eXist collections.
        db = ExistDB()
        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 1)

        # Hiding the latest change record triggers the cleanup.
        e.latest.hidden = True
        e.latest.save()

        for key in keys:
            self.assertIsNone(cache.get(key))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)
Exemple #13
0
    def test_no_exist_document(self):
        """
        When the exist document is missing, raise an error. We want an
        error because it indicates something really broken about our
        internal state. We should never have metadata without a
        corresponding XML file.
        """
        cr = ChangeRecord.objects.get(pk=1)
        chunk = cr.c_hash
        # Sanity check: the fixture's XML is initially cached.
        self.assertIsNotNone(cache.get(chunk.display_key("xml")))

        cache.clear()
        db = ExistDB()
        # With the cache cleared and the display collection gone,
        # fetch_xml has to hit eXist and fail there.
        db.removeCollection(get_collection_path("display"), True)

        with self.assertRaises(ExistDBException):
            tasks.fetch_xml(chunk.c_hash)
Exemple #14
0
    def sync_with_exist(self):
        """Sync every syncable chunk to eXist and prune stale documents."""
        self.collect()
        db = ExistDB()
        synced = set()
        for chunk in self.all_syncable_chunks():
            chunk.sync_with_exist(db)
            synced.add(chunk.c_hash)

        # Anything left in the collection that we did not just sync is
        # stale and gets removed.
        self._remove_absent(db, synced, get_collection_path("chunks"))
Exemple #15
0
    def test_sync_handles_overwrites(self):
        """
        ``sync_with_exist`` will not overwrite documents already in eXist.
        """
        db = ExistDB()
        db.removeCollection(self.chunk_collection_path, True)
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        c.sync_with_exist()

        # The first sync stores the document.
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)

        # A second sync must not call load for a document that is
        # already present in eXist.
        with mock.patch('lexicography.models.ExistDB.load') as load_mock:
            c.sync_with_exist()
            self.assertEqual(load_mock.call_count, 0,
                             "load should not have been called!")

        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)
Exemple #16
0
    def test_sync_handles_overwrites(self):
        """
        ``sync_with_exist`` will not overwrite documents already in eXist.
        """
        db = ExistDB()
        db.removeCollection(self.chunk_collection_path, True)
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        c.sync_with_exist()

        # First sync: the document lands in the collection.
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)

        # Syncing again must skip the load since the document exists.
        with mock.patch('lexicography.models.ExistDB.load') as load_mock:
            c.sync_with_exist()
            self.assertEqual(load_mock.call_count, 0,
                             "load should not have been called!")

        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)
Exemple #17
0
    def check_remove_data_from_exist_and_cache(self, op):
        """
        Check that invoking ``op`` will remove the data from the eXist
        database and the cache.

        :param op: Either a callable to invoke directly, or the name
                   of a method on the chunk.
        """
        db = ExistDB()
        c = Chunk(data="<div/>", is_normal=True)
        c.clean()
        method = op if callable(op) else getattr(c, op)
        cache.delete(c.c_hash)
        # Start from empty collections and an empty cache.
        db.removeCollection(self.chunk_collection_path, True)
        db.removeCollection(self.display_collection_path, True)
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(len(list_collection(db,
                                             self.display_collection_path)),
                         0)

        for kind in self.prepare_kinds:
            self.assertIsNone(cache.get(c.display_key(kind)))

        c.save()
        c._create_cached_data()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]

        # Only the "xml" data is created on save.
        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)
        self.assertEqual(len(list_collection(db,
                                             self.display_collection_path)),
                         1)

        # The operation under test must clear both collections and all
        # cached keys.
        method()
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(len(list_collection(db,
                                             self.display_collection_path)),
                         0)
        for key in keys:
            self.assertIsNone(cache.get(key))
Exemple #18
0
    def check_abnormal_remove_data_from_exist_and_cache(self, op):
        """
        Check that ``op`` on an abnormal chunk never touches eXist and
        that no data for the chunk exists in eXist or the cache at any
        point.
        """
        db = ExistDB()
        c = Chunk(data="<div/>", is_normal=False)
        c.clean()
        cache.delete(c.c_hash)
        method = getattr(c, op)
        # Empty both collections so later checks are meaningful.
        db.removeCollection(self.chunk_collection_path, True)
        db.removeCollection(self.display_collection_path, True)

        c.save()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]

        # Saving an abnormal chunk caches nothing and syncs nothing.
        for kind in self.prepare_kinds:
            self.assertIsNone(cache.get(c.display_key(kind)))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)

        with mock.patch('lexicography.models.ExistDB.removeDocument') as \
                remove_mock:
            method()
            # eXist removal must be skipped for abnormal chunks.
            self.assertEqual(remove_mock.call_count, 0)

        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)
        for key in keys:
            self.assertIsNone(cache.get(key))
Exemple #19
0
    def check_skip_abnormal_chunks(self, op, collection, *args):
        """
        Check that the manager-level ``op`` neither syncs an abnormal
        chunk to eXist nor collects it.
        """
        c = Chunk(data="", is_normal=False)
        c.save()
        self.make_reachable(c)
        db = ExistDB()
        # Abnormal chunks are not synced on save, so the collection
        # starts out empty.
        self.assertEqual(len(list_collection(db, collection)), 0)

        getattr(self.manager, op)(*args)

        self.assertEqual(len(list_collection(db, collection)), 0)

        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)
Exemple #20
0
    def _delete_cached_data(self):
        """Remove this chunk's eXist documents and cached display data."""
        if not self.is_normal:
            # Abnormal chunks never make it into eXist or the cache.
            return

        db = ExistDB()
        db.removeDocument(self.exist_path("chunks"), True)
        db.removeDocument(self.exist_path("display"), True)

        keys = [self.display_key(kind) for kind in self.key_kinds]
        cache.delete_many(keys)
Exemple #21
0
    def check_syncs_normal_chunks(self, op, collection, *args):
        """
        Check that the manager-level ``op`` syncs a reachable normal
        chunk to eXist without collecting it.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        self.make_reachable(c)
        # If it does not have metadata yet, that's fine.
        try:
            c.chunkmetadata.delete()
        except ChunkMetadata.DoesNotExist:
            pass

        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # sync_with_exist will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)

        getattr(self.manager, op)(*args)

        # op must have synced the chunk's document.
        self.assertEqual(len(list_collection(db, collection)), 1)

        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)
Exemple #22
0
    def check_syncs_normal_chunks(self, op, collection, *args):
        """
        Verify that invoking ``op`` on the manager syncs a normal,
        reachable chunk into the given eXist ``collection`` and leaves
        the chunk uncollected.
        """
        c = Chunk(data="<div/>", is_normal=True)
        c.save()
        self.make_reachable(c)
        # If it does not have metadata yet, that's fine.
        try:
            c.chunkmetadata.delete()
        except ChunkMetadata.DoesNotExist:
            pass

        # We have to delete the collection because merely saving the
        # chunk causes it to be synced, but this is not what we are
        # testing here. We want to make sure that calling
        # sync_with_exist will perform the sync.
        db = ExistDB()
        db.removeCollection(collection, True)
        self.assertEqual(len(list_collection(db, collection)), 0)

        getattr(self.manager, op)(*args)

        # The sync must have restored the chunk's document.
        self.assertEqual(len(list_collection(db, collection)), 1)

        # Make sure our chunk was not collected.
        self.assertEqual(self.manager.count(), 1)
Exemple #23
0
    def sync_with_exist(self, db=None):
        """
        Upload this chunk's data to eXist, unless it is already there.

        :param db: Optional ``ExistDB`` instance to reuse; a new one is
                   created when omitted.
        :raises Exception: If the upload to eXist fails.
        """
        # We do not put "abnormal" chunks in exist.
        if not self.is_normal:
            return

        if db is None:
            db = ExistDB()

        path = self.exist_path("chunks")
        # Reminder: chunks are immutable. So if a chunk has been put
        # in eXist already, then we do not want to reput that data. If
        # we were to overwrite the data with the same value, it is not
        # clear at all whether eXist would stupidly reindex the new
        # data. We proactively avoid the situation.
        if db.hasDocument(path):
            return

        if not db.load(self.data.encode("utf-8"), path):
            raise Exception("could not sync with eXist database")
Exemple #24
0
 def setUp(self):
     """
     Set up the fixture: create a user in the ``scribe`` group, clear
     the cache and empty both eXist collections.
     """
     self.foo = foo = user_model.objects.create(username="******",
                                                password="******")
     scribe = Group.objects.get(name='scribe')
     foo.groups.add(scribe)
     cache.clear()
     db = ExistDB()
     # Each test starts from empty eXist collections.
     db.removeCollection(self.chunk_collection_path, True)
     db.removeCollection(self.display_collection_path, True)
     return super(ChunkTransactionTestCase, self).setUp()
Exemple #25
0
    def prepare(self, kind, include_unpublished):
        """
        Prepare all syncable chunks for display and prune stale
        display documents from eXist.

        :param kind: Kind of data to prepare; only ``"xml"`` is
                     supported.
        :param include_unpublished: Whether chunks without a published
                                    change record are also prepared.
        :raises ValueError: If ``kind`` is not ``"xml"``.
        """
        if kind != "xml":
            raise ValueError("the manager only supports preparing XML data; "
                             "future versions may support other kinds")

        self.collect()
        db = ExistDB()
        candidates = self.all_syncable_chunks()
        if not include_unpublished:
            # Restrict to chunks having at least one published record.
            candidates = candidates.filter(changerecord__published=True)

        prepared = set()
        for chunk in candidates:
            chunk.prepare("xml", True)
            prepared.add(chunk.c_hash)

        # Display documents not belonging to any prepared chunk are
        # stale and get removed.
        self._remove_absent(db, prepared, get_collection_path("display"))
Exemple #26
0
    def test_not_cached(self):
        """
        When the data is not cached, get it from eXist.
        """
        cr = ChangeRecord.objects.get(pk=1)
        chunk = cr.c_hash
        key = chunk.display_key("xml")
        xml_doc = cache.get(key)
        # Bug fix: the old code asserted on the ``xml`` *module*
        # (which is trivially never None) instead of the cached
        # document; check the document itself, and do it before
        # strip_xml_decl, which would fail on None anyway.
        self.assertIsNotNone(xml_doc)
        _, xml_doc = xml.strip_xml_decl(xml_doc)

        db = ExistDB()
        cache.delete(key)
        # With the key evicted, fetch_xml must go to eXist exactly
        # once and repopulate the cache.
        with mock.patch('lexicography.models.ExistDB.getDocument',
                        wraps=db.getDocument) as get_mock:
            self.assertEqual(tasks.fetch_xml(chunk.c_hash), xml_doc)
            self.assertEqual(cache.get(key), xml_doc)
            self.assertEqual(get_mock.call_count, 1)
Exemple #27
0
    def __call__(self, command, _options):
        """
        Load initial data into a new database. This is necessary for BTW
        to run.
        """
        assert_running()

        from django.utils import translation
        translation.activate('en-us')

        db = ExistDB()
        chunk_collection_path = get_collection_path("chunks")

        # Start from a clean chunk collection before resyncing all
        # chunks from the SQL side.
        if db.hasCollection(chunk_collection_path):
            db.removeCollection(chunk_collection_path)

        Chunk.objects.sync_with_exist()

        display_path = get_collection_path("display")
        if db.hasCollection(display_path):
            db.removeCollection(display_path)
        # Only published chunks get display data prepared here.
        Chunk.objects.prepare("xml", include_unpublished=False)
Exemple #28
0
    def hashes_with_semantic_field(self, sf):
        """
        Returns a set of chunk *hashes* that contain the semantic field
        requested.

        :param sf: The semantic field reference to search for.
        :returns: A set of document names, which are chunk hashes.
        """
        db = ExistDB()
        chunks = set()

        # Ask eXist for the names of display documents containing a
        # btw:sf element whose @ref equals the requested field; results
        # arrive in batches from the query iterator.
        for query_chunk in query_iterator(
                db,
                xquery.format("""\
for $m in collection({db})//btw:sf[@ref = {path}]
return util:document-name($m)""",
                              db=get_collection_path("display"),
                              path=sf)):

            for result in query_chunk.values:
                chunks.add(result)

        return chunks
Exemple #29
0
    def __call__(self, command, _options):
        """
        Create the eXist groups and the BTW server account.
        """
        assert_running()
        db = get_admin_db()

        # Create each group with a human-readable description.
        for (group, desc) in command.new_user_groups.items():
            db.server.addGroup(
                group,
                {'http://exist-db.org/security/description': desc})

        db.server.addAccount(
            command.server_user,
            settings.EXISTDB_SERVER_PASSWORD,
            "", list(command.new_user_groups.keys()),
            True, 0o022,
            {
                'http://exist-db.org/security/description':
                'BTW user'
            })

        db.server.setUserPrimaryGroup(command.server_user, command.btw_group)
        # NOTE(review): this reassignment is never used afterwards;
        # presumably it checks that the new account can connect —
        # confirm before removing.
        db = ExistDB()
Exemple #30
0
    def __call__(self, command, _options):
        """
        Load initial data into a new database. This is necessary for BTW
        to run.
        """
        assert_running()

        from django.utils import translation
        translation.activate('en-us')

        db = ExistDB()
        chunk_collection_path = get_collection_path("chunks")

        # Wipe the chunk collection before resyncing everything from
        # the SQL side.
        if db.hasCollection(chunk_collection_path):
            db.removeCollection(chunk_collection_path)

        Chunk.objects.sync_with_exist()

        display_path = get_collection_path("display")
        if db.hasCollection(display_path):
            db.removeCollection(display_path)
        # The positional True is include_unpublished: unpublished
        # chunks also get display data prepared.
        Chunk.objects.prepare("xml", True)
Exemple #31
0
    def check_remove_data_from_exist_and_cache(self, op):
        """
        Check that invoking ``op`` will remove the data from the eXist
        database and the cache.

        :param op: Either a callable to invoke directly, or the name
                   of a method on the chunk.
        """
        db = ExistDB()
        c = Chunk(data="<div/>", is_normal=True)
        c.clean()
        method = op if isinstance(op, Callable) else getattr(c, op)
        cache.delete(c.c_hash)
        # Start from empty collections and an empty cache.
        db.removeCollection(self.chunk_collection_path, True)
        db.removeCollection(self.display_collection_path, True)
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)

        for kind in self.prepare_kinds:
            self.assertIsNone(cache.get(c.display_key(kind)))

        c.save()
        c._create_cached_data()
        keys = [c.display_key(kind) for kind in self.prepare_kinds]

        # Only the "xml" data is created on save.
        self.assertIsNotNone(cache.get(c.display_key("xml")))
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         1)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 1)

        # The operation under test must clear both collections and all
        # cached keys.
        method()
        self.assertEqual(len(list_collection(db, self.chunk_collection_path)),
                         0)
        self.assertEqual(
            len(list_collection(db, self.display_collection_path)), 0)
        for key in keys:
            self.assertIsNone(cache.get(key))
Exemple #32
0
 def list_display_collection(self):
     """Return the documents present in the *display* collection."""
     db = ExistDB()
     # Bug fix: the old code listed the chunk collection, which
     # contradicts this method's name; list the display collection.
     return list_collection(db, self.display_collection_path)
Exemple #33
0
    def test_complex_document(self):
        """
        Preparing a complex article yields the expected per-sense and
        overview semantic-field lists, caches the XML and stores the
        display document in eXist.
        """
        # Yeah, we launch it here. The other tests don't need this
        # data so...
        launch_fetch_task()
        entry = create_valid_article()

        cr = entry.latest
        chunk = cr.c_hash
        tasks.prepare_xml.delay(chunk.c_hash).get()

        # Check that the correct results are in the cache.
        result = cache.get(chunk.display_key("xml"))
        db = ExistDB()
        self.assertTrue(db.hasDocument(chunk.exist_path("display")))
        tree = lxml.etree.fromstring(result)

        senses = tree.xpath(
            "/btw:entry/btw:sense-discrimination/btw:sense",
            namespaces=xml.default_namespace_mapping)
        self.assertEqual(len(senses), 4)

        # One expected list of semantic fields per sense, in document
        # order.
        expected_values = [
            [
                "01.02.11n",
                "Person (01.04.04n)",
                "01.04.08n",
                "01.05.05.09.01n",
                "01.06.07.03n",
                "Beautification (02.02.18n)",
                "Lack of beauty (02.02.19n)",
                "Written laws (03.05.01n)",
            ],
            [
                "Belief (02.01.13n)",
                "Belief, trust, confidence (02.01.13.02n)",
                "Act of convincing, conviction (02.01.13.02.02n)",
                "Absence of doubt, confidence (02.01.13.08.11n)",
                "Making certain, assurance (02.01.13.08.11.01.01n)",
                "Expectation (02.01.14n)",
                "02.01.17n",
                "Good taste (02.02.12n)",
                "Bad taste (02.02.13n)",
                "Fashionableness (02.02.14n)",
                "02.02.22n",
                "Education (03.07n)",
            ],
            [
                "01.05.05.12.01n"
            ],
            [
                "02.01.17n",
                "Good taste (02.02.12n)",
                "Bad taste (02.02.13n)",
                "03.07.00.23n",
                "Learning (03.07.03n)"
            ],
        ]

        # Each sense carries exactly one btw:semantic-fields element
        # whose contents match the expectations above.
        for ix, (sense, expected) in enumerate(zip(senses, expected_values)):
            sense_label = "sense " + str(ix + 1)
            sfss = sense.xpath("./btw:semantic-fields",
                               namespaces=xml.default_namespace_mapping)
            self.assertEqual(len(sfss), 1,
                             "there should be only one btw:semantic-fields "
                             "in " + sense_label)
            sfs = [sf.text for sf in sfss[0]]
            self.assertEqual(sfs, expected,
                             "the list of semantic fields should be correct "
                             "in " + sense_label)

        # The overview section aggregates the fields of all senses.
        sfss = tree.xpath("/btw:entry/btw:overview/btw:semantic-fields",
                          namespaces=xml.default_namespace_mapping)
        self.assertEqual(len(sfss), 1,
                         "there should be only one btw:semantic-fields "
                         "element")
        sfs = [sf.text for sf in sfss[0]]
        self.assertEqual(sfs, [
            "01.02.11n",
            "Person (01.04.04n)",
            "01.04.08n",
            "By eating habits (01.05.05n)",
            "01.06.07n",  # By family relationships ,
            "Belief (02.01.13n)",
            "Expectation (02.01.14n)",
            "02.01.17n",
            "Good taste (02.02.12n)",
            "Bad taste (02.02.13n)",
            "Fashionableness (02.02.14n)",
            "Beautification (02.02.18n)",
            "Lack of beauty (02.02.19n)",
            "02.02.22n",
            "Written laws (03.05.01n)",
            "Education (03.07n)",
            "03.07.00n",
            "Learning (03.07.03n)"
        ],
            "the list of semantic fields should be correct")
        self.assertIsNone(sfss[0].getnext())