def prepare_xml(pk):
    """
    Prepare a chunk for display, cache the prepared XML and, when
    needed, store that XML in the eXist database.

    :param pk: The primary key of the chunk to prepare.
    :type pk: :class:`int`
    :raises Exception: If loading the XML into eXist fails.
    """
    # Running inside a transaction and fetching the metadata row with
    # select_for_update is meant to keep other prepare_xml tasks from
    # working on the same chunk at the same time.
    with transaction.atomic():
        chunk = Chunk.objects.get(pk=pk)
        cache_key = chunk.display_key("xml")
        logger.debug("%s processing...", cache_key)
        metadata, _ = (ChunkMetadata.objects
                       .select_for_update()
                       .get_or_create(chunk=chunk))

        xml, sf_records = prepare_article_data(chunk.data)

        cache.set(cache_key, xml, timeout=settings.LEXICOGRAPHY_XML_TIMEOUT)
        logger.debug("%s is set", cache_key)

        # Encode once; the same bytes feed the hash and the eXist load.
        encoded = xml.encode("utf-8")
        digest = hashlib.sha1(encoded).hexdigest()

        exist_db = ExistDB()
        exist_path = get_path_for_chunk_hash("display", pk)
        missing = not exist_db.hasDocument(exist_path)
        hash_unchanged = metadata.xml_hash == digest

        # Nothing to do when the stored hash matches and eXist already
        # holds the document.
        if hash_unchanged and not missing:
            return

        if hash_unchanged:
            # The hash matches but the document is gone from eXist.
            # This should never happen. It was seen once in
            # development and the cause is unknown.
            logger.error(
                "%s was missing from eXist but had a value "
                "already set and equal to the new hash; this "
                "should not happen!", exist_path)

        metadata.semantic_fields.set(sf_records)
        # The hash is assigned unconditionally: guarding the assignment
        # for the case where it is already set would not perform better.
        metadata.xml_hash = digest
        metadata.save()

        if not exist_db.load(encoded, exist_path):
            raise Exception("could not sync with eXist database")
def prepare_xml(pk):
    """
    Prepare a chunk for display, cache the prepared XML and, when
    needed, store that XML in the eXist database.

    :param pk: The primary key of the chunk to prepare.
    :type pk: :class:`int`
    :raises Exception: If loading the XML into eXist fails.
    """
    # Running inside a transaction and fetching the metadata row with
    # select_for_update is meant to keep other prepare_xml tasks from
    # working on the same chunk at the same time.
    with transaction.atomic():
        chunk = Chunk.objects.get(pk=pk)
        cache_key = chunk.display_key("xml")
        logger.debug("%s processing...", cache_key)
        metadata, _ = (ChunkMetadata.objects
                       .select_for_update()
                       .get_or_create(chunk=chunk))

        xml, sf_records = prepare_article_data(chunk.data)

        cache.set(cache_key, xml, timeout=settings.LEXICOGRAPHY_XML_TIMEOUT)
        logger.debug("%s is set", cache_key)

        # Encode once; the same bytes feed the hash and the eXist load.
        encoded = xml.encode("utf-8")
        digest = hashlib.sha1(encoded).hexdigest()

        exist_db = ExistDB()
        exist_path = get_path_for_chunk_hash("display", pk)
        missing = not exist_db.hasDocument(exist_path)
        hash_unchanged = metadata.xml_hash == digest

        # Nothing to do when the stored hash matches and eXist already
        # holds the document.
        if hash_unchanged and not missing:
            return

        if hash_unchanged:
            # The hash matches but the document is gone from eXist.
            # This should never happen. It was seen once in
            # development and the cause is unknown.
            logger.error(
                "%s was missing from eXist but had a value "
                "already set and equal to the new hash; this "
                "should not happen!", exist_path)

        metadata.semantic_fields.set(sf_records)
        # The hash is assigned unconditionally: guarding the assignment
        # for the case where it is already set would not perform better.
        metadata.xml_hash = digest
        metadata.save()

        if not exist_db.load(encoded, exist_path):
            raise Exception("could not sync with eXist database")
def test_complex_document(self):
    # Runs prepare_xml on a valid article, then checks the semantic
    # fields found in the cached XML: first per sense, then in the
    # overview.
    #
    # The fetch task is launched here, in this test only, because the
    # other tests do not need this data.
    launch_fetch_task()
    article = create_valid_article()
    chunk = article.latest.c_hash
    tasks.prepare_xml.delay(chunk.c_hash).get()

    # Check that the correct results are in the cache.
    cached_xml = cache.get(chunk.display_key("xml"))
    exist_db = ExistDB()
    self.assertTrue(exist_db.hasDocument(chunk.exist_path("display")))
    tree = lxml.etree.fromstring(cached_xml)
    ns = xml.default_namespace_mapping

    senses = tree.xpath(
        "/btw:entry/btw:sense-discrimination/btw:sense",
        namespaces=ns)
    self.assertEqual(len(senses), 4)

    # Expected semantic fields, one list per sense, in document order.
    per_sense_expected = [
        [
            "01.02.11n",
            "Person (01.04.04n)",
            "01.04.08n",
            "01.05.05.09.01n",
            "01.06.07.03n",
            "Beautification (02.02.18n)",
            "Lack of beauty (02.02.19n)",
            "Written laws (03.05.01n)",
        ],
        [
            "Belief (02.01.13n)",
            "Belief, trust, confidence (02.01.13.02n)",
            "Act of convincing, conviction (02.01.13.02.02n)",
            "Absence of doubt, confidence (02.01.13.08.11n)",
            "Making certain, assurance (02.01.13.08.11.01.01n)",
            "Expectation (02.01.14n)",
            "02.01.17n",
            "Good taste (02.02.12n)",
            "Bad taste (02.02.13n)",
            "Fashionableness (02.02.14n)",
            "02.02.22n",
            "Education (03.07n)",
        ],
        [
            "01.05.05.12.01n",
        ],
        [
            "02.01.17n",
            "Good taste (02.02.12n)",
            "Bad taste (02.02.13n)",
            "03.07.00.23n",
            "Learning (03.07.03n)",
        ],
    ]

    for number, (sense, expected) in enumerate(
            zip(senses, per_sense_expected), start=1):
        label = "sense " + str(number)
        containers = sense.xpath("./btw:semantic-fields", namespaces=ns)
        self.assertEqual(
            len(containers), 1,
            "there should be only one btw:semantic-fields "
            "in " + label)
        found = [field.text for field in containers[0]]
        self.assertEqual(
            found, expected,
            "the list of semantic fields should be correct "
            "in " + label)

    # Expected semantic fields of the overview.
    overview_expected = [
        "01.02.11n",
        "Person (01.04.04n)",
        "01.04.08n",
        "By eating habits (01.05.05n)",
        "01.06.07n",  # By family relationships
        "Belief (02.01.13n)",
        "Expectation (02.01.14n)",
        "02.01.17n",
        "Good taste (02.02.12n)",
        "Bad taste (02.02.13n)",
        "Fashionableness (02.02.14n)",
        "Beautification (02.02.18n)",
        "Lack of beauty (02.02.19n)",
        "02.02.22n",
        "Written laws (03.05.01n)",
        "Education (03.07n)",
        "03.07.00n",
        "Learning (03.07.03n)",
    ]

    containers = tree.xpath(
        "/btw:entry/btw:overview/btw:semantic-fields",
        namespaces=ns)
    self.assertEqual(
        len(containers), 1,
        "there should be only one btw:semantic-fields "
        "element")
    found = [field.text for field in containers[0]]
    self.assertEqual(found, overview_expected,
                     "the list of semantic fields should be correct")

    # The overview's btw:semantic-fields must have no next sibling.
    self.assertIsNone(containers[0].getnext())