def getTextualNode(self, subreference=None):
    """ Request a passage from the retriever and wrap the reply in a CtsPassage

    The first ``ti:request`` element of the reply is handed to ``_parse_request``
    on the current object before the passage is built.

    :param subreference: Identifier of the passage. A URN is used as is; a CtsReference or a
        list of strings (joined with dots) is appended to the URN of the text; a string holding
        a colon is used as a complete URN; any other string is appended to
        ``self.urn.upTo(URN.NO_PASSAGE)``. Any other value requests the URN of the text itself
    :type subreference: Union[CtsReference, URN, str, list]
    :rtype: CtsPassage
    :returns: Object representing the passage
    """
    template = "{0}:{1}"

    # The order of the type checks is significant and kept as is
    if isinstance(subreference, URN):
        urn = str(subreference)
    elif isinstance(subreference, CtsReference):
        urn = template.format(self.urn, str(subreference))
    elif isinstance(subreference, str):
        urn = (
            subreference
            if ":" in subreference
            else template.format(self.urn.upTo(URN.NO_PASSAGE), subreference)
        )
    elif isinstance(subreference, list):
        urn = template.format(self.urn, ".".join(subreference))
    else:
        urn = str(self.urn)

    reply = xmlparser(self.retriever.getPassage(urn=urn))
    request_nodes = reply.xpath("//ti:request", namespaces=XPATH_NAMESPACES)
    self._parse_request(request_nodes[0])
    return CtsPassage(urn=urn, resource=reply, retriever=self.retriever)
def test_export_structured_metadata(self):
    """ Structured metadata of a textgroup should be found in its CapiTainS CTS export """
    with open("./tests/testing_data/capitains/textgroup_with_structured.xml") as source:
        textgroup = XmlCtsTextgroupMetadata.parse(source)

    exported = xmlparser(textgroup.export(Mimetypes.XML.CapiTainS.CTS))

    # Work on a copy of the shared namespaces, extended with the prefixes used below
    namespaces = dict(XPATH_NAMESPACES)
    namespaces.update([
        ("scm", "http://schema.org/"),
        ("saws", "http://purl.org/saws/ontology#"),
        ("dct", "http://purl.org/dc/terms/"),
    ])

    # (xpath, values expected in any order)
    expectations = [
        ("./cpt:structured-metadata//scm:birthDate",
         ['457BCE', -384]),
        ("./cpt:structured-metadata//scm:birthPlace",
         ['Stagire', "https://pleiades.stoa.org/places/501625"]),
        ("./ti:work/cpt:structured-metadata/saws:cost",
         [1.5]),
        (".//ti:translation/cpt:structured-metadata/dct:dateCopyrighted",
         [1837]),
    ]
    for path, values in expectations:
        self.assertCountEqual(exported.xpath(path, namespaces=namespaces), values)
def parse(cls, identifier: str, reference: DtsReference, resolver: "HttpDtsResolver", response: "Response"):
    """ Build an object from an HTTP response and its ``Link`` header

    The body of the response is parsed as XML and used as the resource. Each link
    of the ``Link`` header is then indexed by its relation, and the query string of
    its target is converted through ``_dict_to_ref`` to fill the navigation
    attributes of the new object.

    :param identifier: Identifier given to the new object
    :param reference: Reference given to the new object
    :param resolver: Resolver given to the new object
    :param response: Response providing ``text`` and ``headers``
    :returns: Newly created object
    """
    o = cls(identifier=identifier, reference=reference, resolver=resolver, resource=xmlparser(response.text))

    parsed_header = link_header.parse(response.headers.get("Link", ""))
    # relation -> parsed query string of the link target
    links = {
        link.rel: parse_qs(urlparse(link.href).query)
        for link in parsed_header.links
    }

    if links.get("next"):
        o._next_id = o._dict_to_ref(links["next"])
    if links.get("prev"):
        o._prev_id = o._dict_to_ref(links["prev"])

    # The guard used to test "parent" while the value was read from "up", so a
    # header holding only one of the two was either ignored or converted from None.
    # Both relation names are accepted, "up" taking precedence.
    parent = links.get("up") or links.get("parent")
    if parent:
        o._parent = o._dict_to_ref(parent)

    if links.get("first"):
        o._first_id = o._dict_to_ref(links["first"])
    # The guard used to test "parent" instead of "last"
    if links.get("last"):
        o._last_id = o._dict_to_ref(links["last"])
    if links.get("collection"):
        o._collection = o._dict_to_ref(links["collection"])

    return o
def getValidReff(self, level=1, reference=None):
    """ Ask the retriever for the valid references of the text

    The first ``ti:request`` element of the reply is handed to ``_parse_request``.

    :param level: Depth required (1 based). -1 is replaced by the length of ``self.citation``
    :type level: Int
    :param reference: Passage reference appended to the URN of the text when given
    :type reference: CtsReference
    :rtype: list(str)
    :returns: Last colon-separated component of each URN found in the reply
    """
    urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)

    if level == -1:
        level = len(self.citation)

    reply = xmlparser(self.retriever.getValidReff(level=level, urn=urn))

    request_nodes = reply.xpath("//ti:request", namespaces=XPATH_NAMESPACES)
    self._parse_request(request_nodes[0])

    found = reply.xpath("//ti:reply//ti:urn/text()", namespaces=XPATH_NAMESPACES)
    return [full_urn.rsplit(":", 1)[-1] for full_urn in found]
def prevnext(resource):
    """ Read the previous and next references out of a reply

    :param resource: XML Resource
    :type resource: etree._Element
    :return: Tuple (previous, next). Both are False when the resource holds no ``ti:prevnext``
        element; otherwise each one is the last colon-separated component of the matching URN,
        or None when that URN is absent
    :rtype: (str, str)
    """
    tree = xmlparser(resource)
    containers = tree.xpath("//ti:prevnext", namespaces=XPATH_NAMESPACES)
    if len(containers) == 0:
        return False, False

    container = containers[0]

    def _last_component(path):
        # Last colon-separated part of the first text node matched by path, if any
        hits = container.xpath(path, namespaces=XPATH_NAMESPACES, smart_strings=False)
        if len(hits):
            return hits[0].split(":")[-1]
        return None

    following = _last_component("ti:next/ti:urn/text()")
    preceding = _last_component("ti:prev/ti:urn/text()")
    return preceding, following
def test_changing_space(self):
    """ Plain text export should honour a user defined joining string """
    root = xmlparser(
        """<root>in- genium<note place="unspecified">ingenium <hi rend="italic">ll.v</hi>(<hi rend="italic">G</hi>). -nio <hi rend="italic">B.</hi> in ganea <hi rend="italic">J</hi></note><add>n</add>a<add>t</add>us</root>"""
    )
    passage = TeiResource(identifier="dummy", resource=root)
    # Replace the joining string used by the plain text export
    passage.plaintext_string_join = ""

    plaintext = passage.export(Mimetypes.PLAINTEXT, exclude=["note"])
    self.assertEqual(plaintext, "in- geniumnatus")
def test_str(self):
    """ The standard XML export should be equal to the markup given as resource """
    markup = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    passage = TeiResource(identifier="dummy", resource=xmlparser(markup))

    serialized = passage.export(Mimetypes.XML.Std)
    self.assertEqual(serialized, markup)
def test_ingest_and_match(self):
    """ Citations parsed from a CTS edition should be nested and match references by depth """
    edition = xmlparser(
        """<ns0:edition urn='urn:cts:latinLit:phi1294.phi002.perseus-lat2' workUrn='urn:cts:latinLit:phi1294.phi002' xml:lang="lat" xmlns:ns0='http://chs.harvard.edu/xmlns/cts'> <ns0:label xml:lang='eng'>Epigrammata Label</ns0:label> <ns0:label xml:lang='fre'>Epigrammes Label</ns0:label> <ns0:description xml:lang='eng'>W. Heraeus</ns0:description> <ns0:description xml:lang='fre'>G. Heraeus</ns0:description> <ns0:online> <ns0:citationMapping> <ns0:citation label='book' xpath="/tei:div[@n='?']" scope='/tei:TEI/tei:text/tei:body/tei:div'> <ns0:citation label='poem' xpath="/tei:div[@n='?']" scope="/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='?']"> <ns0:citation label='line' xpath="/tei:l[@n='?']" scope="/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='?']/tei:div[@n='?']"></ns0:citation> </ns0:citation> </ns0:citation> </ns0:citationMapping> </ns0:online> </ns0:edition>""".replace("\n", "")
    )
    citation = (XmlCtsEditionMetadata.parse(edition)).citation

    # The parsed citation is expected to be the root of the chain
    self.assertEqual(citation.name, "book", "Name should have been parsed")
    self.assertEqual(citation.child.name, "poem", "Name of child should have been parsed")
    self.assertEqual(citation.child.child.name, "line", "Name of descendants should have been parsed")
    self.assertEqual(citation.is_root(), True, "Root should be true on root")

    # Matching from the root returns the citation of the depth of the reference
    self.assertEqual(citation.match("1.2"), citation.child, "Matching should make use of root matching")
    self.assertEqual(citation.match("1.2.4"), citation.child.child, "Matching should make use of root matching")
    self.assertEqual(citation.match("1"), citation, "Matching should make use of root matching")

    # The same name checks are run from the child of the root, then from the
    # child of the second level
    by_name = (
        ("1.2", "poem", "Matching should retrieve poem at 2nd level"),
        ("1.2.4", "line", "Matching should retrieve line at 3rd level"),
        ("1", "book", "Matching retrieve book at 1st level"),
    )
    for start in (citation, citation.child):
        for passage, level_name, message in by_name:
            self.assertEqual(start.child.match(passage).name, level_name, message)
def test_exportable_capacities(self):
    """ Check the ordered list of export formats announced by a TeiResource """
    node = xmlparser(
        '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    )
    passage = TeiResource(identifier="dummy", resource=node)

    capacities = passage.export_capacities
    expected = [
        Mimetypes.PYTHON.ETREE,
        Mimetypes.XML.Std,
        Mimetypes.PYTHON.NestedDict,
        Mimetypes.PLAINTEXT,
        Mimetypes.XML.TEI,
    ]
    self.assertEqual(
        capacities, expected,
        "CapitainsCtsPassage should be able to export to given resources"
    )
def firstUrn(resource):
    """ Parse a resource to get the first URN

    :param resource: XML Resource
    :type resource: etree._Element
    :return: Last colon-separated component of the first ``ti:urn`` found directly under
        ``ti:reply``, or None when the reply holds no such URN
    :rtype: str or None
    """
    resource = xmlparser(resource)
    # ``magic_string`` is not an option of lxml: it was bound as an XPath variable.
    # ``smart_strings=False`` (as used by ``prevnext``) returns plain strings.
    urns = resource.xpath(
        "//ti:reply/ti:urn/text()",
        namespaces=XPATH_NAMESPACES,
        smart_strings=False
    )

    # An empty reply has no URN to split
    if len(urns) == 0:
        return None

    return str(urns[0]).split(":")[-1]
def test_text(self):
    """ Plain text export should keep or drop the text of excluded nodes """
    node = xmlparser(
        '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    )
    passage = TeiResource(identifier="dummy", resource=node)

    # Nothing excluded: the text of the note is part of the output
    complete = passage.export(output=Mimetypes.PLAINTEXT)
    self.assertEqual(complete, "Ibis hello b ab excusso missus in astra sago. ")

    # Notes excluded
    without_notes = passage.export(output=Mimetypes.PLAINTEXT, exclude=["note"])
    self.assertEqual(without_notes, "Ibis ab excusso missus in astra sago. ")
def getLabel(self):
    """ Ask the retriever for the label of the text and parse it

    The first ``ti:label`` element found under ``ti:reply`` is handed to ``_parse_request``.

    :rtype: Metadata
    :returns: ``self.metadata`` once the reply has been parsed
    """
    reply = xmlparser(self.retriever.getLabel(urn=str(self.urn)))
    label_nodes = reply.xpath("//ti:reply/ti:label", namespaces=XPATH_NAMESPACES)
    self._parse_request(label_nodes[0])
    return self.metadata
def test_ingest_single(self):
    """ A single cRefPattern should be ingested and exported back as TEI """
    # Backslashes are doubled in the literals below so that they hold no unrecognised
    # escape sequence; the runtime values are unchanged.
    # NOTE: str.replace is literal, not a regular expression: the second call looks for
    # the three characters backslash, "s" and "+".
    b = xmlparser(
        """ <tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0"> <tei:cRefPattern n="line" matchPattern="(\\w+).(\\w+).(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2']/tei:l[@n='$3'])"> <tei:p>This pointer pattern extracts book and poem and line</tei:p> </tei:cRefPattern> </tei:tei> """.replace("\n", "").replace("\\s+", " ")
    )
    a = Citation.ingest(b)

    self.assertEqual(
        a.export(Mimetypes.XML.TEI),
        """<tei:cRefPattern n="line" matchPattern="(\\w+)\\.(\\w+)\\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
    )
def getPassagePlus(self, reference=None):
    """ Retrieve a passage and informations around it and store it in the object

    The first ``ti:label`` element found under ``ti:reply`` is parsed by the passage,
    and the citation of the passage is copied on the current object.

    :param reference: Reference of the passage. A list of strings is joined with dots.
        When empty or None, the URN of the text itself is requested
    :type reference: CtsReference or List of text_type
    :rtype: CtsPassage
    :returns: Object representing the passage
    """
    # A list used to be formatted through its repr ("['1', '2']"); join it the way
    # getTextualNode does. An empty list still falls back to the URN of the text.
    if isinstance(reference, list):
        reference = ".".join(reference)

    if reference:
        urn = "{0}:{1}".format(self.urn, reference)
    else:
        urn = str(self.urn)

    response = xmlparser(self.retriever.getPassagePlus(urn=urn))

    passage = CtsPassage(urn=urn, resource=response, retriever=self.retriever)
    passage._parse_request(response.xpath("//ti:reply/ti:label", namespaces=XPATH_NAMESPACES)[0])
    self.citation = passage.citation

    return passage
def test_ingest_single_and(self):
    """ A cRefPattern whose xpath holds an "and" condition should be ingested correctly """
    # The backslash of the second replace is doubled so that the literal holds no
    # unrecognised escape sequence; the runtime value is unchanged.
    # NOTE: str.replace is literal, not a regular expression.
    text = xmlparser(
        """ <tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0"> <tei:cRefPattern n="section" matchPattern="(.+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n='$1' and @type='section'])" /> </tei:tei> """.replace("\n", "").replace("\\s+", " ")
    )
    citation = Citation.ingest(text)
    # Show complete diffs on failure
    self.maxDiff = None

    self.assertEqual(
        citation.export(Mimetypes.XML.TEI),
        """<tei:cRefPattern n="section" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'$1\' and @type='section'])"><tei:p>This pointer pattern extracts section</tei:p></tei:cRefPattern>"""
    )
    self.assertEqual(
        citation.scope,
        "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']"
    )
    self.assertEqual(citation.xpath, "/tei:div[@n='?' and @type='section']")
    self.assertEqual(
        citation.fill("1"),
        "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'1\' and @type='section']"
    )
def test_ingest_and_match(self):
    """ Ensure matching and parsing XML works correctly """
    # Backslashes of the matchPattern attributes are doubled so that the literal holds
    # no unrecognised escape sequence; the runtime value is unchanged.
    xml = xmlparser(
        """<TEI xmlns="http://www.tei-c.org/ns/1.0"> <refsDecl n="CTS"> <cRefPattern n="line" matchPattern="(\\w+).(\\w+).(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2']/tei:l[@n='$3'])"> <p>This pointer pattern extracts book and poem and line</p> </cRefPattern> <cRefPattern n="poem" matchPattern="(\\w+).(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2'])"> <p>This pointer pattern extracts book and poem</p> </cRefPattern> <cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1'])"> <p>This pointer pattern extracts book</p> </cRefPattern> </refsDecl> </TEI>"""
    )
    citation = Citation.ingest(xml)

    # The citation that should be returned is the root
    self.assertEqual(citation.name, "book", "Name should have been parsed")
    self.assertEqual(citation.child.name, "poem", "Name of child should have been parsed")
    self.assertEqual(citation.child.child.name, "line", "Name of descendants should have been parsed")

    self.assertEqual(citation.is_root(), True, "Root should be true on root")
    self.assertEqual(citation.match("1.2"), citation.child, "Matching should make use of root matching")
    self.assertEqual(citation.match("1.2.4"), citation.child.child, "Matching should make use of root matching")
    self.assertEqual(citation.match("1"), citation, "Matching should make use of root matching")

    # Matching started from the child of the root
    self.assertEqual(citation.child.match("1.2").name, "poem", "Matching should retrieve poem at 2nd level")
    self.assertEqual(citation.child.match("1.2.4").name, "line", "Matching should retrieve line at 3rd level")
    self.assertEqual(citation.child.match("1").name, "book", "Matching retrieve book at 1st level")

    # Same checks, started one level deeper
    citation = citation.child
    self.assertEqual(citation.child.match("1.2").name, "poem", "Matching should retrieve poem at 2nd level")
    self.assertEqual(citation.child.match("1.2.4").name, "line", "Matching should retrieve line at 3rd level")
    self.assertEqual(citation.child.match("1").name, "book", "Matching retrieve book at 1st level")
def test_ingest_multiple(self):
    """ Several cRefPattern should be ingested as a chain going from book to line """
    # Backslashes are doubled in the literals below so that they hold no unrecognised
    # escape sequence; the runtime values are unchanged.
    # NOTE: str.replace is literal, not a regular expression: the second call looks for
    # the three characters backslash, "s" and "+".
    b = xmlparser(
        """ <tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0"> <tei:cRefPattern n="line" matchPattern="(\\w+).(\\w+).(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1' and @type='section']/tei:div[@n='$2']/tei:l[@n='$3'])"> <tei:p>This pointer pattern extracts line</tei:p> </tei:cRefPattern> <tei:cRefPattern n="poem" matchPattern="(\\w+).(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2'])"> <tei:p>This pointer pattern extracts poem</tei:p> </tei:cRefPattern> <tei:cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1'])"> <tei:p>This pointer pattern extracts book</tei:p> </tei:cRefPattern> </tei:tei> """.replace("\n", "").replace("\\s+", " ")
    )
    a = Citation.ingest(b)

    # Root of the chain
    self.assertEqual(
        a.export(Mimetypes.XML.TEI),
        """<tei:cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\'])"><tei:p>This pointer pattern extracts book</tei:p></tei:cRefPattern>"""
    )
    # Second level
    self.assertEqual(
        a.child.export(Mimetypes.XML.TEI),
        """<tei:cRefPattern n="poem" matchPattern="(\\w+)\\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\'])"><tei:p>This pointer pattern extracts poem</tei:p></tei:cRefPattern>"""
    )
    # Third level
    self.assertEqual(
        a.child.child.export(Mimetypes.XML.TEI),
        """<tei:cRefPattern n="line" matchPattern="(\\w+)\\.(\\w+)\\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\' and @type=\'section\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
    )
    self.assertEqual(
        a.child.child.fill(CtsReference("1.2.3")),
        "/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'1\' and @type=\'section\']/tei:div[@n=\'2\']/tei:l[@n=\'3\']"
    )
def __init__(self, resource: str, **kwargs):
    """ Initialise the resource from an XML source

    :param resource: XML source, passed to ``xmlparser`` and stored as ``self.resource``
    :param kwargs: Remaining keyword arguments, forwarded to the parent constructor
    """
    super(TeiResource, self).__init__(**kwargs)
    self.resource = xmlparser(resource)
    # Instance level joining string, initialised from the class constant. The
    # concatenation with "" raises a TypeError if the constant is not a string.
    # NOTE(review): presumably meant to force a str value — confirm.
    self._plaintext_string_join = "" + self.PLAINTEXT_STRING_JOIN
import unittest
# Rebinds the name ``str`` to the text type of six for the whole module
from six import text_type as str
from io import open

from MyCapytain.resources.texts.remote.cts import CtsPassage, CtsText
from MyCapytain.retrievers.cts5 import HttpCtsRetriever
from MyCapytain.common.reference._capitains_cts import CtsReference, URN, Citation
from MyCapytain.common.metadata import Metadata
from MyCapytain.common.utils.xml import xmlparser
from MyCapytain.common.constants import XPATH_NAMESPACES, Mimetypes, RDF_NAMESPACES
from MyCapytain.errors import MissingAttribute
import mock

# CTS replies used as fixtures: each file is read and parsed once, when the module
# is imported. Paths are relative to the current working directory.
with open("tests/testing_data/cts/getValidReff.xml") as f:
    GET_VALID_REFF = xmlparser(f)
with open("tests/testing_data/cts/getpassage.xml") as f:
    GET_PASSAGE = xmlparser(f)
with open("tests/testing_data/cts/getpassageplus.xml") as f:
    GET_PASSAGE_PLUS = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.xml") as f:
    NEXT_PREV = xmlparser(f)
with open("tests/testing_data/cts/getFirstUrn.xml") as f:
    Get_FIRST = xmlparser(f)
with open("tests/testing_data/cts/getFirstUrnEmpty.xml") as f:
    Get_FIRST_EMPTY = xmlparser(f)
with open("tests/testing_data/cts/getlabel.xml") as f:
    GET_LABEL = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.1.1.xml") as f:
    GET_VALID_REFF_1_1 = xmlparser(f)
from MyCapytain.resolvers.cts.api import HttpCtsResolver from MyCapytain.retrievers.cts5 import HttpCtsRetriever from MyCapytain.common.utils.xml import xmlparser from MyCapytain.common.constants import XPATH_NAMESPACES, Mimetypes from MyCapytain.resources.prototypes.cts.text import PrototypeCtsPassage from MyCapytain.resources.collections.cts import XmlCtsTextInventoryMetadata, XmlCtsTextgroupMetadata, XmlCtsWorkMetadata, XmlCtsTextMetadata from MyCapytain.resources.prototypes.metadata import Collection from unittest import TestCase from mock import MagicMock with open("tests/testing_data/cts/getpassage.xml") as f: GET_PASSAGE = xmlparser(f) with open("tests/testing_data/cts/getpassageplus.xml") as f: GET_PASSAGE_PLUS = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.xml") as f: NEXT_PREV = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.nextonly.xml") as f: NEXT = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.prevonly.xml") as f: PREV = xmlparser(f) with open("tests/testing_data/cts/getValidReff.xml") as f: GET_VALID_REFF_FULL = xmlparser(f) with open("tests/testing_data/cts/getValidReff.1.1.xml") as f: GET_VALID_REFF = xmlparser(f) with open("tests/testing_data/cts/getCapabilities.xml") as f: GET_CAPABILITIES = xmlparser(f) with open("tests/testing_data/cts/getCapabilities1294002.xml") as f: GET_CAPABILITIES_FILTERED = xmlparser(f) with open("tests/testing_data/cts/getPassageOtherTest.xml") as f:
def test_xml(self):
    """ The xml attribute should be the very object given as resource """
    node = xmlparser(
        '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    )
    passage = TeiResource(identifier="dummy", resource=node)

    # Identity, not equality
    self.assertIs(node, passage.xml)
[("tei", "<div>", "<div type='{epidoc}' n='{urn}' xml:lang='{lang}'>", False, "TEI fails with urn and xml lang on @n/div-{epidoc}"), ("tei", "<div>", "<div type='{epidoc}' xml:base='{urn}' xml:lang='{lang}'>", False, "TEI fails with urn and xml lang on @xml:base/div-{epidoc}"), ("tei", "<div>", "<div type='{epidoc}' xml:base='{urn}' xml:lang='{lang}'>", False, "TEI fails with urn and without xml lang on @n/div-{epidoc}"), ("tei", "<div>", "<div type='{epidoc}' n='{urn}' xml:lang='{lang}'>", False, "TEI fails with urn and without xml lang on @xml:base/div-{epidoc}"), ("epidoc", "<div>", "<div type='{epidoc}' n='{urn}' xml:lang='{lang}'>", True, "Epidoc works with urn and xml lang on @n/div-{epidoc}"), ("epidoc", "<div>", "<div type='{epidoc}' xml:base='{urn}' xml:lang='{lang}'>", False, "Epidoc fails with urn and xml lang on @xml:base/div-{epidoc}"), ("epidoc", "<div>", "<div type='{epidoc}' xml:base='{urn}'>", False, "Epidoc fails with urn and without xml lang on @n/div-{epidoc}"), ("epidoc", "<div>", "<div type='{epidoc}' n='{urn}'>", False, "Epidoc fails with urn and without xml lang on @xml:base/div-{epidoc}")] for type_epidoc in ["edition", "translation", "commentary"] ] XMLLANG_DOCUMENTS = [ (scheme, tostring(xmlparser( TEMPLATES.replace(source, replacement).format(urn=URN, lang=LANG)), encoding=str), boolean, msg + " (" + replacement.format(urn=URN, lang=LANG) + ")") for scheme, source, replacement, boolean, msg in XMLLANG_DOCUMENTS ]
def __export__(self, output=None, domain=""):
    """ Export the object as a parsed XML tree

    :param output: Requested mimetype. Only ``Mimetypes.PYTHON.ETREE`` is handled here
    :param domain: Not used by this method
    :returns: The XML CTS export of the object parsed by ``xmlparser`` when an etree is
        requested, None otherwise
    """
    if output == Mimetypes.PYTHON.ETREE:
        serialized = self.export(output=Mimetypes.XML.CTS)
        return xmlparser(serialized)
    return None
import unittest from io import open import xmlunittest from lxml import etree import MyCapytain.common.reference import MyCapytain.common.reference._capitains_cts import MyCapytain.errors import MyCapytain.resources.texts.base.tei import MyCapytain.resources.texts.local.capitains.cts from MyCapytain.common.utils.xml import xmlparser from tests.resources.texts.local.commonTests import CapitainsXmlTextTest, CapitainsXmlPassageTests, CapitainsXMLRangePassageTests objectifiedParser = lambda x: xmlparser(x, objectify=False) class TestLocalXMLTextImplementation(CapitainsXmlTextTest, unittest.TestCase, xmlunittest.XmlTestMixin): """ Test XML Implementation of resources found in local file """ def setUp(self): self.text = open("tests/testing_data/texts/sample.xml", "rb") self.TEI = MyCapytain.resources.texts.local.capitains.cts.CapitainsCtsText( resource=objectifiedParser(self.text), urn="urn:cts:latinLit:phi1294.phi002.perseus-lat2") self.treeroot = etree._ElementTree() with open("tests/testing_data/texts/text_or_xpath.xml") as f: self.text_complex = MyCapytain.resources.texts.local.capitains.cts.CapitainsCtsText( resource=objectifiedParser(f),