def prevnext(resource):
    """ Read the previous and next references out of a reply

    :param resource: XML Resource
    :type resource: etree._Element
    :return: Tuple (previous, next). Both are False when no ti:prevnext node is found; otherwise
        each item is the last colon-separated part of its URN, or None when that URN is missing
    :rtype: (str, str)
    """
    tree = xmlparser(resource)
    containers = tree.xpath("//ti:prevnext", namespaces=XPATH_NAMESPACES)
    if len(containers) == 0:
        return False, False

    # Only the first ti:prevnext node is read
    container = containers[0]
    following = container.xpath("ti:next/ti:urn/text()", namespaces=XPATH_NAMESPACES, smart_strings=False)
    preceding = container.xpath("ti:prev/ti:urn/text()", namespaces=XPATH_NAMESPACES, smart_strings=False)

    next_ref = following[0].split(":")[-1] if len(following) else None
    prev_ref = preceding[0].split(":")[-1] if len(preceding) else None
    return prev_ref, next_ref
def getPassage(self, reference=None):
    """ Request a passage from the retriever and wrap the reply in a Passage

    :param reference: Reference of the passage. A URN or a string containing ":" is used as the full URN; \
    a Reference, a plain string or a list of strings is appended to the URN of the text
    :type reference: Reference, or URN, or str or list(str)
    :rtype: Passage
    :returns: Object representing the passage
    """
    template = "{0}:{1}"
    if isinstance(reference, URN):
        urn = str(reference)
    elif isinstance(reference, Reference):
        urn = template.format(self.urn, str(reference))
    elif isinstance(reference, str):
        # A colon means the caller already gave a complete URN
        urn = reference if ":" in reference else template.format(self.urn, reference)
    elif isinstance(reference, list):
        urn = template.format(self.urn, ".".join(reference))
    else:
        # Anything else (including None) falls back to the URN of the text
        urn = str(self.urn)

    parsed = xmlparser(self.retriever.getPassage(urn=urn))
    request_nodes = parsed.xpath("//ti:request", namespaces=NS)
    self.__parse_request__(request_nodes[0])
    return Passage(urn=urn, resource=parsed, retriever=self.retriever)
def getTextualNode(self, subreference=None):
    """ Request a passage from the retriever and wrap the reply in a Passage

    :param subreference: Reference of the passage (Note : if given a list, this should be a list of string that
        compose the reference). A URN or a string containing ":" is used as the full URN
    :type subreference: Union[Reference, URN, str, list]
    :rtype: Passage
    :returns: Object representing the passage
    """
    template = "{0}:{1}"
    if isinstance(subreference, URN):
        urn = str(subreference)
    elif isinstance(subreference, Reference):
        urn = template.format(self.urn, str(subreference))
    elif isinstance(subreference, str):
        if ":" in subreference:
            # A colon means the caller already gave a complete URN
            urn = subreference
        else:
            # Plain string references are appended to the URN cut at URN.NO_PASSAGE
            urn = template.format(self.urn.upTo(URN.NO_PASSAGE), subreference)
    elif isinstance(subreference, list):
        urn = template.format(self.urn, ".".join(subreference))
    else:
        urn = str(self.urn)

    parsed = xmlparser(self.retriever.getPassage(urn=urn))
    request_nodes = parsed.xpath("//ti:request", namespaces=NS)
    self.__parse_request__(request_nodes[0])
    return Passage(urn=urn, resource=parsed, retriever=self.retriever)
def getValidReff(self, level=1, reference=None):
    """ Ask the retriever for the valid references of the text

    :param level: Depth required (1 based). -1 is replaced by the length of the citation scheme
    :type level: Int
    :param reference: Passage reference
    :type reference: Reference
    :rtype: list(str)
    :returns: Last colon-separated part of every URN found in the reply
    """
    urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)

    if level == -1:
        level = len(self.citation)

    reply = xmlparser(self.retriever.getValidReff(level=level, urn=urn))
    self.__parse_request__(reply.xpath("//ti:request", namespaces=NS)[0])

    references = []
    for full_urn in reply.xpath("//ti:reply//ti:urn/text()", namespaces=NS):
        references.append(full_urn.split(":")[-1])
    return references
def test_text(self):
    """ Check the plain text export, with and without excluded nodes """
    source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    passage = TEIResource(resource=xmlparser(source))

    # Nothing excluded: every text node is exported
    complete = passage.export(output=Mimetypes.PLAINTEXT)
    self.assertEqual(complete, "Ibis hello b ab excusso missus in astra sago. ")

    # <note> excluded: its content is left out
    without_note = passage.export(output=Mimetypes.PLAINTEXT, exclude=["note"])
    self.assertEqual(without_note, "Ibis ab excusso missus in astra sago. ")
def prevnext(resource):
    """ Read the previous and next references out of a reply

    :param resource: XML Resource
    :type resource: etree._Element
    :return: Tuple (previous, next). Both are False when no ti:prevnext node is found; otherwise
        each item is the last colon-separated part of its URN, or None when that URN is missing
    :rtype: (str, str)
    """
    tree = xmlparser(resource)
    matches = tree.xpath("//ti:prevnext", namespaces=NS)
    if len(matches) == 0:
        return False, False

    # Only the first ti:prevnext node is read
    node = matches[0]
    next_urns = node.xpath("ti:next/ti:urn/text()", namespaces=NS, smart_strings=False)
    prev_urns = node.xpath("ti:prev/ti:urn/text()", namespaces=NS, smart_strings=False)

    following = None
    if len(next_urns):
        following = next_urns[0].split(":")[-1]

    preceding = None
    if len(prev_urns):
        preceding = prev_urns[0].split(":")[-1]

    return preceding, following
def __init__(self, urn=None, citation=None, resource=None):
    """ Initiate the text and, when a resource is given, parse it

    :param urn: URN of the text (NOTE(review): this constructor does not store it, confirm it is set elsewhere)
    :param citation: Citation scheme of the text
    :param resource: XML resource to parse
    :raises RefsDeclError: when the scope of the main citation matches nothing in the resource
    """
    # Represents real full passages / reffs informations. Only way to set it up is getValidReff without passage ?
    self._passages = OrderedDict()
    # Represents passage we got without asking for all. Storing convenience ?
    self._orphan = defaultdict(Reference)
    self._cRefPattern = MyCapytain.resources.texts.tei.Citation()
    self.resource = None
    self.xml = None
    self._URN = None

    if citation is not None:
        self.citation = citation

    if resource is not None:
        self.resource = resource
        self.xml = xmlparser(resource)

        self.__findCRefPattern(self.xml)

        # Any error other than IndexError simply propagates: the former
        # "except Exception as E: raise E" clause did nothing more than that.
        try:
            xml = self.xml.xpath(self.citation.scope, namespaces=NS)[0]
        except IndexError:
            msg = "Main citation scope does not result in any result ({0})".format(self.citation.scope)
            raise RefsDeclError(msg)

        self._passages = Passage(resource=xml, citation=self.citation, urn=self.urn, id=None)
def getValidReff(self, level=1, reference=None):
    """ Ask the retriever for the valid references of the text

    :param level: Depth required (1 based). -1 is replaced by the length of the citation scheme
    :type level: Int
    :param reference: CapitainsCtsPassage reference
    :type reference: Reference
    :rtype: list(str)
    :returns: Last colon-separated part of every URN found in the reply
    """
    urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)

    if level == -1:
        level = len(self.citation)

    reply = xmlparser(self.retriever.getValidReff(level=level, urn=urn))
    self.__parse_request__(reply.xpath("//ti:request", namespaces=XPATH_NAMESPACES)[0])

    references = []
    for full_urn in reply.xpath("//ti:reply//ti:urn/text()", namespaces=XPATH_NAMESPACES):
        references.append(full_urn.split(":")[-1])
    return references
def parse(self, resource):
    """ Parse a work resource and collect its editions and translations

    :param resource: Element representing a work
    :type resource: basestring, etree._Element
    :returns: Mapping of URN to text, editions first, then translations
    """
    xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"

    self.xml = xmlparser(resource)

    work_lang = self.xml.get(xml_lang)
    if work_lang is not None:
        self.lang = work_lang

    # Titles without an xml:lang attribute are ignored
    for title in self.xml.xpath("ti:title", namespaces=NS):
        title_lang = title.get(xml_lang)
        if title_lang is None:
            continue
        self.metadata["title"][title_lang] = title.text

    self.__editions = xpathDict(
        xml=self.xml,
        xpath="ti:edition",
        children=Edition,
        parents=(self,) + self.parents
    )
    self.__translations = xpathDict(
        xml=self.xml,
        xpath="ti:translation",
        children=Translation,
        parents=(self,) + self.parents
    )

    self.texts = collections.defaultdict(Text)
    for group in (self.__editions, self.__translations):
        for urn in group:
            self.texts[urn] = group[urn]

    return self.texts
def getTextualNode(self, subreference=None):
    """ Request a passage from the retriever and wrap the reply in a CtsPassage

    :param subreference: Reference of the passage (Note : if given a list, this should be a list of string that
        compose the reference). A URN or a string containing ":" is used as the full URN
    :type subreference: Union[Reference, URN, str, list]
    :rtype: CtsPassage
    :returns: Object representing the passage
    """
    template = "{0}:{1}"
    if isinstance(subreference, URN):
        urn = str(subreference)
    elif isinstance(subreference, Reference):
        urn = template.format(self.urn, str(subreference))
    elif isinstance(subreference, str):
        if ":" in subreference:
            # A colon means the caller already gave a complete URN
            urn = subreference
        else:
            # Plain string references are appended to the URN cut at URN.NO_PASSAGE
            urn = template.format(self.urn.upTo(URN.NO_PASSAGE), subreference)
    elif isinstance(subreference, list):
        urn = template.format(self.urn, ".".join(subreference))
    else:
        urn = str(self.urn)

    parsed = xmlparser(self.retriever.getPassage(urn=urn))
    request_nodes = parsed.xpath("//ti:request", namespaces=XPATH_NAMESPACES)
    self.__parse_request__(request_nodes[0])
    return CtsPassage(urn=urn, resource=parsed, retriever=self.retriever)
def test_text(self):
    """ Check the text method, with and without excluded nodes """
    source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    passage = Passage(resource=xmlparser(source))

    # Nothing excluded: every text node is returned
    complete = passage.text()
    self.assertEqual(complete, "Ibis hello b ab excusso missus in astra sago. ")

    # <note> excluded: its content is left out
    without_note = passage.text(exclude=["note"])
    self.assertEqual(without_note, "Ibis ab excusso missus in astra sago. ")
def export(self, output="xml"):
    """ Export the object as a parsed XML tree

    :param output: output to be chosen (currently unused, the string form of the object is always parsed)
    :type output: basestring
    :rtype: lxml.etree._Element
    :returns: XML representation of the object
    """
    serialized = str(self)
    return xmlparser(serialized)
def parse(resource, parent=None):
    """ Build commentary metadata from a resource

    :param resource: Element representing the commentary
    :param parent: Parent of the commentary
    :returns: The commentary metadata object
    """
    tree = xmlparser(resource)
    language = tree.get("{http://www.w3.org/XML/1998/namespace}lang")

    commentary = XmlCtsCommentaryMetadata(urn=tree.get("urn"), parent=parent)
    # The language is only set when the node carries an xml:lang attribute
    if language is not None:
        commentary.lang = language

    XmlCtsCommentaryMetadata.parse_metadata(commentary, tree)
    return commentary
def test_str(self):
    """ The string form of a passage should be its XML source """
    source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    passage = Passage(resource=xmlparser(source))
    self.assertEqual(
        str(passage),
        '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    )
def parse(self, resource):
    """ Parse a text inventory resource and collect its textgroups

    :param resource: Element representing the text inventory
    :type resource: basestring, etree._Element
    :returns: The textgroups found anywhere in the resource
    """
    self.xml = xmlparser(resource)
    self.textgroups = xpathDict(
        xml=self.xml,
        xpath="//ti:textgroup",
        children=TextGroup,
        parents=self
    )
    return self.textgroups
def test_changing_space(self):
    """ Plain text export should use the joining string set by the user """
    tree = xmlparser(
        """<root>in- genium<note place="unspecified">ingenium <hi rend="italic">ll.v</hi>(<hi rend="italic">G</hi>). -nio <hi rend="italic">B.</hi> in ganea <hi rend="italic">J</hi></note><add>n</add>a<add>t</add>us</root>"""
    )
    passage = TEIResource(identifier="dummy", resource=tree)
    # Join text nodes with nothing instead of the default
    passage.plaintext_string_join = ""

    exported = passage.export(Mimetypes.PLAINTEXT, exclude=["note"])
    self.assertEqual(exported, "in- geniumnatus")
def test_str(self):
    """ The standard XML export of a resource should be its XML source """
    source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    passage = TEIResource(identifier="dummy", resource=xmlparser(source))

    exported = passage.export(Mimetypes.XML.Std)
    self.assertEqual(
        exported,
        '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    )
def test_text(self):
    """ Check the text method, with and without excluded nodes """
    tree = xmlparser(
        '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    )
    passage = Passage(resource=tree)

    # Nothing excluded: every text node is returned
    self.assertEqual(
        passage.text(),
        "Ibis hello b ab excusso missus in astra sago. "
    )
    # <note> excluded: its content is left out
    self.assertEqual(
        passage.text(exclude=["note"]),
        "Ibis ab excusso missus in astra sago. "
    )
def test_exportable_capacities(self):
    """ A TEIResource should advertise the expected export formats, in order """
    source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    passage = TEIResource(identifier="dummy", resource=xmlparser(source))

    expected = [
        Mimetypes.PYTHON.ETREE,
        Mimetypes.XML.Std,
        Mimetypes.PYTHON.NestedDict,
        Mimetypes.PLAINTEXT,
        Mimetypes.XML.TEI
    ]
    self.assertEqual(
        passage.export_capacities,
        expected,
        "CapitainsCtsPassage should be able to export to given resources"
    )
def getLabel(self):
    """ Ask the retriever for the label of the text and parse it

    :rtype: Metadata
    :returns: Metadata of the object after the first ti:label node of the reply has been parsed
    """
    reply = xmlparser(self.retriever.getLabel(urn=str(self.urn)))
    labels = reply.xpath("//ti:reply/ti:label", namespaces=XPATH_NAMESPACES)
    self.__parse_request__(labels[0])
    return self.metadata
def test_text(self):
    """ Check the plain text export, with and without excluded nodes """
    source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    passage = TEIResource(identifier="dummy", resource=xmlparser(source))

    # Nothing excluded: every text node is exported
    complete = passage.export(output=Mimetypes.PLAINTEXT)
    self.assertEqual(complete, "Ibis hello b ab excusso missus in astra sago. ")

    # <note> excluded: its content is left out
    without_note = passage.export(output=Mimetypes.PLAINTEXT, exclude=["note"])
    self.assertEqual(without_note, "Ibis ab excusso missus in astra sago. ")
def firstUrn(resource):
    """ Parse a resource to get the first URN

    :param resource: XML Resource
    :type resource: etree._Element
    :return: Last colon-separated part of the first URN of the reply, None when the reply holds no URN
    :rtype: str
    """
    resource = xmlparser(resource)
    # smart_strings=False asks lxml for plain strings, as prevnext does. The
    # former "magic_string=True" is not an lxml option: unknown keyword
    # arguments of xpath() are bound as XPath variables.
    urns = resource.xpath("//ti:reply/ti:urn/text()", namespaces=NS, smart_strings=False)

    if len(urns) == 0:
        return None
    return str(urns[0]).split(":")[-1]
def __export__(self, output=Mimetypes.PYTHON.ETREE, domain=""):
    """ Export the object to the requested format

    :param output: Format to be chosen (parsed XML tree or CTS XML string)
    :type output: basestring
    :param domain: Domain to prefix IDs (not used by this implementation)
    :type domain: str
    :rtype: lxml.etree._Element
    :returns: XML representation of the object, None for any other format
    """
    if output == Mimetypes.PYTHON.ETREE:
        serialized = str(self)
        return xmlparser(serialized)
    if output == Mimetypes.XML.CTS:
        return str(self)
    return None
def parse(self, resource):
    """ Parse a textgroup resource and collect its works

    :param resource: Element representing the textgroup
    :type resource: basestring, etree._Element
    :returns: The works found directly under the textgroup
    """
    self.xml = xmlparser(resource)

    # Group names without an xml:lang attribute are ignored
    for groupname in self.xml.xpath("ti:groupname", namespaces=NS):
        language = groupname.get("{http://www.w3.org/XML/1998/namespace}lang")
        if language is None:
            continue
        self.metadata["groupname"][language] = groupname.text

    # NOTE(review): parents is a 2-tuple (self, self.parents), i.e. the existing
    # parents are nested rather than flattened - confirm this is intended
    self.works = xpathDict(
        xml=self.xml,
        xpath="ti:work",
        children=Work,
        parents=(self, self.parents)
    )
    return self.works
def test_ingest_single_and(self):
    """ Ingest a single cRefPattern whose XPath holds an "and" condition """
    # NOTE: str.replace() works on literal text, not on regular expressions, so
    # the second replace() only targets the literal characters \s+ and leaves
    # this XML untouched. The backslash is escaped to avoid an invalid escape
    # sequence; the runtime value is unchanged.
    text = xmlparser(""" <tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0"> <tei:cRefPattern n="section" matchPattern="(.+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n='$1' and @type='section'])" /> </tei:tei> """.replace("\n", "").replace("\\s+", " "))
    citation = Citation.ingest(text)
    # Show the complete diff when the long strings differ
    self.maxDiff = None
    self.assertEqual(
        str(citation),
        """<tei:cRefPattern n="section" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'$1\' and @type='section'])"><tei:p>This pointer pattern extracts section</tei:p></tei:cRefPattern>"""
    )
    self.assertEqual(citation.scope, "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']")
    self.assertEqual(citation.xpath, "/tei:div[@n='?' and @type='section']")
    self.assertEqual(citation.fill("1"), "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'1\' and @type='section']")
def getLabel(self):
    """ Ask the retriever for the label of the text and parse it

    :rtype: Metadata
    :returns: Metadata of the object after the first ti:label node of the reply has been parsed
    """
    raw_reply = self.retriever.getLabel(urn=str(self.urn))
    reply = xmlparser(raw_reply)
    first_label = reply.xpath("//ti:reply/ti:label", namespaces=NS)[0]
    self.__parse_request__(first_label)
    return self.metadata
def export(self, output="xml", **kwargs):
    """ Export the object as XML or as an instance of a text class

    :param output: "xml" for a parsed XML tree, or a subclass of text.Text to instantiate
    :type output: basestring, citation
    :param kwargs: Extra arguments given to the text class
    :rtype: lxml.etree._Element
    :returns: XML representation of the object, or an instance of the given text class
    """
    if output == "xml":
        return xmlparser(str(self))

    if issubclass(output, text.Text):
        # Start from the own metadata and add those of every parent that is a Resource
        merged = self.metadata
        for ancestor in self.parents:
            if not isinstance(ancestor, inventory.Resource):
                continue
            merged = merged + ancestor.metadata
        return output(urn=self.urn, citation=self.citation, metadata=merged, **kwargs)

    return None
def test_ingest_single(self):
    """ Ingest a single cRefPattern and check its string form """
    # NOTE: str.replace() works on literal text, not on regular expressions, so
    # the second replace() only targets the literal characters \s+ and leaves
    # this XML untouched. Backslashes are escaped to avoid invalid escape
    # sequences; the runtime values are unchanged.
    b = xmlparser(""" <tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0"> <tei:cRefPattern n="line" matchPattern="(\\w+).(\\w+).(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2']/tei:l[@n='$3'])"> <tei:p>This pointer pattern extracts book and poem and line</tei:p> </tei:cRefPattern> </tei:tei> """.replace("\n", "").replace("\\s+", " "))
    a = Citation.ingest(b)

    self.assertEqual(
        str(a),
        """<tei:cRefPattern n="line" matchPattern="(\\w+)\\.(\\w+)\\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
    )
def test_get_passage_plus_formatted(self):
    """ A GetPassagePlus reply should be parsable and hold the prev and next URNs """
    response = self.endpoint.getPassagePlus("urn:cts:farsiLit:hafez.divan:1.1.1.2", output=XML)
    tree = xmlparser(response)
    parsed_response = Passage(resource=tree, urn="urn:cts:farsiLit:hafez.divan:1.1.1.2")

    passage_text = parsed_response.text().strip()
    self.assertEqual(
        passage_text,
        "که عشق آسان نمود اول ولی افتاد مشکلها ***",
        "API Response should be parsable by MyCapytain Library"
    )

    # Both neighbours are looked up in the raw response
    expectations = (
        ("<prev><urn>urn:cts:farsiLit:hafez.divan.perseus-far1:1.1.1.1</urn></prev>", "Previous URN should be found"),
        ("<next><urn>urn:cts:farsiLit:hafez.divan.perseus-far1:1.1.2.1</urn></next>", "Next URN should be found"),
    )
    for fragment, message in expectations:
        self.assertIn(fragment, response, message)
def firstUrn(resource):
    """ Parse a resource to get the first URN

    :param resource: XML Resource
    :type resource: etree._Element
    :return: Last colon-separated part of the first URN of the reply, None when the reply holds no URN
    :rtype: str
    """
    resource = xmlparser(resource)
    # smart_strings=False asks lxml for plain strings, as prevnext does. The
    # former "magic_string=True" is not an lxml option: unknown keyword
    # arguments of xpath() are bound as XPath variables.
    urns = resource.xpath("//ti:reply/ti:urn/text()", namespaces=XPATH_NAMESPACES, smart_strings=False)

    if len(urns) == 0:
        return None
    return str(urns[0]).split(":")[-1]
def parse(resource):
    """ Build text inventory metadata from a resource

    :param resource: Element representing the text inventory
    :type resource: basestring, etree._Element
    :returns: The inventory, named after the tiid attribute of the first ti:TextInventory node \
    (empty string when the attribute is missing or empty)
    """
    tree = xmlparser(resource)
    inventory_node = tree.xpath("//ti:TextInventory", namespaces=XPATH_NAMESPACES)[0]
    inventory_object = XmlCtsTextInventoryMetadata(name=inventory_node.get("tiid") or "")

    # Textgroups are parsed with the inventory given as their parent
    xpathDict(xml=tree, xpath='//ti:textgroup', cls=XmlCtsTextgroupMetadata, parent=inventory_object)
    return inventory_object
def parse(resource, parent=None):
    """ Build textgroup metadata from a resource

    :param resource: Element representing the textgroup
    :param parent: Parent of the textgroup
    :returns: The textgroup metadata object
    """
    tree = xmlparser(resource)
    textgroup = XmlCtsTextgroupMetadata(urn=tree.get("urn"), parent=parent)

    # Group names without an xml:lang attribute are ignored
    for groupname in tree.xpath("ti:groupname", namespaces=XPATH_NAMESPACES):
        language = groupname.get("{http://www.w3.org/XML/1998/namespace}lang")
        if language is None:
            continue
        textgroup.set_cts_property("groupname", groupname.text, language)

    # Works are parsed with the textgroup given as their parent
    xpathDict(xml=tree, xpath='ti:work', cls=XmlCtsWorkMetadata, parent=textgroup)

    __parse_structured_metadata__(textgroup, tree)
    return textgroup
def __export__(self, output=Mimetypes.PYTHON.ETREE, domain="", **kwargs):
    """ Export the object to the requested format

    :param output: Format to be chosen (parsed XML tree, readable text object or CTS XML string)
    :type output: basestring, citation
    :param domain: Domain to prefix IDs (not used by this implementation)
    :type domain: str
    :param kwargs: Extra arguments given to the readable text object
    :rtype: lxml.etree._Element
    :returns: Representation of the object in the given format, None for any other format
    """
    if output == Mimetypes.PYTHON.ETREE:
        return xmlparser(str(self))

    if output == Mimetypes.PYTHON.MyCapytain.ReadableText:
        # Start from the own metadata and add those of every CTS parent that has metadata
        merged = self.metadata
        for ancestor in self.parents:
            if isinstance(ancestor, cts.CTSCollection) and hasattr(ancestor, "metadata"):
                merged = merged + ancestor.metadata
        return text.CitableText(urn=self.urn, citation=self.citation, metadata=merged, **kwargs)

    if output == Mimetypes.XML.CTS:
        return str(self)

    return None
def getPassagePlus(self, reference=None):
    """ Request a passage with its surrounding information and wrap the reply in a Passage

    :param reference: Reference of the passage, appended to the URN of the text when given
    :type reference: Reference or List of text_type
    :rtype: Passage
    :returns: Object representing the passage, carrying the metadata and citation of this text
    """
    urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)

    reply = xmlparser(self.retriever.getPassagePlus(urn=urn))
    # The label of the reply is parsed into this object first
    label_nodes = reply.xpath("//ti:reply/ti:label", namespaces=NS)
    self.__parse_request__(label_nodes[0])

    passage = Passage(urn=urn, resource=reply, retriever=self.retriever)
    metadata, citation = self.metadata, self.citation
    passage.metadata = metadata
    passage.citation = citation
    return passage
def parse(self, resource):
    """ Parse a resource to feed the object

    Sets the URN and id, the language (translations only), the descriptions and labels,
    the citations and the information held by the first ti:online node.

    :param resource: An xml representation object
    :type resource: basestring or lxml.etree._Element
    :returns: None
    """
    xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"

    self.xml = xmlparser(resource)
    self.urn = URN(self.xml.get("urn"))
    self.id = str(self.urn)

    if self.subtype == "Translation":
        translation_lang = self.xml.get(xml_lang)
        if translation_lang is not None:
            self.lang = translation_lang

    # Descriptions first, then labels; nodes without an xml:lang attribute are ignored
    for field in ("description", "label"):
        for node in self.xml.xpath("ti:" + field, namespaces=NS):
            node_lang = node.get(xml_lang)
            if node_lang is not None:
                self.metadata[field][node_lang] = node.text

    self.__findCitations(
        xml=self.xml,
        xpath="ti:online/ti:citationMapping/ti:citation"
    )

    online_nodes = self.xml.xpath("ti:online", namespaces=NS)
    if len(online_nodes) > 0:
        # Only the first ti:online node is read
        online = online_nodes[0]
        self.docname = online.get("docname")
        # When several ti:validate nodes exist, the last one wins
        for validate in online.xpath("ti:validate", namespaces=NS):
            self.validate = validate.get("schema")
        for mapping in online.xpath("ti:namespaceMapping", namespaces=NS):
            self.metadata["namespaceMapping"][mapping.get("abbreviation")] = mapping.get("nsURI")

    return None
def test_ingest_single_and(self):
    """ Ingest a single cRefPattern whose XPath holds an "and" condition """
    # NOTE: str.replace() works on literal text, not on regular expressions, so
    # the second replace() only targets the literal characters \s+ and leaves
    # this XML untouched. The backslash is escaped to avoid an invalid escape
    # sequence; the runtime value is unchanged.
    text = xmlparser(""" <tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0"> <tei:cRefPattern n="section" matchPattern="(.+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n='$1' and @type='section'])" /> </tei:tei> """.replace("\n", "").replace("\\s+", " "))
    citation = Citation.ingest(text)
    # Show the complete diff when the long strings differ
    self.maxDiff = None
    self.assertEqual(
        str(citation),
        """<tei:cRefPattern n="section" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'$1\' and @type='section'])"><tei:p>This pointer pattern extracts section</tei:p></tei:cRefPattern>"""
    )
    self.assertEqual(
        citation.scope,
        "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']")
    self.assertEqual(citation.xpath,
                     "/tei:div[@n='?' and @type='section']")
    self.assertEqual(
        citation.fill("1"),
        "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'1\' and @type='section']"
    )
def test_get_label(self):
    """Check get Label"""
    # Need to parse with Citation and parse individually or simply check for some equality
    data = self.app.get("/cts?request=GetLabel&urn=urn:cts:latinLit:phi1294.phi002.perseus-lat2")\
        .data.decode("utf-8").replace("\n", "")
    parsed = xmlparser(data)
    label = parsed.xpath(".//ti:label", namespaces=XPATH_NAMESPACES)
    # The pattern is a raw string so that \s reaches the regex engine without
    # relying on an invalid string escape
    label_str = re.sub(r"\s+", " ", tostring(label[0], encoding=str)).replace("\n", "")
    self.assertIn(
        '<groupname xml:lang="eng">Martial</groupname>', label_str,
        "groupname should be exported correctly"
    )
    self.assertIn(
        '<title xml:lang="eng">Epigrammata</title>', label_str,
        "title should be exported correctly"
    )
    self.assertIn(
        '<description xml:lang="eng"> M. Valerii Martialis Epigrammaton libri / recognovit W. Heraeus </description>',
        label_str,
        "description should be exported correctly"
    )
    self.assertIn(
        '<label xml:lang="eng">Epigrammata</label>', label_str,
        "label should be exported correctly"
    )
    # The citation scheme is read from the same label node
    citation = Citation.ingest(label[0])
    self.assertEqual(
        len(citation), 3, "There should be three level of citation"
    )
    self.assertEqual(
        citation.name, "book", "First level is book"
    )
    # Only checked when the test case was set up with a cache
    if self.cache is not None:
        self.assertGreater(
            len(self.cache.cache._cache), 0,
            "There should be something cached"
        )
def test_ingest_multiple(self):
    """ Ingest three cRefPattern and check the resulting chain book > poem > line """
    # NOTE: str.replace() works on literal text, not on regular expressions, so
    # the second replace() only targets the literal characters \s+ and leaves
    # this XML untouched. Backslashes are escaped to avoid invalid escape
    # sequences; the runtime values are unchanged.
    b = xmlparser(""" <tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0"> <tei:cRefPattern n="line" matchPattern="(\\w+).(\\w+).(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1' and @type='section']/tei:div[@n='$2']/tei:l[@n='$3'])"> <tei:p>This pointer pattern extracts line</tei:p> </tei:cRefPattern> <tei:cRefPattern n="poem" matchPattern="(\\w+).(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2'])"> <tei:p>This pointer pattern extracts poem</tei:p> </tei:cRefPattern> <tei:cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1'])"> <tei:p>This pointer pattern extracts book</tei:p> </tei:cRefPattern> </tei:tei> """.replace("\n", "").replace("\\s+", " "))
    a = Citation.ingest(b)

    # The ingested citation is the book level
    self.assertEqual(
        str(a),
        """<tei:cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\'])"><tei:p>This pointer pattern extracts book</tei:p></tei:cRefPattern>"""
    )
    # Its child is the poem level
    self.assertEqual(
        str(a.child),
        """<tei:cRefPattern n="poem" matchPattern="(\\w+)\\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\'])"><tei:p>This pointer pattern extracts poem</tei:p></tei:cRefPattern>"""
    )
    # And its grandchild is the line level
    self.assertEqual(
        str(a.child.child),
        """<tei:cRefPattern n="line" matchPattern="(\\w+)\\.(\\w+)\\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\' and @type=\'section\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
    )
    self.assertEqual(
        a.child.child.fill(Reference("1.2.3")),
        "/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'1\' and @type=\'section\']/tei:div[@n=\'2\']/tei:l[@n=\'3\']"
    )
def parse(resource, parent=None):
    """ Build work metadata from a resource

    :param resource: Element representing a work
    :type resource: basestring, etree._Element
    :param parent: Parent of the object
    :type parent: XmlCtsTextgroupMetadata
    :returns: The work metadata object
    """
    xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"

    tree = xmlparser(resource)
    work = XmlCtsWorkMetadata(urn=tree.get("urn"), parent=parent)

    work_lang = tree.get(xml_lang)
    if work_lang is not None:
        work.lang = work_lang

    # Titles without an xml:lang attribute are ignored
    for title in tree.xpath("ti:title", namespaces=XPATH_NAMESPACES):
        title_lang = title.get(xml_lang)
        if title_lang is None:
            continue
        work.set_cts_property("title", title.text, title_lang)

    # Children are parsed with the work given as their parent
    xpathDict(xml=tree, xpath='ti:edition', cls=XmlCtsEditionMetadata, parent=work)
    xpathDict(xml=tree, xpath='ti:translation', cls=XmlCtsTranslationMetadata, parent=work)
    # Added for commentary
    xpathDict(xml=tree, xpath='ti:commentary', cls=XmlCtsCommentaryMetadata, parent=work)

    __parse_structured_metadata__(work, tree)
    return work
def getPassagePlus(self, reference=None):
    """ Request a passage with its surrounding information and wrap the reply in a CtsPassage

    :param reference: Reference of the passage, appended to the URN of the text when given
    :type reference: Reference or List of text_type
    :rtype: CtsPassage
    :returns: Object representing the passage
    """
    urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)

    reply = xmlparser(self.retriever.getPassagePlus(urn=urn))
    passage = CtsPassage(urn=urn, resource=reply, retriever=self.retriever)

    # The label of the reply is parsed by the passage, whose citation is then copied onto this text
    label_nodes = reply.xpath("//ti:reply/ti:label", namespaces=XPATH_NAMESPACES)
    passage.__parse_request__(label_nodes[0])
    self.citation = passage.citation

    return passage
def __init__(self, urn=None, citation=None, resource=None, autoreffs=False):
    """ Initiate the text and, when a resource is given, parse it

    :param urn: URN of the text, given to the parent constructor
    :param citation: Citation scheme of the text
    :param resource: XML resource to parse
    :param autoreffs: When exactly True, parse() is called once the resource has been read
    """
    super(Text, self).__init__(urn=urn, citation=citation)

    self._passages = Passage()
    # Represents passage we got without asking for all. Storing convenience ?
    self._orphan = defaultdict(Reference)
    self._cRefPattern = MyCapytain.resources.texts.tei.Citation()
    self.xml = None

    if citation is not None:
        self.citation = citation

    if resource is not None:
        self.resource = resource
        self.xml = xmlparser(resource)
        self.__findCRefPattern(self.xml)

        # NOTE(review): assumed to belong under the resource check - confirm
        if autoreffs is True:
            self.parse()
from __future__ import unicode_literals import unittest from six import text_type as str from io import open from MyCapytain.common.utils import xmlparser, NS from MyCapytain.resources.texts.api import * from MyCapytain.resources.texts.tei import Citation from MyCapytain.retrievers.cts5 import CTS from MyCapytain.common.reference import Reference, URN from lxml import etree import mock with open("tests/testing_data/cts/getValidReff.xml") as f: GET_VALID_REFF = xmlparser(f) with open("tests/testing_data/cts/getpassage.xml") as f: GET_PASSAGE = xmlparser(f) with open("tests/testing_data/cts/getpassageplus.xml") as f: GET_PASSAGE_PLUS = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.xml") as f: NEXT_PREV = xmlparser(f) with open("tests/testing_data/cts/getFirstUrn.xml") as f: Get_FIRST = xmlparser(f) with open("tests/testing_data/cts/getFirstUrnEmpty.xml") as f: Get_FIRST_EMPTY = xmlparser(f) with open("tests/testing_data/cts/getlabel.xml") as f: GET_LABEL = xmlparser(f) class TestAPIText(unittest.TestCase):
def test_xml(self):
    """ The xml attribute should be the very object given as resource """
    tree = xmlparser('<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>')
    passage = TEIResource(resource=tree)
    self.assertIs(tree, passage.xml)
def test_str(self):
    """ The string form of a resource should be its XML source """
    source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    passage = TEIResource(resource=xmlparser(source))
    self.assertEqual(
        str(passage),
        '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    )
def __export__(self, output=None, domain=""):
    """ Export the object as a parsed XML tree

    :param output: Format to be chosen; only the parsed XML tree format is handled here
    :param domain: Domain to prefix IDs (not used by this implementation)
    :returns: The CTS XML export of the object once parsed, None for any other format
    """
    if output == Mimetypes.PYTHON.ETREE:
        cts_xml = self.export(output=Mimetypes.XML.CTS)
        return xmlparser(cts_xml)
    return None
from MyCapytain.resolvers.cts.api import HttpCtsResolver from MyCapytain.retrievers.cts5 import HttpCtsRetriever from MyCapytain.common.utils import xmlparser from MyCapytain.common.constants import XPATH_NAMESPACES, Mimetypes from MyCapytain.resources.prototypes.text import Passage from MyCapytain.resources.collections.cts import XmlCtsTextInventoryMetadata, XmlCtsTextgroupMetadata, XmlCtsWorkMetadata, XmlCtsTextMetadata from MyCapytain.resources.prototypes.metadata import Collection from unittest import TestCase from mock import MagicMock with open("tests/testing_data/cts/getpassage.xml") as f: GET_PASSAGE = xmlparser(f) with open("tests/testing_data/cts/getpassageplus.xml") as f: GET_PASSAGE_PLUS = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.xml") as f: NEXT_PREV = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.nextonly.xml") as f: NEXT = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.prevonly.xml") as f: PREV = xmlparser(f) with open("tests/testing_data/cts/getValidReff.xml") as f: GET_VALID_REFF_FULL = xmlparser(f) with open("tests/testing_data/cts/getValidReff.1.1.xml") as f: GET_VALID_REFF = xmlparser(f) with open("tests/testing_data/cts/getCapabilities.xml") as f: GET_CAPABILITIES = xmlparser(f) with open("tests/testing_data/cts/getCapabilities1294002.xml") as f: GET_CAPABILITIES_FILTERED = xmlparser(f) with open("tests/testing_data/cts/getPassageOtherTest.xml") as f: GET_PASSAGE_CITATION_FAILURE = f.read()
def test_get_passage_formatted(self): response = self.endpoint.getPassage("urn:cts:farsiLit:hafez.divan:1.1.1.1", output=XML) p = Passage(resource=xmlparser(response), urn="urn:cts:farsiLit:hafez.divan:1.1.1.1") """
def test_xml(self):
    """ The xml attribute should be the very object given as resource """
    source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    tree = xmlparser(source)
    passage = Passage(resource=tree)
    self.assertIs(tree, passage.xml)
def parse(resource, parent=None):
    """ Build edition metadata from a resource

    :param resource: Element representing the edition
    :param parent: Parent of the edition
    :returns: The edition metadata object
    """
    tree = xmlparser(resource)
    edition = XmlCtsEditionMetadata(urn=tree.get("urn"), parent=parent)
    XmlCtsEditionMetadata.parse_metadata(edition, tree)
    return edition
def __init__(self, resource, **kwargs):
    """ Initiate the resource

    :param resource: XML resource, stored once parsed
    :param kwargs: Arguments given to the parent constructor
    """
    super(TEIResource, self).__init__(**kwargs)
    parsed = xmlparser(resource)
    self.resource = parsed
    # Instance level value built from the PLAINTEXT_STRING_JOIN default
    self.__plaintext_string_join__ = "" + self.PLAINTEXT_STRING_JOIN
def xmlparse(self, file):
    """ Parse a XML file by delegating to xmlparser

    :param file: Opened File
    :return: Tree
    """
    tree = xmlparser(file)
    return tree
from MyCapytain.resolvers.cts.api import HttpCTSResolver from MyCapytain.retrievers.cts5 import CTS from MyCapytain.common.utils import xmlparser from MyCapytain.common.constants import NS, Mimetypes from MyCapytain.common.metadata import Metadatum from MyCapytain.resources.prototypes.text import Passage from MyCapytain.resources.collections.cts import TextInventory, TextGroup, Work, Text from MyCapytain.resources.prototypes.metadata import Collection from unittest import TestCase from mock import MagicMock with open("tests/testing_data/cts/getpassage.xml") as f: GET_PASSAGE = xmlparser(f) with open("tests/testing_data/cts/getpassageplus.xml") as f: GET_PASSAGE_PLUS = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.xml") as f: NEXT_PREV = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.nextonly.xml") as f: NEXT = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.prevonly.xml") as f: PREV = xmlparser(f) with open("tests/testing_data/cts/getValidReff.xml") as f: GET_VALID_REFF_FULL = xmlparser(f) with open("tests/testing_data/cts/getValidReff.1.1.xml") as f: GET_VALID_REFF = xmlparser(f) with open("tests/testing_data/cts/getCapabilities.xml") as f: GET_CAPABILITIES = xmlparser(f) with open("tests/testing_data/cts/getCapabilities1294002.xml") as f: GET_CAPABILITIES_FILTERED = xmlparser(f)
# -*- coding: utf-8 -*- from __future__ import unicode_literals import unittest from six import text_type as str from io import open from MyCapytain.resources.texts.api.cts import Passage, Text from MyCapytain.retrievers.cts5 import CTS from MyCapytain.common.reference import Reference, Citation, URN from MyCapytain.common.metadata import Metadata, Metadatum from MyCapytain.common.utils import xmlparser, NS import mock with open("tests/testing_data/cts/getValidReff.xml") as f: GET_VALID_REFF = xmlparser(f) with open("tests/testing_data/cts/getpassage.xml") as f: GET_PASSAGE = xmlparser(f) with open("tests/testing_data/cts/getpassageplus.xml") as f: GET_PASSAGE_PLUS = xmlparser(f) with open("tests/testing_data/cts/getprevnexturn.xml") as f: NEXT_PREV = xmlparser(f) with open("tests/testing_data/cts/getFirstUrn.xml") as f: Get_FIRST = xmlparser(f) with open("tests/testing_data/cts/getFirstUrnEmpty.xml") as f: Get_FIRST_EMPTY = xmlparser(f) with open("tests/testing_data/cts/getlabel.xml") as f: GET_LABEL = xmlparser(f) with open("tests/testing_data/cts/getValidReff.1.1.xml") as f: GET_VALID_REFF_1_1 = xmlparser(f)
def test_xml(self):
    """ The xml attribute should be the very object given as resource """
    source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
    tree = xmlparser(source)
    passage = TEIResource(identifier="dummy", resource=tree)
    self.assertIs(tree, passage.xml)
import unittest from io import open import xmlunittest from lxml import etree from MyCapytain.common.utils import xmlparser import MyCapytain.common.reference import MyCapytain.errors import MyCapytain.resources.texts.encodings import MyCapytain.resources.texts.locals.tei from tests.resources.commonTests import CapitainsXmlTextTest, CapitainsXmlPassageTests, CapitainsXMLRangePassageTests objectifiedParser = lambda x: xmlparser(x, objectify=False) class TestLocalXMLTextImplementation(CapitainsXmlTextTest, unittest.TestCase, xmlunittest.XmlTestMixin): """ Test XML Implementation of resources found in local file """ def setUp(self): self.text = open("tests/testing_data/texts/sample.xml", "rb") self.TEI = MyCapytain.resources.texts.locals.tei.Text( resource=objectifiedParser(self.text), urn="urn:cts:latinLit:phi1294.phi002.perseus-lat2" ) self.treeroot = etree._ElementTree() with open("tests/testing_data/texts/text_or_xpath.xml") as f: self.text_complex = MyCapytain.resources.texts.locals.tei.Text(
"TEI fails with urn and xml lang on @xml:base/div-{epidoc}"), ("tei", "<div>", "<div type='{epidoc}' xml:base='{urn}' xml:lang='{lang}'>", False, "TEI fails with urn and without xml lang on @n/div-{epidoc}"), ("tei", "<div>", "<div type='{epidoc}' n='{urn}' xml:lang='{lang}'>", False, "TEI fails with urn and without xml lang on @xml:base/div-{epidoc}"), ("epidoc", "<div>", "<div type='{epidoc}' n='{urn}' xml:lang='{lang}'>", True, "Epidoc works with urn and xml lang on @n/div-{epidoc}"), ("epidoc", "<div>", "<div type='{epidoc}' xml:base='{urn}' xml:lang='{lang}'>", False, "Epidoc fails with urn and xml lang on @xml:base/div-{epidoc}"), ("epidoc", "<div>", "<div type='{epidoc}' xml:base='{urn}'>", False, "Epidoc fails with urn and without xml lang on @n/div-{epidoc}"), ("epidoc", "<div>", "<div type='{epidoc}' n='{urn}'>", False, "Epidoc fails with urn and without xml lang on @xml:base/div-{epidoc}") ] for type_epidoc in ["edition", "translation", "commentary"] ] XMLLANG_DOCUMENTS = [ ( scheme, tostring(xmlparser(TEMPLATES.replace(source, replacement).format(urn=URN, lang=LANG)), encoding=str), boolean, msg + " ("+replacement.format(urn=URN, lang=LANG)+")" ) for scheme, source, replacement, boolean, msg in XMLLANG_DOCUMENTS ]
def __init__(self, resource, **kwargs):
    """ Initiate the resource

    :param resource: XML resource, stored once parsed
    :param kwargs: Arguments given to the parent constructor
    """
    super(TEIResource, self).__init__(**kwargs)
    parsed = xmlparser(resource)
    self.resource = parsed