Example #1
0
    def prevnext(resource):
        """ Read the previous and next passage identifiers out of a reply

        :param resource: XML Resource
        :type resource: etree._Element
        :return: ``(previous, next)``. Both are False when the reply has no ``ti:prevnext`` node;
            otherwise each is the last colon-separated part of the matching URN, or None when
            that URN is missing
        :rtype: (str, str)
        """
        document = xmlparser(resource)
        nodes = document.xpath("//ti:prevnext", namespaces=XPATH_NAMESPACES)

        # No prevnext node at all: signalled with False rather than None
        if len(nodes) == 0:
            return False, False

        node = nodes[0]
        next_hits = node.xpath("ti:next/ti:urn/text()",
                               namespaces=XPATH_NAMESPACES,
                               smart_strings=False)
        prev_hits = node.xpath("ti:prev/ti:urn/text()",
                               namespaces=XPATH_NAMESPACES,
                               smart_strings=False)

        following = next_hits[0].split(":")[-1] if len(next_hits) else None
        preceding = prev_hits[0].split(":")[-1] if len(prev_hits) else None

        return preceding, following
Example #2
0
    def getPassage(self, reference=None):
        """ Fetch a passage through the retriever and wrap it in a Passage

        The URN sent to the retriever depends on the type of ``reference``: a URN is used as
        is; a Reference, or a list of strings joined with ".", is appended to the URN of the
        text; a string is used as is when it contains ":" and appended to the URN of the text
        otherwise. Any other value (None included) targets the URN of the text itself.

        :param reference: Reference of the passage
        :type reference: Reference, or URN, or str or list(str)
        :rtype: Passage
        :returns: Object representing the passage
        """
        template = "{0}:{1}"

        if isinstance(reference, URN):
            urn = str(reference)
        elif isinstance(reference, Reference):
            urn = template.format(self.urn, str(reference))
        elif isinstance(reference, str):
            # A colon means the caller already passed a complete URN
            urn = reference if ":" in reference else template.format(self.urn, reference)
        elif isinstance(reference, list):
            urn = template.format(self.urn, ".".join(reference))
        else:
            urn = str(self.urn)

        reply = xmlparser(self.retriever.getPassage(urn=urn))
        request_node = reply.xpath("//ti:request", namespaces=NS)[0]
        self.__parse_request__(request_node)

        return Passage(urn=urn, resource=reply, retriever=self.retriever)
Example #3
0
    def getTextualNode(self, subreference=None):
        """ Fetch a passage through the retriever and wrap it in a Passage

        The URN sent to the retriever depends on the type of ``subreference``: a URN is used
        as is; a Reference, or a list of strings joined with ".", is appended to the URN of
        the text; a string is used as is when it contains ":" and otherwise appended to the
        URN of the text cut with ``upTo(URN.NO_PASSAGE)``. Any other value (None included)
        targets the URN of the text itself.

        :param subreference: Reference of the passage (if given a list, it should be the list
            of strings that compose the reference)
        :type subreference: Union[Reference, URN, str, list]
        :rtype: Passage
        :returns: Object representing the passage
        """
        template = "{0}:{1}"

        if isinstance(subreference, URN):
            urn = str(subreference)
        elif isinstance(subreference, Reference):
            urn = template.format(self.urn, str(subreference))
        elif isinstance(subreference, str):
            if ":" not in subreference:
                urn = template.format(self.urn.upTo(URN.NO_PASSAGE), subreference)
            else:
                # A colon means the caller already passed a complete URN
                urn = subreference
        elif isinstance(subreference, list):
            urn = template.format(self.urn, ".".join(subreference))
        else:
            urn = str(self.urn)

        reply = xmlparser(self.retriever.getPassage(urn=urn))
        request_node = reply.xpath("//ti:request", namespaces=NS)[0]
        self.__parse_request__(request_node)

        return Passage(urn=urn, resource=reply, retriever=self.retriever)
Example #4
0
    def getValidReff(self, level=1, reference=None):
        """ Ask the retriever for the valid references of the text

        :param level: Depth required (1 based). -1 is replaced by the length of the citation
            scheme
        :type level: Int
        :param reference: Passage reference; when given it is appended to the URN of the text
        :type reference: Reference
        :rtype: list(str)
        :returns: Last colon-separated part of every URN found in the reply
        """
        urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)
        depth = len(self.citation) if level == -1 else level

        reply = xmlparser(self.retriever.getValidReff(level=depth, urn=urn))
        request_node = reply.xpath("//ti:request", namespaces=NS)[0]
        self.__parse_request__(request_node)

        urns = reply.xpath("//ti:reply//ti:urn/text()", namespaces=NS)
        return [found.split(":")[-1] for found in urns]
Example #5
0
 def test_text(self):
     """ Plain text export, with and without excluded nodes """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = TEIResource(resource=xmlparser(source))

     # Nothing excluded: the content of <note> is part of the text
     everything = passage.export(output=Mimetypes.PLAINTEXT)
     self.assertEqual(everything, "Ibis hello b ab excusso missus in astra sago. ")

     # <note> excluded: its content disappears from the text
     without_note = passage.export(output=Mimetypes.PLAINTEXT, exclude=["note"])
     self.assertEqual(without_note, "Ibis ab excusso missus in astra sago. ")
Example #6
0
    def prevnext(resource):
        """ Read the previous and next passage identifiers out of a reply

        :param resource: XML Resource
        :type resource: etree._Element
        :return: ``(previous, next)``. Both are False when the reply has no ``ti:prevnext`` node;
            otherwise each is the last colon-separated part of the matching URN, or None when
            that URN is missing
        :rtype: (str, str)
        """
        matches = xmlparser(resource).xpath("//ti:prevnext", namespaces=NS)

        # No prevnext node at all: signalled with False rather than None
        if len(matches) == 0:
            return False, False

        node = matches[0]
        next_urns = node.xpath("ti:next/ti:urn/text()", namespaces=NS, smart_strings=False)
        prev_urns = node.xpath("ti:prev/ti:urn/text()", namespaces=NS, smart_strings=False)

        following = None
        if len(next_urns):
            following = next_urns[0].split(":")[-1]

        preceding = None
        if len(prev_urns):
            preceding = prev_urns[0].split(":")[-1]

        return preceding, following
Example #7
0
    def __init__(self, urn=None, citation=None, resource=None):
        """ Initialise the text and, when a resource is given, parse it

        :param urn: Accepted by the signature but not read by this constructor
        :param citation: Citation scheme, stored on the object when not None
        :param resource: XML resource. When given, it is parsed, its cRefPattern is looked up
            and the first node matched by the citation scope is wrapped in a Passage
        :raises RefsDeclError: when the citation scope XPath matches nothing in the resource
        """
        # Represents real full passages / reffs information
        self._passages = OrderedDict()
        # Represents passages retrieved without asking for all of them
        self._orphan = defaultdict(Reference)

        self._cRefPattern = MyCapytain.resources.texts.tei.Citation()
        self.resource = None
        self.xml = None
        self._URN = None

        if citation is not None:
            self.citation = citation
        if resource is not None:
            self.resource = resource
            self.xml = xmlparser(resource)

            self.__findCRefPattern(self.xml)

            # Only an empty result is translated into a domain error; any other exception
            # propagates unchanged (the former ``except Exception as E: raise E`` was a no-op)
            try:
                xml = self.xml.xpath(self.citation.scope, namespaces=NS)[0]
            except IndexError:
                msg = "Main citation scope does not result in any result ({0})".format(self.citation.scope)
                raise RefsDeclError(msg)

            self._passages = Passage(resource=xml, citation=self.citation, urn=self.urn, id=None)
Example #8
0
    def getValidReff(self, level=1, reference=None):
        """ Ask the retriever for the valid references of the text

        :param level: Depth required (1 based). -1 is replaced by the length of the citation
            scheme
        :type level: Int
        :param reference: CapitainsCtsPassage reference; when given it is appended to the URN
            of the text
        :type reference: Reference
        :rtype: list(str)
        :returns: Last colon-separated part of every URN found in the reply
        """
        if level == -1:
            level = len(self.citation)

        urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)

        reply = xmlparser(self.retriever.getValidReff(level=level, urn=urn))
        request_node = reply.xpath("//ti:request", namespaces=XPATH_NAMESPACES)[0]
        self.__parse_request__(request_node)

        references = []
        for found in reply.xpath("//ti:reply//ti:urn/text()",
                                 namespaces=XPATH_NAMESPACES):
            references.append(found.split(":")[-1])
        return references
Example #9
0
    def parse(self, resource):
        """ Parse a work resource: language, titles, editions and translations

        :param resource: Element representing a work
        :type resource: basestring, etree._Element
        :returns: Mapping holding the editions and the translations found in the resource
        """
        lang_attribute = "{http://www.w3.org/XML/1998/namespace}lang"
        self.xml = xmlparser(resource)

        work_lang = self.xml.get(lang_attribute)
        if work_lang is not None:
            self.lang = work_lang

        # Titles without an xml:lang attribute are ignored
        for title in self.xml.xpath("ti:title", namespaces=NS):
            title_lang = title.get(lang_attribute)
            if title_lang is None:
                continue
            self.metadata["title"][title_lang] = title.text

        self.__editions = xpathDict(
            xml=self.xml,
            xpath="ti:edition",
            children=Edition,
            parents=tuple([self]) + self.parents
        )
        self.__translations = xpathDict(
            xml=self.xml,
            xpath="ti:translation",
            children=Translation,
            parents=tuple([self]) + self.parents
        )

        # Editions are registered first, translations second
        self.texts = collections.defaultdict(Text)
        for key in self.__editions:
            self.texts[key] = self.__editions[key]
        for key in self.__translations:
            self.texts[key] = self.__translations[key]

        return self.texts
Example #10
0
    def getTextualNode(self, subreference=None):
        """ Fetch a passage through the retriever and wrap it in a CtsPassage

        The URN sent to the retriever depends on the type of ``subreference``: a URN is used
        as is; a Reference, or a list of strings joined with ".", is appended to the URN of
        the text; a string is used as is when it contains ":" and otherwise appended to the
        URN of the text cut with ``upTo(URN.NO_PASSAGE)``. Any other value (None included)
        targets the URN of the text itself.

        :param subreference: Reference of the passage (if given a list, it should be the list
            of strings that compose the reference)
        :type subreference: Union[Reference, URN, str, list]
        :rtype: CtsPassage
        :returns: Object representing the passage
        """
        template = "{0}:{1}"

        if isinstance(subreference, URN):
            urn = str(subreference)
        elif isinstance(subreference, Reference):
            urn = template.format(self.urn, str(subreference))
        elif isinstance(subreference, str):
            if ":" not in subreference:
                urn = template.format(self.urn.upTo(URN.NO_PASSAGE), subreference)
            else:
                # A colon means the caller already passed a complete URN
                urn = subreference
        elif isinstance(subreference, list):
            urn = template.format(self.urn, ".".join(subreference))
        else:
            urn = str(self.urn)

        reply = xmlparser(self.retriever.getPassage(urn=urn))
        request_node = reply.xpath("//ti:request", namespaces=XPATH_NAMESPACES)[0]
        self.__parse_request__(request_node)

        return CtsPassage(urn=urn, resource=reply, retriever=self.retriever)
Example #11
0
 def test_text(self):
     """ Text extraction, with and without excluded nodes """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = Passage(resource=xmlparser(source))

     # Nothing excluded: the content of <note> is part of the text
     everything = passage.text()
     self.assertEqual(everything, "Ibis hello b ab excusso missus in astra sago. ")

     # <note> excluded: its content disappears from the text
     without_note = passage.text(exclude=["note"])
     self.assertEqual(without_note, "Ibis ab excusso missus in astra sago. ")
Example #12
0
 def export(self, output="xml"):
     """ Export the TextInventory as a parsed XML tree

     The string form of the object is run through ``xmlparser``.

     :param output: output to be chosen (not read by this implementation)
     :type output: basestring
     :rtype: lxml.etree._Element
     :returns: XML representation of the object
     """
     serialized = str(self)
     return xmlparser(serialized)
Example #13
0
    def parse(resource, parent=None):
        """ Build an XmlCtsCommentaryMetadata from an XML resource

        :param resource: XML resource describing the commentary
        :param parent: Parent handed to the new object
        :returns: The new object, with its language set when the resource has an xml:lang
            attribute and its metadata parsed
        """
        node = xmlparser(resource)
        language = node.get("{http://www.w3.org/XML/1998/namespace}lang")

        commentary = XmlCtsCommentaryMetadata(urn=node.get("urn"), parent=parent)
        if language is not None:
            commentary.lang = language

        XmlCtsCommentaryMetadata.parse_metadata(commentary, node)
        return commentary
Example #14
0
 def test_str(self):
     """ str() of a Passage gives back the XML it was built from """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = Passage(resource=xmlparser(source))

     serialized = str(passage)
     self.assertEqual(
         serialized,
         '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     )
Example #15
0
    def parse(self, resource):
        """ Parse a text inventory resource and collect its textgroups

        :param resource: Element representing the text inventory
        :type resource: basestring, etree._Element
        :returns: The textgroups found in the resource
        """
        self.xml = xmlparser(resource)

        self.textgroups = xpathDict(
            xml=self.xml,
            xpath="//ti:textgroup",
            children=TextGroup,
            parents=self
        )
        return self.textgroups
Example #16
0
    def test_changing_space(self):
        """ Test when user change default value of export joining char """
        X = xmlparser(
            """<root>in- genium<note place="unspecified">ingenium <hi rend="italic">ll.v</hi>(<hi rend="italic">G</hi>). -nio
<hi rend="italic">B.</hi> in ganea <hi rend="italic">J</hi></note><add>n</add>a<add>t</add>us</root>"""
        )
        P = TEIResource(identifier="dummy", resource=X)
        P.plaintext_string_join = ""
        self.assertEqual(P.export(Mimetypes.PLAINTEXT, exclude=["note"]),
                         "in- geniumnatus")
Example #17
0
 def test_str(self):
     """ Standard XML export gives back the XML the resource was built from """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = TEIResource(identifier="dummy", resource=xmlparser(source))

     exported = passage.export(Mimetypes.XML.Std)
     self.assertEqual(
         exported,
         '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     )
Example #18
0
 def test_text(self):
     """ Text extraction, with and without excluded nodes """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = Passage(resource=xmlparser(source))

     # Nothing excluded: the content of <note> is part of the text
     everything = passage.text()
     self.assertEqual(everything, "Ibis hello b ab excusso missus in astra sago. ")

     # <note> excluded: its content disappears from the text
     without_note = passage.text(exclude=["note"])
     self.assertEqual(without_note, "Ibis ab excusso missus in astra sago. ")
Example #19
0
 def test_exportable_capacities(self):
     """ A TEIResource lists the mimetypes it can export to """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = TEIResource(identifier="dummy", resource=xmlparser(source))

     capacities = passage.export_capacities
     expected = [
         Mimetypes.PYTHON.ETREE,
         Mimetypes.XML.Std,
         Mimetypes.PYTHON.NestedDict,
         Mimetypes.PLAINTEXT,
         Mimetypes.XML.TEI,
     ]
     self.assertEqual(
         capacities, expected,
         "CapitainsCtsPassage should be able to export to given resources")
Example #20
0
    def getLabel(self):
        """ Fetch the label of the text and feed it to the request parser

        :rtype: Metadata
        :returns: The metadata of the object once the reply has been parsed
        """
        raw_reply = self.retriever.getLabel(urn=str(self.urn))
        reply = xmlparser(raw_reply)

        label_node = reply.xpath("//ti:reply/ti:label", namespaces=XPATH_NAMESPACES)[0]
        self.__parse_request__(label_node)

        return self.metadata
Example #21
0
 def test_text(self):
     """ Plain text export, with and without excluded nodes """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = TEIResource(identifier="dummy", resource=xmlparser(source))

     # Nothing excluded: the content of <note> is part of the text
     everything = passage.export(output=Mimetypes.PLAINTEXT)
     self.assertEqual(everything, "Ibis hello b ab excusso missus in astra sago. ")

     # <note> excluded: its content disappears from the text
     without_note = passage.export(output=Mimetypes.PLAINTEXT, exclude=["note"])
     self.assertEqual(without_note, "Ibis ab excusso missus in astra sago. ")
Example #22
0
    def firstUrn(resource):
        """ Parse a resource to get the first URN

        :param resource: XML Resource
        :type resource: etree._Element
        :return: Last colon-separated part of the first ``ti:reply/ti:urn`` found, or None when
            the reply holds no such URN
        :rtype: str
        """
        resource = xmlparser(resource)
        # The former ``magic_string=True`` is not an lxml xpath() option: unknown keywords are
        # taken as XPath variables, so it had no effect. ``smart_strings`` is the real option;
        # plain strings are enough here since only the text value is used.
        urns = resource.xpath("//ti:reply/ti:urn/text()", namespaces=NS, smart_strings=False)

        if len(urns) > 0:
            return str(urns[0]).split(":")[-1]
        return None
Example #23
0
    def __export__(self, output=Mimetypes.PYTHON.ETREE, domain=""):
        """ Export the object in the requested format

        :param output: Requested format. ``Mimetypes.PYTHON.ETREE`` gives the parsed string
            form of the object, ``Mimetypes.XML.CTS`` gives the string form itself; any other
            value gives None
        :type output: basestring
        :param domain: Domain to prefix IDs (not read by this implementation)
        :type domain: str
        :rtype: lxml.etree._Element
        :returns: XML representation of the object
        """
        if output == Mimetypes.PYTHON.ETREE:
            return xmlparser(str(self))
        if output == Mimetypes.XML.CTS:
            return str(self)
        return None
Example #24
0
    def parse(self, resource):
        """ Parse a textgroup resource: group names and works

        :param resource: Element representing the textgroup
        :type resource: basestring, etree._Element
        :returns: The works found in the resource
        """
        lang_attribute = "{http://www.w3.org/XML/1998/namespace}lang"
        self.xml = xmlparser(resource)

        # Group names without an xml:lang attribute are ignored
        for groupname in self.xml.xpath("ti:groupname", namespaces=NS):
            name_lang = groupname.get(lang_attribute)
            if name_lang is None:
                continue
            self.metadata["groupname"][name_lang] = groupname.text

        self.works = xpathDict(
            xml=self.xml,
            xpath="ti:work",
            children=Work,
            parents=(self, self.parents)
        )
        return self.works
Example #25
0
    def test_ingest_single_and(self):
        text = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
    <tei:cRefPattern n="section" matchPattern="(.+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n='$1' and @type='section'])" />
</tei:tei>
""".replace("\n", "").replace("\s+", " "))
        citation = Citation.ingest(text)
        self.maxDiff = None
        self.assertEqual(
            str(citation),
            """<tei:cRefPattern n="section" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'$1\' and @type='section'])"><tei:p>This pointer pattern extracts section</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(citation.scope, "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']")
        self.assertEqual(citation.xpath, "/tei:div[@n='?' and @type='section']")
        self.assertEqual(citation.fill("1"), "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'1\' and @type='section']")
Example #26
0
    def getLabel(self):
        """ Fetch the label of the text and feed it to the request parser

        :rtype: Metadata
        :returns: The metadata of the object once the reply has been parsed
        """
        raw_reply = self.retriever.getLabel(urn=str(self.urn))
        reply = xmlparser(raw_reply)

        label_node = reply.xpath("//ti:reply/ti:label", namespaces=NS)[0]
        self.__parse_request__(label_node)

        return self.metadata
Example #27
0
 def export(self, output="xml", **kwargs):
     """ Export the object as XML or as an instance of a text class

     :param output: "xml" to get the parsed string form of the object, or a subclass of
         ``text.Text`` to instantiate with the urn, citation and merged metadata of the object
     :type output: basestring, citation
     :param kwargs: Extra keyword arguments handed to ``output`` when it is a class
     :rtype: lxml.etree._Element
     :returns: XML representation of the object, an instance of ``output``, or None when
         ``output`` is a class that does not derive from ``text.Text``
     """
     if output == "xml":
         return xmlparser(str(self))

     if not issubclass(output, text.Text):
         return None

     # Metadata of every parent that is an inventory resource is added to our own
     merged = self.metadata
     for ancestor in self.parents:
         if isinstance(ancestor, inventory.Resource):
             merged = merged + ancestor.metadata

     return output(urn=self.urn, citation=self.citation, metadata=merged, **kwargs)
Example #28
0
    def test_ingest_single(self):
        b = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
<tei:cRefPattern n="line"
             matchPattern="(\\w+).(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2']/tei:l[@n='$3'])">
    <tei:p>This pointer pattern extracts book and poem and line</tei:p>
</tei:cRefPattern>
</tei:tei>
""".replace("\n", "").replace("\s+", " "))
        a = Citation.ingest(b)

        self.assertEqual(
            str(a),
            """<tei:cRefPattern n="line" matchPattern="(\\w+)\.(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
        )
Example #29
0
    def test_ingest_single(self):
        b = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
<tei:cRefPattern n="line"
             matchPattern="(\\w+).(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2']/tei:l[@n='$3'])">
    <tei:p>This pointer pattern extracts book and poem and line</tei:p>
</tei:cRefPattern>
</tei:tei>
""".replace("\n", "").replace("\s+", " "))
        a = Citation.ingest(b)

        self.assertEqual(
            str(a),
            """<tei:cRefPattern n="line" matchPattern="(\\w+)\.(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
        )
Example #30
0
 def test_get_passage_plus_formatted(self):
     """ GetPassagePlus reply is parsable and carries the prev and next URNs """
     target = "urn:cts:farsiLit:hafez.divan:1.1.1.2"
     response = self.endpoint.getPassagePlus(target, output=XML)
     passage = Passage(resource=xmlparser(response), urn="urn:cts:farsiLit:hafez.divan:1.1.1.2")

     stripped_text = passage.text().strip()
     self.assertEqual(
         stripped_text,
         "که عشق آسان نمود اول ولی افتاد مشکل‌ها ***",
         "API Response should be parsable by MyCapytain Library"
     )

     # Neighbouring URNs are looked up in the raw reply
     self.assertIn(
         "<prev><urn>urn:cts:farsiLit:hafez.divan.perseus-far1:1.1.1.1</urn></prev>",
         response,
         "Previous URN should be found"
     )
     self.assertIn(
         "<next><urn>urn:cts:farsiLit:hafez.divan.perseus-far1:1.1.2.1</urn></next>",
         response,
         "Next URN should be found"
     )
Example #31
0
    def firstUrn(resource):
        """ Parse a resource to get the first URN

        :param resource: XML Resource
        :type resource: etree._Element
        :return: Last colon-separated part of the first ``ti:reply/ti:urn`` found, or None when
            the reply holds no such URN
        :rtype: str
        """
        resource = xmlparser(resource)
        # The former ``magic_string=True`` is not an lxml xpath() option: unknown keywords are
        # taken as XPath variables, so it had no effect. ``smart_strings`` is the real option;
        # plain strings are enough here since only the text value is used.
        urns = resource.xpath("//ti:reply/ti:urn/text()",
                              namespaces=XPATH_NAMESPACES,
                              smart_strings=False)

        if len(urns) > 0:
            return str(urns[0]).split(":")[-1]
        return None
Example #32
0
    def parse(resource):
        """ Build an XmlCtsTextInventoryMetadata from an XML resource

        :param resource: Element representing the text inventory
        :type resource: basestring, etree._Element
        :returns: The new object, named after the ``tiid`` attribute of the first
            ``ti:TextInventory`` node (empty string when the attribute is missing or empty)
        """
        tree = xmlparser(resource)
        root = tree.xpath("//ti:TextInventory", namespaces=XPATH_NAMESPACES)[0]
        collection = XmlCtsTextInventoryMetadata(name=root.get("tiid") or "")

        # Parse textgroups; the result of xpathDict is not kept
        xpathDict(
            xml=tree,
            xpath='//ti:textgroup',
            cls=XmlCtsTextgroupMetadata,
            parent=collection
        )
        return collection
Example #33
0
    def parse(resource, parent=None):
        """ Build an XmlCtsTextgroupMetadata from an XML resource

        :param resource: Element representing the textgroup
        :param parent: Parent of the textgroup
        :returns: The new object, with its group names, works and structured metadata parsed
        """
        lang_attribute = "{http://www.w3.org/XML/1998/namespace}lang"
        node = xmlparser(resource)
        textgroup = XmlCtsTextgroupMetadata(urn=node.get("urn"), parent=parent)

        # Group names without an xml:lang attribute are ignored
        for groupname in node.xpath("ti:groupname", namespaces=XPATH_NAMESPACES):
            name_lang = groupname.get(lang_attribute)
            if name_lang is None:
                continue
            textgroup.set_cts_property("groupname", groupname.text, name_lang)

        # Parse works; the result of xpathDict is not kept
        xpathDict(xml=node, xpath='ti:work', cls=XmlCtsWorkMetadata, parent=textgroup)

        __parse_structured_metadata__(textgroup, node)
        return textgroup
Example #34
0
 def __export__(self, output=Mimetypes.PYTHON.ETREE, domain="", **kwargs):
     """ Export the object in the requested format

     :param output: Requested format. ``Mimetypes.PYTHON.ETREE`` gives the parsed string form
         of the object, ``Mimetypes.PYTHON.MyCapytain.ReadableText`` gives a
         ``text.CitableText``, ``Mimetypes.XML.CTS`` gives the string form itself; any other
         value gives None
     :type output: basestring, citation
     :param domain: Domain to prefix IDs (not read by this implementation)
     :type domain: str
     :param kwargs: Extra keyword arguments handed to ``text.CitableText``
     :rtype: lxml.etree._Element
     :returns: XML representation of the object
     """
     if output == Mimetypes.PYTHON.ETREE:
         return xmlparser(str(self))

     if output == Mimetypes.PYTHON.MyCapytain.ReadableText:
         # Metadata of every CTS collection parent that has metadata is added to our own
         merged = self.metadata
         for ancestor in self.parents:
             if isinstance(ancestor, cts.CTSCollection) and hasattr(ancestor, "metadata"):
                 merged = merged + ancestor.metadata
         return text.CitableText(urn=self.urn, citation=self.citation, metadata=merged, **kwargs)

     if output == Mimetypes.XML.CTS:
         return str(self)

     return None
Example #35
0
    def getPassagePlus(self, reference=None):
        """ Fetch a passage together with its label and wrap it in a Passage

        The label found in the reply is fed to the request parser, then the metadata and the
        citation of the object are copied onto the returned passage.

        :param reference: Reference of the passage; when given it is appended to the URN of
            the text
        :type reference: Reference or List of text_type
        :rtype: Passage
        :returns: Object representing the passage
        """
        urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)

        reply = xmlparser(self.retriever.getPassagePlus(urn=urn))
        label_node = reply.xpath("//ti:reply/ti:label", namespaces=NS)[0]
        self.__parse_request__(label_node)

        result = Passage(urn=urn, resource=reply, retriever=self.retriever)
        result.metadata, result.citation = self.metadata, self.citation
        return result
Example #36
0
    def parse(self, resource):
        """ Feed the object from an XML resource

        Reads the urn, the language (translations only), the descriptions and labels, the
        citations and the content of the ``ti:online`` node (docname, validation schema and
        namespace mappings).

        :param resource: An xml representation object
        :type resource: basestring or lxml.etree._Element
        :returns: None
        """
        lang_attribute = "{http://www.w3.org/XML/1998/namespace}lang"

        self.xml = xmlparser(resource)
        self.urn = URN(self.xml.get("urn"))
        self.id = str(self.urn)

        if self.subtype == "Translation":
            language = self.xml.get(lang_attribute)
            if language is not None:
                self.lang = language

        # Descriptions first, labels second; nodes without an xml:lang attribute are ignored
        for key, path in (("description", "ti:description"), ("label", "ti:label")):
            for node in self.xml.xpath(path, namespaces=NS):
                node_lang = node.get(lang_attribute)
                if node_lang is None:
                    continue
                self.metadata[key][node_lang] = node.text

        self.__findCitations(
            xml=self.xml,
            xpath="ti:online/ti:citationMapping/ti:citation"
        )

        online_nodes = self.xml.xpath("ti:online", namespaces=NS)
        if len(online_nodes) > 0:
            online = online_nodes[0]
            self.docname = online.get("docname")
            # With several ti:validate nodes, the last schema wins
            for validation in online.xpath("ti:validate", namespaces=NS):
                self.validate = validation.get("schema")
            for mapping in online.xpath("ti:namespaceMapping", namespaces=NS):
                self.metadata["namespaceMapping"][mapping.get("abbreviation")] = mapping.get("nsURI")

        return None
Example #37
0
    def test_ingest_single_and(self):
        text = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
    <tei:cRefPattern n="section" matchPattern="(.+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n='$1' and @type='section'])" />
</tei:tei>
""".replace("\n", "").replace("\s+", " "))
        citation = Citation.ingest(text)
        self.maxDiff = None
        self.assertEqual(
            str(citation),
            """<tei:cRefPattern n="section" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'$1\' and @type='section'])"><tei:p>This pointer pattern extracts section</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            citation.scope,
            "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']")
        self.assertEqual(citation.xpath,
                         "/tei:div[@n='?' and @type='section']")
        self.assertEqual(
            citation.fill("1"),
            "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'1\' and @type='section']"
        )
Example #38
0
 def test_get_label(self):
     """ GetLabel reply exposes the metadata and the citation scheme of the text """
     # Need to parse with Citation and parse individually or simply check for some equality
     response = self.app.get("/cts?request=GetLabel&urn=urn:cts:latinLit:phi1294.phi002.perseus-lat2")
     data = response.data.decode("utf-8").replace("\n", "")
     parsed = xmlparser(data)
     label = parsed.xpath(".//ti:label", namespaces=XPATH_NAMESPACES)
     # Raw string: "\s" in a normal string literal is an invalid escape sequence
     label_str = re.sub(r"\s+", " ", tostring(label[0], encoding=str)).replace("\n", "")
     self.assertIn(
         '<groupname xml:lang="eng">Martial</groupname>',
         label_str,
         "groupname should be exported correctly"
     )
     self.assertIn(
         '<title xml:lang="eng">Epigrammata</title>',
         label_str,
         "title should be exported correctly"
     )
     self.assertIn(
         '<description xml:lang="eng"> M. Valerii Martialis Epigrammaton libri / recognovit W. Heraeus </description>',
         label_str,
         "description should be exported correctly"
     )
     self.assertIn(
         '<label xml:lang="eng">Epigrammata</label>',
         label_str,
         "label should be exported correctly"
     )
     citation = Citation.ingest(label[0])
     self.assertEqual(
         len(citation), 3, "There should be three level of citation"
     )
     self.assertEqual(
         citation.name, "book", "First level is book"
     )
     # The cache is only inspected when the test case was set up with one
     if self.cache is not None:
         self.assertGreater(
             len(self.cache.cache._cache), 0,
             "There should be something cached"
         )
Example #39
0
    def test_ingest_multiple(self):
        b = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
<tei:cRefPattern n="line"
             matchPattern="(\\w+).(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1' and @type='section']/tei:div[@n='$2']/tei:l[@n='$3'])">
    <tei:p>This pointer pattern extracts line</tei:p>
</tei:cRefPattern>
<tei:cRefPattern n="poem"
             matchPattern="(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2'])">
    <tei:p>This pointer pattern extracts poem</tei:p>
</tei:cRefPattern>
<tei:cRefPattern n="book"
             matchPattern="(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1'])">
    <tei:p>This pointer pattern extracts book</tei:p>
</tei:cRefPattern>
</tei:tei>
""".replace("\n", "").replace("\s+", " "))

        a = Citation.ingest(b)

        self.assertEqual(
            str(a),
            """<tei:cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\'])"><tei:p>This pointer pattern extracts book</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            str(a.child),
            """<tei:cRefPattern n="poem" matchPattern="(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\'])"><tei:p>This pointer pattern extracts poem</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            str(a.child.child),
            """<tei:cRefPattern n="line" matchPattern="(\\w+)\.(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\' and @type=\'section\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            a.child.child.fill(Reference("1.2.3")),
            "/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'1\' and @type=\'section\']/tei:div[@n=\'2\']/tei:l[@n=\'3\']"
        )
Example #40
0
    def test_ingest_multiple(self):
        b = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
<tei:cRefPattern n="line"
             matchPattern="(\\w+).(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1' and @type='section']/tei:div[@n='$2']/tei:l[@n='$3'])">
    <tei:p>This pointer pattern extracts line</tei:p>
</tei:cRefPattern>
<tei:cRefPattern n="poem"
             matchPattern="(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2'])">
    <tei:p>This pointer pattern extracts poem</tei:p>
</tei:cRefPattern>
<tei:cRefPattern n="book"
             matchPattern="(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1'])">
    <tei:p>This pointer pattern extracts book</tei:p>
</tei:cRefPattern>
</tei:tei>
""".replace("\n", "").replace("\s+", " "))

        a = Citation.ingest(b)

        self.assertEqual(
            str(a),
            """<tei:cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\'])"><tei:p>This pointer pattern extracts book</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            str(a.child),
            """<tei:cRefPattern n="poem" matchPattern="(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\'])"><tei:p>This pointer pattern extracts poem</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            str(a.child.child),
            """<tei:cRefPattern n="line" matchPattern="(\\w+)\.(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\' and @type=\'section\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            a.child.child.fill(Reference("1.2.3")),
            "/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'1\' and @type=\'section\']/tei:div[@n=\'2\']/tei:l[@n=\'3\']"
        )
Example #41
0
    def parse(resource, parent=None):
        """ Build a work metadata object from its XML representation

        :param resource: Element representing a work
        :type resource: basestring, etree._Element
        :param parent: Parent of the object
        :type parent: XmlCtsTextgroupMetadata
        :return: The work created from ``resource``
        :rtype: XmlCtsWorkMetadata
        """
        xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"

        node = xmlparser(resource)
        work = XmlCtsWorkMetadata(urn=node.get("urn"), parent=parent)

        work_lang = node.get(xml_lang)
        if work_lang is not None:
            work.lang = work_lang

        # Titles that carry no xml:lang attribute are not registered
        for title in node.xpath("ti:title", namespaces=XPATH_NAMESPACES):
            title_lang = title.get(xml_lang)
            if title_lang is None:
                continue
            work.set_cts_property("title", title.text, title_lang)

        # Parse children: editions, translations, then commentaries
        for child_xpath, child_cls in (
            ("ti:edition", XmlCtsEditionMetadata),
            ("ti:translation", XmlCtsTranslationMetadata),
            ("ti:commentary", XmlCtsCommentaryMetadata),
        ):
            xpathDict(xml=node, xpath=child_xpath, cls=child_cls, parent=work)

        __parse_structured_metadata__(work, node)

        return work
Example #42
0
    def getPassagePlus(self, reference=None):
        """ Retrieve a passage together with the information around it

        The citation carried by the retrieved passage is also stored on this
        object as ``self.citation``.

        :param reference: Reference of the passage; when it is empty or None, the urn of the text itself is requested
        :type reference: Reference or List of text_type
        :rtype: CtsPassage
        :returns: Object representing the passage
        """
        urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)

        reply = xmlparser(self.retriever.getPassagePlus(urn=urn))
        passage = CtsPassage(urn=urn, resource=reply, retriever=self.retriever)

        # Only the first ti:label of the reply is handed to the passage; the
        # [0] lookup raises IndexError when the reply holds no label at all.
        passage.__parse_request__(
            reply.xpath("//ti:reply/ti:label", namespaces=XPATH_NAMESPACES)[0]
        )

        self.citation = passage.citation
        return passage
Example #43
0
    def __init__(self, urn=None, citation=None, resource=None, autoreffs=False):
        """ Set up the text and, when a resource is given, parse it right away

        :param urn: Identifier forwarded to the parent constructor
        :param citation: Citation forwarded to the parent constructor and, when not None, also assigned to ``self.citation``
        :param resource: Resource handed to xmlparser; nothing is parsed when it is None
        :param autoreffs: When exactly ``True`` and a resource is given, ``self.parse()`` is called
        """
        super(Text, self).__init__(urn=urn, citation=citation)
        self._passages = Passage()
        # Represents passage we got without asking for all. Storing convenience ?
        self._orphan = defaultdict(Reference)

        self._cRefPattern = MyCapytain.resources.texts.tei.Citation()
        self.xml = None

        if citation is not None:
            self.citation = citation

        # Without a resource there is nothing left to parse
        if resource is None:
            return

        self.resource = resource
        self.xml = xmlparser(resource)
        self.__findCRefPattern(self.xml)

        if autoreffs is True:
            self.parse()
Example #44
0
from __future__ import unicode_literals

import unittest
from six import text_type as str
from io import open

from MyCapytain.common.utils import xmlparser, NS
from MyCapytain.resources.texts.api import *
from MyCapytain.resources.texts.tei import Citation
from MyCapytain.retrievers.cts5 import CTS
from MyCapytain.common.reference import Reference, URN
from lxml import etree
import mock

# Module-level fixtures: each canned CTS response under tests/testing_data/cts
# is opened and passed through xmlparser when this module is imported.
# The paths are relative, so they resolve against the current working
# directory.
with open("tests/testing_data/cts/getValidReff.xml") as f:
    GET_VALID_REFF = xmlparser(f)
with open("tests/testing_data/cts/getpassage.xml") as f:
    GET_PASSAGE = xmlparser(f)
with open("tests/testing_data/cts/getpassageplus.xml") as f:
    GET_PASSAGE_PLUS = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.xml") as f:
    NEXT_PREV = xmlparser(f)
with open("tests/testing_data/cts/getFirstUrn.xml") as f:
    Get_FIRST = xmlparser(f)
with open("tests/testing_data/cts/getFirstUrnEmpty.xml") as f:
    Get_FIRST_EMPTY = xmlparser(f)
with open("tests/testing_data/cts/getlabel.xml") as f:
    GET_LABEL = xmlparser(f)


class TestAPIText(unittest.TestCase):
Example #45
0
 def test_xml(self):
     """ The element given as resource is the very object exposed as ``xml`` """
     element = xmlparser(
         '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     )
     tei_resource = TEIResource(resource=element)
     self.assertIs(element, tei_resource.xml)
Example #46
0
 def test_str(self):
     """ Converting a TEIResource to a string gives back the XML it was built from """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     tei_resource = TEIResource(resource=xmlparser(source))
     self.assertEqual(str(tei_resource), source)
Example #47
0
 def __export__(self, output=None, domain=""):
     """ Export the object as a parsed tree when the ETREE mimetype is requested

     :param output: Requested mimetype
     :param domain: Not used by this implementation
     :return: Result of xmlparser applied to the CTS XML export when ``output`` is ``Mimetypes.PYTHON.ETREE``, otherwise None
     """
     if output == Mimetypes.PYTHON.ETREE:
         # Go through the CTS XML export first, then parse that result
         cts_xml = self.export(output=Mimetypes.XML.CTS)
         return xmlparser(cts_xml)
     return None
Example #48
0
from MyCapytain.resolvers.cts.api import HttpCtsResolver
from MyCapytain.retrievers.cts5 import HttpCtsRetriever
from MyCapytain.common.utils import xmlparser
from MyCapytain.common.constants import XPATH_NAMESPACES, Mimetypes
from MyCapytain.resources.prototypes.text import Passage
from MyCapytain.resources.collections.cts import XmlCtsTextInventoryMetadata, XmlCtsTextgroupMetadata, XmlCtsWorkMetadata, XmlCtsTextMetadata
from MyCapytain.resources.prototypes.metadata import Collection

from unittest import TestCase
from mock import MagicMock

# Module-level fixtures: each canned CTS response under tests/testing_data/cts
# is opened and passed through xmlparser when this module is imported.
# The paths are relative, so they resolve against the current working
# directory.
with open("tests/testing_data/cts/getpassage.xml") as f:
    GET_PASSAGE = xmlparser(f)
with open("tests/testing_data/cts/getpassageplus.xml") as f:
    GET_PASSAGE_PLUS = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.xml") as f:
    NEXT_PREV = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.nextonly.xml") as f:
    NEXT = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.prevonly.xml") as f:
    PREV = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.xml") as f:
    GET_VALID_REFF_FULL = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.1.1.xml") as f:
    GET_VALID_REFF = xmlparser(f)
with open("tests/testing_data/cts/getCapabilities.xml") as f:
    GET_CAPABILITIES = xmlparser(f)
with open("tests/testing_data/cts/getCapabilities1294002.xml") as f:
    GET_CAPABILITIES_FILTERED = xmlparser(f)
# Unlike the fixtures above, this one is kept as the raw file content and is
# not run through xmlparser.
with open("tests/testing_data/cts/getPassageOtherTest.xml") as f:
    GET_PASSAGE_CITATION_FAILURE = f.read()
Example #49
0
 def test_get_passage_formatted(self):
     response = self.endpoint.getPassage("urn:cts:farsiLit:hafez.divan:1.1.1.1", output=XML)
     p = Passage(resource=xmlparser(response), urn="urn:cts:farsiLit:hafez.divan:1.1.1.1")
     """
Example #50
0
 def test_xml(self):
     """ The element given as resource is the very object exposed as ``xml`` """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     element = xmlparser(source)
     passage = Passage(resource=element)
     self.assertIs(element, passage.xml)
Example #51
0
    def parse(resource, parent=None):
        """ Build an edition metadata object from a resource

        :param resource: Resource handed to xmlparser
        :param parent: Parent passed to the XmlCtsEditionMetadata constructor
        :return: The newly created edition
        :rtype: XmlCtsEditionMetadata
        """
        node = xmlparser(resource)
        edition = XmlCtsEditionMetadata(urn=node.get("urn"), parent=parent)
        # The remaining metadata is filled in from the same parsed node
        XmlCtsEditionMetadata.parse_metadata(edition, node)
        return edition
Example #52
0
 def __init__(self, resource, **kwargs):
     """ Store the parsed resource and the string used to join plain text

     :param resource: Resource handed to xmlparser; the result is stored as ``self.resource``
     :param kwargs: Forwarded unchanged to the parent constructor
     """
     super(TEIResource, self).__init__(**kwargs)
     parsed = xmlparser(resource)
     self.resource = parsed
     # Instance-level join string, seeded from self.PLAINTEXT_STRING_JOIN
     joiner = "" + self.PLAINTEXT_STRING_JOIN
     self.__plaintext_string_join__ = joiner
Example #53
0
 def xmlparse(self, file):
     """ Parse an XML file by delegating to xmlparser

     :param file: Opened File
     :return: Tree
     """
     tree = xmlparser(file)
     return tree
Example #54
0
from MyCapytain.resolvers.cts.api import HttpCTSResolver
from MyCapytain.retrievers.cts5 import CTS
from MyCapytain.common.utils import xmlparser
from MyCapytain.common.constants import NS, Mimetypes
from MyCapytain.common.metadata import Metadatum
from MyCapytain.resources.prototypes.text import Passage
from MyCapytain.resources.collections.cts import TextInventory, TextGroup, Work, Text
from MyCapytain.resources.prototypes.metadata import Collection

from unittest import TestCase
from mock import MagicMock


# Module-level fixtures: each canned CTS response under tests/testing_data/cts
# is opened and passed through xmlparser when this module is imported.
# The paths are relative, so they resolve against the current working
# directory.
with open("tests/testing_data/cts/getpassage.xml") as f:
    GET_PASSAGE = xmlparser(f)
with open("tests/testing_data/cts/getpassageplus.xml") as f:
    GET_PASSAGE_PLUS = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.xml") as f:
    NEXT_PREV = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.nextonly.xml") as f:
    NEXT = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.prevonly.xml") as f:
    PREV = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.xml") as f:
    GET_VALID_REFF_FULL = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.1.1.xml") as f:
    GET_VALID_REFF = xmlparser(f)
with open("tests/testing_data/cts/getCapabilities.xml") as f:
    GET_CAPABILITIES = xmlparser(f)
with open("tests/testing_data/cts/getCapabilities1294002.xml") as f:
    GET_CAPABILITIES_FILTERED = xmlparser(f)
Example #55
0
 def xmlparse(self, file):
     """ Parse an XML file by delegating to xmlparser

     :param file: Opened File
     :return: Tree
     """
     tree = xmlparser(file)
     return tree
Example #56
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import unittest
from six import text_type as str
from io import open

from MyCapytain.resources.texts.api.cts import Passage, Text
from MyCapytain.retrievers.cts5 import CTS
from MyCapytain.common.reference import Reference, Citation, URN
from MyCapytain.common.metadata import Metadata, Metadatum
from MyCapytain.common.utils import xmlparser, NS
import mock

# Module-level fixtures: each canned CTS response under tests/testing_data/cts
# is opened and passed through xmlparser when this module is imported.
# The paths are relative, so they resolve against the current working
# directory.
with open("tests/testing_data/cts/getValidReff.xml") as f:
    GET_VALID_REFF = xmlparser(f)
with open("tests/testing_data/cts/getpassage.xml") as f:
    GET_PASSAGE = xmlparser(f)
with open("tests/testing_data/cts/getpassageplus.xml") as f:
    GET_PASSAGE_PLUS = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.xml") as f:
    NEXT_PREV = xmlparser(f)
with open("tests/testing_data/cts/getFirstUrn.xml") as f:
    Get_FIRST = xmlparser(f)
with open("tests/testing_data/cts/getFirstUrnEmpty.xml") as f:
    Get_FIRST_EMPTY = xmlparser(f)
with open("tests/testing_data/cts/getlabel.xml") as f:
    GET_LABEL = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.1.1.xml") as f:
    GET_VALID_REFF_1_1 = xmlparser(f)
Example #57
0
 def test_xml(self):
     """ The element given as resource is the very object exposed as ``xml`` """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     element = xmlparser(source)
     tei_resource = TEIResource(identifier="dummy", resource=element)
     self.assertIs(element, tei_resource.xml)
import unittest
from io import open

import xmlunittest
from lxml import etree

from MyCapytain.common.utils import xmlparser
import MyCapytain.common.reference
import MyCapytain.errors
import MyCapytain.resources.texts.encodings
import MyCapytain.resources.texts.locals.tei
from tests.resources.commonTests import CapitainsXmlTextTest, CapitainsXmlPassageTests, CapitainsXMLRangePassageTests


def objectifiedParser(x):
    """ Parse ``x`` with xmlparser, passing ``objectify=False``

    :param x: Resource forwarded unchanged to xmlparser
    :return: Whatever xmlparser returns for ``x``
    """
    # NOTE(review): despite the name, objectify is explicitly switched off here
    return xmlparser(x, objectify=False)


class TestLocalXMLTextImplementation(CapitainsXmlTextTest, unittest.TestCase, xmlunittest.XmlTestMixin):
    """ Test XML Implementation of resources found in local file """

    def setUp(self):
        self.text = open("tests/testing_data/texts/sample.xml", "rb")
        self.TEI = MyCapytain.resources.texts.locals.tei.Text(
            resource=objectifiedParser(self.text),
            urn="urn:cts:latinLit:phi1294.phi002.perseus-lat2"
        )
        self.treeroot = etree._ElementTree()

        with open("tests/testing_data/texts/text_or_xpath.xml") as f:
            self.text_complex = MyCapytain.resources.texts.locals.tei.Text(
Example #59
0
         "TEI fails with urn and xml lang on @xml:base/div-{epidoc}"),

        ("tei", "<div>", "<div type='{epidoc}' xml:base='{urn}' xml:lang='{lang}'>", False,
         "TEI fails with urn and without xml lang on @n/div-{epidoc}"),

        ("tei", "<div>", "<div type='{epidoc}' n='{urn}' xml:lang='{lang}'>", False,
         "TEI fails with urn and without xml lang on @xml:base/div-{epidoc}"),

        ("epidoc", "<div>", "<div type='{epidoc}' n='{urn}' xml:lang='{lang}'>", True,
         "Epidoc works with urn and xml lang on @n/div-{epidoc}"),

        ("epidoc", "<div>", "<div type='{epidoc}' xml:base='{urn}' xml:lang='{lang}'>", False,
         "Epidoc fails with urn and xml lang on @xml:base/div-{epidoc}"),

        ("epidoc", "<div>", "<div type='{epidoc}' xml:base='{urn}'>", False,
         "Epidoc fails with urn and without xml lang on @n/div-{epidoc}"),

        ("epidoc", "<div>", "<div type='{epidoc}' n='{urn}'>", False,
         "Epidoc fails with urn and without xml lang on @xml:base/div-{epidoc}")
    ]
    for type_epidoc in ["edition", "translation", "commentary"]
]
# Rebind XMLLANG_DOCUMENTS to its rendered form. For every
# (scheme, source, replacement, boolean, msg) entry of the previous value:
#   - `source` is swapped for `replacement` in TEMPLATES, the urn/lang
#     placeholders are filled in, and the result is parsed with xmlparser
#     then serialized again with tostring(..., encoding=str);
#   - the formatted replacement is appended to the message in parentheses.
# The scheme and the boolean are carried over unchanged.
XMLLANG_DOCUMENTS = [
    (
        scheme,
        tostring(xmlparser(TEMPLATES.replace(source, replacement).format(urn=URN, lang=LANG)), encoding=str),
        boolean,
        msg + " ("+replacement.format(urn=URN, lang=LANG)+")"
    )
    for scheme, source, replacement, boolean, msg in XMLLANG_DOCUMENTS
]
Example #60
0
 def __init__(self, resource, **kwargs):
     super(TEIResource, self).__init__(**kwargs)
     self.resource = xmlparser(resource)