예제 #1
0
class TextXMLFolderResolver(TestCase):
    """ Ensure working state of resolver """
    def setUp(self):
        get_graph().remove((None, None, None))
        self.resolver = CtsCapitainsLocalResolver(["./tests/testing_data/latinLit2"])

    def test_getPassage_full(self):
        """ Test that we can get a full text """
        passage = self.resolver.getTextualNode("urn:cts:latinLit:phi1294.phi002.perseus-lat2")
        self.assertIsInstance(
            passage, PrototypeCtsPassage,
            "GetPassage should always return passages objects"
        )

        children = passage.getReffs()

        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(
            children[0], CtsReference('1'),
            "Resource should be string identifiers"
        )

        self.assertIn(
            "Hic est quem legis ille, quem requiris,", passage.export(output=Mimetypes.PLAINTEXT),
            "Export CtsTextMetadata should work correctly"
        )

        self.assertEqual(
            passage.export(
                output=Mimetypes.PYTHON.ETREE
            ).xpath(
                ".//tei:div[@n='1']/tei:div[@n='1']/tei:l[@n='1']/text()", namespaces=XPATH_NAMESPACES, magic_string=False
            ),
            ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object"
        )

    def test_getPassage_no_canonical(self):
        """ Test that we can get a subreference text passage where no canonical exists"""
        passage = self.resolver.getTextualNode("urn:cts:latinLit:phi0959.phi010.perseus-eng2", "2")
        self.assertEqual(
            passage.export(Mimetypes.PLAINTEXT), "Omne fuit Musae carmen inerme meae; ",
            "CapitainsCtsPassage should resolve if directly asked"
        )
        with self.assertRaises(UnknownObjectError):
            passage = self.resolver.getTextualNode("urn:cts:latinLit:phi0959.phi010", "2")
        with self.assertRaises(InvalidURN):
            passage = self.resolver.getTextualNode("urn:cts:latinLit:phi0959", "2")

    def test_getPassage_subreference(self):
        """ Test that we can get a subreference text passage"""
        passage = self.resolver.getTextualNode("urn:cts:latinLit:phi1294.phi002.perseus-lat2", "1.1")

        # We check we made a reroute to GetPassage request
        self.assertIsInstance(
            passage, PrototypeCtsPassage,
            "GetPassage should always return passages objects"
        )

        children = list(passage.getReffs())

        self.assertEqual(
            str(children[0]), '1.1.1',
            "Resource should be string identifiers"
        )

        self.assertIn(
            "Hic est quem legis ille, quem requiris,", passage.export(output=Mimetypes.PLAINTEXT),
            "Export CtsTextMetadata should work correctly"
        )
        canonical = self.resolver.getTextualNode("urn:cts:latinLit:phi1294.phi002", "1.1")
        self.assertEqual(
            passage.export(output=Mimetypes.PLAINTEXT),
            canonical.export(output=Mimetypes.PLAINTEXT),
            "Canonical text should work"
        )

        self.assertEqual(
            passage.export(output=Mimetypes.PYTHON.ETREE).xpath(".//tei:l[@n='1']/text()", namespaces=XPATH_NAMESPACES, magic_string=False),
            ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object"
        )

    def test_getPassage_full_metadata(self):
        """ Test that we can get a full text with its metadata"""
        passage = self.resolver.getTextualNode("urn:cts:latinLit:phi1294.phi002.perseus-lat2", metadata=True)

        self.assertIsInstance(
            passage, PrototypeCtsPassage,
            "GetPassage should always return passages objects"
        )
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("title"), "eng"]), "Epigrammata",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("groupname"), "eng"]), "Martial",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("label"), "eng"]), "Epigrams",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("description"), "eng"]),
            "M. Valerii Martialis Epigrammaton libri / recognovit W. Heraeus",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            passage.citation.name, "book",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            len(passage.citation), 3,
            "Local Inventory Files should be parsed and aggregated correctly"
        )

        children = list(passage.getReffs(level=3))
        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(
            children[0], CtsReference('1.pr.1'),
            "Resource should be string identifiers"
        )

        self.assertIn(
            "Hic est quem legis ille, quem requiris,", passage.export(output=Mimetypes.PLAINTEXT),
            "Export CtsTextMetadata should work correctly"
        )

        self.assertEqual(
            passage.export(
                output=Mimetypes.PYTHON.ETREE
            ).xpath(
                ".//tei:div[@n='1']/tei:div[@n='1']/tei:l[@n='1']/text()", namespaces=XPATH_NAMESPACES, magic_string=False
            ),
            ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object"
        )

    def test_getPassage_prevnext(self):
        """ Test that we can get a full text with its metadata"""
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2", subreference="1.1",  metadata=True
        )

        self.assertIsInstance(
            passage, PrototypeCtsPassage,
            "GetPassage should always return passages objects"
        )
        self.assertEqual(
            passage.prevId, CtsReference("1.pr"),
            "Previous CapitainsCtsPassage ID should be parsed"
        )
        self.assertEqual(
            passage.nextId, CtsReference("1.2"),
            "Next CapitainsCtsPassage ID should be parsed"
        )

        children = list(passage.getReffs())
        # Ensure navigability
        self.assertIn(
            "verentia ludant; quae adeo antiquis auctoribus defuit, ut",
            passage.prev.export(output=Mimetypes.PLAINTEXT),
            "Left and Right Navigation should be available"
        )
        self.assertIn(
            "Qui tecum cupis esse meos ubicumque libellos ",
            passage.next.export(output=Mimetypes.PLAINTEXT),
            "Left and Right Navigation should be available"
        )

        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(
            str(children[0]), '1.1.1',
            "Resource should be string identifiers"
        )

        self.assertIn(
            "Hic est quem legis ille, quem requiris,", passage.export(output=Mimetypes.PLAINTEXT),
            "Export CtsTextMetadata should work correctly"
        )

        self.assertEqual(
            passage.export(output=Mimetypes.PYTHON.ETREE).xpath(".//tei:l[@n='1']/text()", namespaces=XPATH_NAMESPACES, magic_string=False),
            ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object"
        )

    def test_getPassage_metadata_prevnext(self):
        """ Test that we can get a full text with its metadata"""
        passage = self.resolver.getTextualNode(
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2", subreference="1.1", metadata=True, prevnext=True
        )
        self.assertIsInstance(
            passage, PrototypeCtsPassage,
            "GetPassage should always return passages objects"
        )
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("title"), "eng"]), "Epigrammata",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("groupname"), "eng"]), "Martial",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("label"), "eng"]), "Epigrams",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            str(passage.metadata[RDF_NAMESPACES.CTS.term("description"), "eng"]),
            "M. Valerii Martialis Epigrammaton libri / recognovit W. Heraeus",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            passage.citation.name, "poem",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            passage.citation.root.name, "book",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            len(passage.citation.root), 3,
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            passage.prevId, CtsReference("1.pr"),
            "Previous CapitainsCtsPassage ID should be parsed"
        )
        self.assertEqual(
            passage.nextId, CtsReference("1.2"),
            "Next CapitainsCtsPassage ID should be parsed"
        )
        children = list(passage.getReffs())
        # Ensure navigability
        self.assertIn(
            "verentia ludant; quae adeo antiquis auctoribus defuit, ut",
            passage.prev.export(output=Mimetypes.PLAINTEXT),
            "Left and Right Navigation should be available"
        )
        self.assertIn(
            "Qui tecum cupis esse meos ubicumque libellos ",
            passage.next.export(output=Mimetypes.PLAINTEXT),
            "Left and Right Navigation should be available"
        )

        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(
            str(children[0]), '1.1.1',
            "Resource should be string identifiers"
        )

        self.assertIn(
            "Hic est quem legis ille, quem requiris,", passage.export(output=Mimetypes.PLAINTEXT),
            "Export CtsTextMetadata should work correctly"
        )

        self.assertEqual(
            passage.export(output=Mimetypes.PYTHON.ETREE).xpath(".//tei:l[@n='1']/text()", namespaces=XPATH_NAMESPACES, magic_string=False),
            ["Hic est quem legis ille, quem requiris, "],
            "Export to Etree should give an Etree or Etree like object"
        )

    def test_getMetadata_full(self):
        """ Checks retrieval of Metadata information """
        metadata = self.resolver.getMetadata()
        self.assertIsInstance(
            metadata, Collection,
            "Resolver should return a collection object"
        )
        self.assertIsInstance(
            metadata.members[0], Collection,
            "Members of Inventory should be TextGroups"
        )
        self.assertEqual(
            len(metadata.descendants), 44,
            "There should be as many descendants as there is edition, translation, commentary, works and textgroup + 1 "
            "for default inventory"
        )
        self.assertEqual(
            len(metadata.readableDescendants), 26,
            "There should be as many readable descendants as there is edition, translation, commentary (26 ed+tr+cm)"
        )
        self.assertEqual(
            len([x for x in metadata.readableDescendants if isinstance(x, TextMetadata)]), 26,
            "There should be 24 editions + 1 translation + 1 commentary in readableDescendants"
        )
        self.assertEqual(
            len(metadata.export(output=Mimetypes.PYTHON.ETREE).xpath(
                "//ti:edition[@urn='urn:cts:latinLit:phi1294.phi002.perseus-lat2']", namespaces=XPATH_NAMESPACES)), 1,
            "There should be one node in exported format corresponding to lat2"
        )
        self.assertCountEqual(
            [x["@id"] for x in metadata.export(output=Mimetypes.JSON.DTS.Std)["member"]],
            ["urn:cts:latinLit:phi1294", "urn:cts:latinLit:phi0959",
             "urn:cts:greekLit:tlg0003", "urn:cts:latinLit:phi1276"],
            "There should be 4 Members in DTS JSON"
        )

    def test_getMetadata_subset(self):
        """ Checks retrieval of Metadata information """
        metadata = self.resolver.getMetadata(objectId="urn:cts:latinLit:phi1294.phi002")
        self.assertIsInstance(
            metadata, Collection,
            "Resolver should return a collection object"
        )
        self.assertIsInstance(
            metadata.members[0], TextMetadata,
            "Members of CtsWorkMetadata should be Texts"
        )
        self.assertEqual(
            len(metadata.descendants), 2,
            "There should be as many descendants as there is edition, translation, commentary"
        )
        self.assertEqual(
            len(metadata.readableDescendants), 2,
            "There should be 1 edition + 1 commentary in readableDescendants"
        )
        self.assertEqual(
            len([x for x in metadata.readableDescendants if isinstance(x, TextMetadata)]), 2,
            "There should be 1 edition + 1 commentary in readableDescendants"
        )
        self.assertIsInstance(
            metadata.parent, CtsTextgroupMetadata,
            "First parent should be CtsTextgroupMetadata"
        )
        self.assertIsInstance(
            metadata.parents[0], CtsTextgroupMetadata,
            "First parent should be CtsTextgroupMetadata"
        )
        self.assertEqual(
            len(metadata.export(output=Mimetypes.PYTHON.ETREE).xpath(
                "//ti:edition[@urn='urn:cts:latinLit:phi1294.phi002.perseus-lat2']", namespaces=XPATH_NAMESPACES)), 1,
            "There should be one node in exported format corresponding to lat2"
        )
        self.assertCountEqual(
            [x["@id"] for x in metadata.export(output=Mimetypes.JSON.DTS.Std)["member"]],
            ["urn:cts:latinLit:phi1294.phi002.opp-eng3", "urn:cts:latinLit:phi1294.phi002.perseus-lat2"],
            "There should be two members in DTS JSON"
        )

        tr = self.resolver.getMetadata(objectId="urn:cts:greekLit:tlg0003.tlg001.opp-fre1")
        self.assertIsInstance(
            tr, CtsTranslationMetadata, "Metadata should be translation"
        )
        self.assertEqual(
            tr.lang, "fre", "Language is French"
        )
        self.assertIn(
            "Histoire de la Guerre du Péloponnése",
            tr.get_description("eng"),
            "Description should be the right one"
        )

        cm = self.resolver.getMetadata(objectId="urn:cts:latinLit:phi1294.phi002.opp-eng3")
        self.assertIsInstance(
            cm, CtsCommentaryMetadata, "Metadata should be commentary"
        )
        self.assertEqual(
            cm.lang, "eng", "Language is English"
        )
        self.assertIn(
            "Introduction to Martial's Epigrammata",
            cm.get_description("eng"),
            "Description should be the right one"
        )

    def test_getSiblings(self):
        """ Ensure getSiblings works well """
        previous, nextious = self.resolver.getSiblings(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2", subreference="1.1"
        )
        self.assertEqual(
            previous, CtsReference("1.pr"),
            "Previous reference should be well computed"
        )
        self.assertEqual(
            nextious, CtsReference("1.2"),
            "Next reference should be well computed"
        )

    def test_getSiblings_nextOnly(self):
        """ Ensure getSiblings works well when there is only the next passage"""
        previous, nextious = self.resolver.getSiblings(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2", subreference="1.pr"
        )
        self.assertEqual(
            previous, None,
            "Previous reference should not exist"
        )
        self.assertEqual(
            nextious, CtsReference("1.1"),
            "Next reference should be well computed"
        )

    def test_getSiblings_prevOnly(self):
        """ Ensure getSiblings works well when there is only the previous passage"""
        previous, nextious = self.resolver.getSiblings(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2", subreference="14.223"
        )
        self.assertEqual(
            previous, CtsReference("14.222"),
            "Previous reference should be well computed"
        )
        self.assertEqual(
            nextious, None,
            "Next reference should not exist"
        )

    def test_getReffs_full(self):
        """ Ensure getReffs works well """
        reffs = self.resolver.getReffs(textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2", level=1)
        self.assertEqual(
            len(reffs), 14,
            "There should be 14 books"
        )
        self.assertEqual(
            reffs[0], CtsReference("1")
        )

        reffs = self.resolver.getReffs(textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2", level=2)
        self.assertEqual(
            len(reffs), 1527,
            "There should be 1527 poems"
        )
        self.assertEqual(
            reffs[0], CtsReference("1.pr")
        )

        reffs = self.resolver.getReffs(
            textId="urn:cts:latinLit:phi1294.phi002.perseus-lat2",
            subreference="1.1",
            level=1
        )
        self.assertEqual(
            len(reffs), 6,
            "There should be 6 references"
        )
        self.assertEqual(
            reffs[0], CtsReference("1.1.1")
        )
예제 #2
0
from MyCapytain.resolvers.cts.local import CtsCapitainsLocalResolver
from MyCapytain.common.constants import Mimetypes
import os

os.mkdir('text')
Repository = CtsCapitainsLocalResolver(["./"])
for text in Repository.texts:
    try:
        interactive_text = Repository.getTextualNode(text.id)
        plaintext = interactive_text.export(
            Mimetypes.PLAINTEXT, exclude=["tei:note", "tei:teiHeader"])
        with open('text/{}.txt'.format(text.id.split(':')[-1]), mode='w') as f:
            f.write(plaintext)
    except Exception as E:
        print(E)
        continue