Esempio n. 1
0
                        help='remote CTS server')
    parser.add_argument('--index',
                        metavar="FILE",
                        default=Config.INDEX,
                        help='corpus index file')
    parser.add_argument('--corpus',
                        metavar="DIR",
                        default=Config.DATA,
                        help='local corpus directory')

    args = parser.parse_args()

    # clean destination directory
    dest = os.path.join(args.corpus, 'xml')
    if os.path.exists(dest):
        shutil.rmtree(dest)
        os.makedirs(dest)
    else:
        os.makedirs(dest)

    # Read the corpus metadata
    with open(args.index) as f:
        corpus = [Text.metaFromDict(rec) for rec in json.load(f)]

    # Create a Resolver instance
    resolver = HttpCtsResolver(HttpCtsRetriever(args.server))

    for text in corpus:
        retrieveXML(resolver, text, os.path.join(dest, text.author))
        print()
Esempio n. 2
0
from MyCapytain.resolvers.cts.api import HttpCtsResolver
from MyCapytain.retrievers.cts5 import HttpCtsRetriever
from MyCapytain.common.constants import Mimetypes

# Endpoint of the CTS API hosted in Leipzig, and the URN of the text that
# every request in this walkthrough targets.
CTS_ENDPOINT = "http://cts.dh.uni-leipzig.de/api/cts"
TEXT_URN = "urn:cts:latinLit:phi1294.phi002.perseus-lat2"

# The resolver forwards each call below to that remote API.
resolver = HttpCtsResolver(HttpCtsRetriever(CTS_ENDPOINT))

# Start with the metadata describing the text.
textMetadata = resolver.getMetadata(TEXT_URN)

# The metadata carries the text's citation scheme: show the metadata type,
# then the name of each citation entry it exposes.
citationNames = [citation.name for citation in textMetadata.citation]
print(type(textMetadata), citationNames)

# Fetch a single passage: subreference "2.72.1", i.e. the first line of
# poem seventy-two of the second book.
passage = resolver.getTextualNode(TEXT_URN, subreference="2.72.1")

# Print the passage as plain text, followed by the identifiers of its
# siblings (the previous and next line).
plainText = passage.export(Mimetypes.PLAINTEXT)
print(plainText, passage.siblingsId)

# Finally, list the references found under subreference "3" (book three).
poemsInBook3 = resolver.getReffs(TEXT_URN, subreference="3")
print(poemsInBook3)
Esempio n. 3
0
from MyCapytain.retrievers.cts5 import HttpCtsRetriever
from MyCapytain.resources.collections.cts import XmlCtsTextInventoryMetadata, XmlCtsWorkMetadata
from MyCapytain.common.constants import Mimetypes
from pprint import pprint
"""
To work with a real-life example,
we are going to query the Leipzig CTS API
for metadata about Seneca, who
is represented by urn:cts:latinLit:stoa0255

To retrieve the data, we make a GetMetadata query
through the CTS Retriever.
"""
# Endpoint of the Leipzig CTS API and the URN the metadata query targets.
CTS_ENDPOINT = "http://cts.dh.uni-leipzig.de/api/cts"
TEXTGROUP_URN = "urn:cts:latinLit:stoa0255"

retriever = HttpCtsRetriever(CTS_ENDPOINT)

# Keep the raw reply of the metadata query
# (per the original note, a plain XML string).
response = retriever.getMetadata(objectId=TEXTGROUP_URN)

# At this point all the needed data is in hand and can be turned into
# collections. The text inventory is the main collection type, the one
# used to parse the whole response.
inventory = XmlCtsTextInventoryMetadata.parse(resource=response)

# Print the label of every descendant of the inventory.
# NOTE(review): get_label() is called without a language argument, so
# whatever default it applies decides which label is printed -- confirm
# against the MyCapytain documentation.
for descendant in inventory.descendants:
    label = descendant.get_label()
    print(label)
"""
You should see in there things such as
-   "Seneca, Lucius Annaeus" (The TextGroup or main object)
-   "de Ira" (The Work object)
Esempio n. 4
0
def api_resolver(endpoint) -> Resolver:
    """Build a CTS resolver backed by an HTTP retriever for ``endpoint``.

    :param endpoint: value passed unchanged to ``HttpCtsRetriever``
        (presumably the base URL of the CTS API -- confirm against callers).
    :return: an ``HttpCtsResolver`` wrapping the newly created retriever.
    """
    retriever = HttpCtsRetriever(endpoint)
    return HttpCtsResolver(retriever)