class QueryResult(xmlmap.XmlObject):
    """Mapping for the results of an eXist XQuery query."""

    #: index of the first result returned
    start = xmlmap.IntegerField('@start|@exist:start')

    #: generic values (*exist:value*) returned from an eXist xquery
    values = xmlmap.StringListField('exist:value')

    #: session id, when a query is requested to be cached
    session = xmlmap.IntegerField('@exist:session')

    # raw mapped value; read it through :attr:`count`
    _raw_count = xmlmap.IntegerField('@count|@exist:count')

    @property
    def count(self):
        """Number of results returned in this chunk; ``0`` when the
        mapped value is falsy."""
        raw = self._raw_count
        return raw if raw else 0

    # raw mapped value; read it through :attr:`hits`
    _raw_hits = xmlmap.IntegerField('@hits|@exist:hits')

    @property
    def hits(self):
        """Total number of hits found by the search; ``0`` when the
        mapped value is falsy."""
        raw = self._raw_hits
        return raw if raw else 0

    @property
    def results(self):
        """The result documents themselves as nodes, beginning at
        :attr:`start` and holding :attr:`count` members."""
        child_nodes = self.node.xpath('*')
        return child_nodes
class DziImage(xmlmap.XmlObject):
    """Simple XmlObject to generate DZI xml for DeepZoom/Seadragon
    functionality."""

    ROOT_NAME = "Image"
    ROOT_NS = "http://schemas.microsoft.com/deepzoom/2008"
    ROOT_NAMESPACES = dict(dz=ROOT_NS)

    #: integer from the ``@TileSize`` attribute
    tilesize = xmlmap.IntegerField("@TileSize")
    #: integer from the ``@Overlap`` attribute
    overlap = xmlmap.IntegerField("@Overlap")
    #: string from the ``@Format`` attribute
    format = xmlmap.StringField("@Format")
    #: integer from ``dz:Size/@Width``
    width = xmlmap.IntegerField("dz:Size/@Width")
    #: integer from ``dz:Size/@Height``
    height = xmlmap.IntegerField("dz:Size/@Height")
class ProcessInformation(XmlObject):
    """XmlObject mapping for a ``ProcessInformation`` element."""

    ROOT_NAME = "ProcessInformation"

    #: integer from the ``Pid`` element
    pid = xmlmap.IntegerField("Pid")
    #: string from the ``ImageName`` element
    image = xmlmap.StringField("ImageName")
    # Must generate more samples: bytes, string or integer?
    cmd_line_signature = xmlmap.StringField("CmdLineSignature")
    #: integer from the ``Uptime`` element
    uptime = xmlmap.IntegerField("Uptime")
    #: :class:`ProcessVmInformation` node
    vm = xmlmap.NodeField("ProcessVmInformation", ProcessVmInformation)
    #: optional parent process; ``'self'`` is passed as the node class so
    #: the nested element is mapped with this same class
    parent = xmlmap.NodeField(
        "ParentProcess/ProcessInformation",
        "self",
        required=False,
    )
class OSVersionInformation(XmlObject):
    """XmlObject mapping for an ``OSVersionInformation`` element."""

    ROOT_NAME = "OSVersionInformation"

    #: string from ``WindowsNTVersion``
    version = xmlmap.StringField("WindowsNTVersion")
    #: integer from ``Build``
    build = xmlmap.IntegerField("Build")
    #: string from ``Product``
    product = xmlmap.StringField("Product")
    #: string from ``Edition``
    edition = xmlmap.StringField("Edition")
    #: string from ``BuildString``
    build_info = xmlmap.StringField("BuildString")
    #: integer from ``Revision``
    revision = xmlmap.IntegerField("Revision")
    #: string from ``Flavor``
    flavor = xmlmap.StringField("Flavor")
    #: string from ``Architecture``
    architecture = xmlmap.StringField("Architecture")
    #: integer from ``LCID``
    lcid = xmlmap.IntegerField("LCID")
class ESearchResponse(xmlmap.XmlObject):
    """Minimal wrapper for ESearch XML returns."""

    #: total articles matching the query
    count = xmlmap.IntegerField("Count")

    #: server-assigned id for this query in history
    query_key = xmlmap.IntegerField("QueryKey")

    #: server-assigned web environment for history management
    webenv = xmlmap.StringField("WebEnv")

    #: first page of document UIDs (*not* PMIDs) matching the query
    docid = xmlmap.IntegerListField("IdList/Id")
class Item(xmlmap.XmlObject):
    ''':class:`~eulxml.xmlmap.XmlObject` to read Item information returned
    by the DigWF API.

    (Not all fields provided by DigWF are mapped here; only those
    currently in use.)
    '''

    #: pid (noid portion of the ARK or Fedora pid)
    pid = xmlmap.StringField('@pid')
    #: item_id within the DigWF
    item_id = xmlmap.StringField('@id')
    #: control key (e.g., ocm or ocn number in euclid; unique per book,
    #: not per volume)
    control_key = xmlmap.StringField('@control_key')
    # title
    #: volume
    volume = xmlmap.StringField('volume')
    #: display image path
    display_image_path = xmlmap.StringField('display_images_path')
    #: display images count
    display_image_count = xmlmap.IntegerField('display_images_path/@count')
    #: path to OCR files (text & word position)
    ocr_file_path = xmlmap.StringField('ocr_files_path')
    #: ocr file count
    ocr_file_count = xmlmap.IntegerField('ocr_files_path/@count')
    #: path to PDF file
    pdf = xmlmap.StringField('pdf_file')
    #: path to ABBYY FineReader XML file
    ocr_file = xmlmap.StringField('ocr_file')
    #: path to marc xml file
    marc_path = xmlmap.StringField('marc_file')
    #: collection id
    collection_id = xmlmap.IntegerField('collection/@id')
    #: collection name
    collection_name = xmlmap.StringField('collection')

    # NOTE: these mappings are incomplete, and only include what was used
    # for readux page ingest; we will likely need to add more mappings

    @cached_property
    def marc(self):
        '''First MARC record parsed (via pymarc) from the MARC XML file at
        :attr:`marc_path`.

        :returns: the first record returned by
            :func:`pymarc.parse_xml_to_array`, or ``None`` (after printing
            a diagnostic message) when :attr:`marc_path` is unset or the
            file does not exist
        '''
        marc_path = self.marc_path
        # Guard against an unmapped/empty path: os.path.exists() raises
        # TypeError when handed None.
        if marc_path and os.path.exists(marc_path):
            # Binary mode leaves character decoding to the XML parser,
            # which honours the encoding declared in the document.
            with open(marc_path, 'rb') as marcdata:
                return pymarc.parse_xml_to_array(marcdata)[0]

        # Single-argument call form is valid on both Python 2 and 3.
        print("Check if file %s exists or your mount connection" % marc_path)
        return None
class Line(Base):
    """One line of text inside a :class:`Paragraph`."""

    ROOT_NAME = "line"

    #: integer baseline
    baseline = xmlmap.IntegerField("@baseline")
    #: integer left
    left = xmlmap.IntegerField("@l")
    #: integer top
    top = xmlmap.IntegerField("@t")
    #: integer right
    right = xmlmap.IntegerField("@r")
    #: integer bottom
    bottom = xmlmap.IntegerField("@b")
    #: list of :class:`Formatting` elements
    formatted_text = xmlmap.NodeListField(frns("formatting"), Formatting)
class DatastreamProfile(_FedoraBase): """:class:`~eulxml.xmlmap.XmlObject` for datastream profile information returned by :meth:`REST_API.getDatastream`.""" # default namespace is fedora manage ROOT_NAME = 'datastreamProfile' label = xmlmap.StringField('m:dsLabel') "datastream label" version_id = xmlmap.StringField('m:dsVersionID') "current datastream version id" created = FedoraDateField('m:dsCreateDate') "date the datastream was created" state = xmlmap.StringField('m:dsState') "datastream state (A/I/D - Active, Inactive, Deleted)" mimetype = xmlmap.StringField('m:dsMIME') "datastream mimetype" format = xmlmap.StringField('m:dsFormatURI') "format URI for the datastream, if any" control_group = xmlmap.StringField('m:dsControlGroup') "datastream control group (inline XML, Managed, etc)" size = xmlmap.IntegerField( 'm:dsSize') # not reliable for managed datastreams as of Fedora 3.3 "integer; size of the datastream content" versionable = xmlmap.SimpleBooleanField('m:dsVersionable', 'true', 'false') "boolean; indicates whether or not the datastream is currently being versioned" # infoType ? # location ? checksum = xmlmap.StringField('m:dsChecksum') "checksum for current datastream contents" checksum_type = xmlmap.StringField('m:dsChecksumType') "type of checksum" checksum_valid = xmlmap.SimpleBooleanField('m:dsChecksumValid', 'true', 'false') '''Boolean flag indicating if the current checksum is valid. Only
class _BaseMessage(_BaseCerp):
    """Elements common to messages."""

    local_id = xmlmap.IntegerField("xm:LocalId")
    message_id = xmlmap.StringField("xm:MessageId")
    message_id_supplied = xmlmap.SimpleBooleanField(
        "xm:MessageId/@Supplied",
        true="1",
        false=None,
    )
    mime_version = xmlmap.StringField("xm:MimeVersion")
    # FIXME: really datetime
    # NOTE: eulxml.xmlmap.DateTimeField supports specifying format,
    # but we might need additional work since %z only works with
    # strftime, not strptime
    orig_date_list = xmlmap.StringListField("xm:OrigDate")
    from_list = xmlmap.StringListField("xm:From")
    sender_list = xmlmap.StringListField("xm:Sender")
    to_list = xmlmap.StringListField("xm:To")
    cc_list = xmlmap.StringListField("xm:Cc")
    bcc_list = xmlmap.StringListField("xm:Bcc")
    in_reply_to_list = xmlmap.StringListField("xm:InReplyTo")
    references_list = xmlmap.StringListField("xm:References")
    subject_list = xmlmap.StringListField("xm:Subject")
    comments_list = xmlmap.StringListField("xm:Comments")
    keywords_list = xmlmap.StringListField("xm:Keywords")
    headers = xmlmap.NodeListField("xm:Header", Header)
    single_body = xmlmap.NodeField("xm:SingleBody", SingleBody)
    multi_body = xmlmap.NodeField("xm:MultiBody", MultiBody)

    @property
    def body(self):
        """The single body when it is truthy, otherwise the multi body."""
        single = self.single_body
        return single if single else self.multi_body

    # NOTE(review): named ``*_list`` but mapped with NodeField (a single
    # node) -- confirm whether NodeListField was intended.
    incomplete_list = xmlmap.NodeField("xm:Incomplete", Incomplete)

    def __repr__(self):
        """Show the class name with the message id, falling back to the
        local id and then to a placeholder."""
        identifier = self.message_id or self.local_id or '(no id)'
        return '<%s %s>' % (self.__class__.__name__, identifier)
class Items(xmlmap.XmlObject):
    """:class:`~eulxml.xmlmap.XmlObject` for the response returned by
    getItems.

    Carries a count of the number of items found and a list of
    :class:`Item` objects with details about each item.
    """

    #: number of items in the result
    count = xmlmap.IntegerField("@count")

    #: list of items as instances of :class:`~readux.books.digwf.Item`
    items = xmlmap.NodeListField("item", Item)
class Block(Base):
    '''A single block of content on a :class:`Page`.'''

    # Root element is ``block`` -- the element name :class:`Page` maps its
    # blocks from -- rather than ``page``.
    ROOT_NAME = 'block'

    #: type of block (Text, Table, Picture, Barcode)
    type = xmlmap.StringField('@blockType')
    #: integer left
    left = xmlmap.IntegerField('@l')
    #: integer top
    top = xmlmap.IntegerField('@t')
    #: integer right
    right = xmlmap.IntegerField('@r')
    #: integer bottom
    bottom = xmlmap.IntegerField('@b')

    # must have one & only one region;
    # region/rect dimensions appear to be redundant...

    #: list of :class:`Paragraph` elements
    paragraphs = xmlmap.NodeListField(frns('text/par'), Paragraph)
class SystemInformation(XmlObject):
    """XmlObject mapping for a ``SystemInformation`` element."""

    ROOT_NAME = "SystemInformation"

    #: string from ``MID``
    id = xmlmap.StringField("MID")
    #: string from ``SystemManufacturer``
    manufacturer = xmlmap.StringField("SystemManufacturer")
    #: string from ``SystemProductName``
    product = xmlmap.StringField("SystemProductName")
    #: string from ``BIOSVersion``
    bios_version = xmlmap.StringField("BIOSVersion")
    # TODO: Parse to date with timezone bias
    install_date = xmlmap.IntegerField("OSInstallDate")
class SecondaryParameter(XmlObject):
    """Secondary parameter complex type."""

    ROOT_NAME = "SECONDARYPARAMETER"

    #: parameter ID (``int``)
    id = xmlmap.IntegerField("@id")

    #: parameter value (``string``)
    value = xmlmap.StringField("@value")
class File(XmlObject):
    """File complex type."""

    ROOT_NAME = "FILE"

    #: file name (``string``)
    name = xmlmap.StringField("@filename")

    #: file type (``int``)
    type = xmlmap.IntegerField("@filetype")
class Paragraph(Base):
    """One paragraph of text somewhere in a :class:`Document`."""

    ROOT_NAME = "par"

    #: text alignment (Left, Center, Right, Justified); default is Left
    align = xmlmap.StringField("@align")
    #: integer left indent
    left_indent = xmlmap.IntegerField("@leftIndent")
    #: integer right indent
    right_indent = xmlmap.IntegerField("@rightIndent")
    #: integer start indent
    start_indent = xmlmap.IntegerField("@startIndent")
    #: integer line spacing
    line_spacing = xmlmap.IntegerField("@lineSpacing")
    # dropChars stuff ?

    #: list of :class:`Line` elements
    lines = xmlmap.NodeListField(frns("line"), Line)
class Parameter(XmlObject):
    """Parameter complex type."""

    ROOT_NAME = "PARAMETER"

    #: parameter ID (``int``)
    id = xmlmap.IntegerField("@id")

    #: optional parameter name (``string``)
    name = xmlmap.StringField("@name", required=False)

    #: parameter value (``string``)
    value = xmlmap.StringField("@value")
class Page(Base):
    """One page of a :class:`Document`."""

    ROOT_NAME = "page"

    #: integer width
    width = xmlmap.IntegerField("@width")
    #: integer height
    height = xmlmap.IntegerField("@height")
    #: integer resolution
    resolution = xmlmap.IntegerField("@resolution")

    #: list of :class:`Block` elements in this page
    blocks = xmlmap.NodeListField(frns("block"), Block)
    #: text :class:`Block` elements (where type is "Text")
    text_blocks = xmlmap.NodeListField(
        frns('block[@blockType="Text"]'),
        Block,
    )
    #: picture :class:`Block` elements (where type is "Picture")
    picture_blocks = xmlmap.NodeListField(
        frns('block[@blockType="Picture"]'),
        Block,
    )

    # block position info possibly redundant? map paragraphs directly
    #: list of :class:`Paragraph` elements in any of the blocks on this page
    paragraphs = xmlmap.NodeListField(frns("block/text/par"), Paragraph)
class Unitid(_EadBase):
    """Unitid element."""

    ROOT_NAME = "unitid"

    #: machine-readable identifier - `@identifier`
    identifier = xmlmap.IntegerField("@identifier")
    #: country code - `@countrycode`
    country_code = xmlmap.StringField("@countrycode")
    #: repository code - `@repositorycode`
    repository_code = xmlmap.StringField("@repositorycode")
    #: human-readable unitid - (contents of the element)
    value = xmlmap.StringField(".")
class QueryTestModel(xmlmap.XmlObject):
    """XmlObject exercising attribute, element, node, union, function
    and namespaced XPath mappings."""

    ROOT_NAMESPACES = {"ex": "http://example.com/"}

    # plain attribute / element mappings
    id = xmlmap.StringField("@id")
    name = xmlmap.StringField("name")
    description = xmlmap.StringField("description")
    wnn = xmlmap.IntegerField("wacky_node_name")
    # nested object
    sub = xmlmap.NodeField("sub", QuerySubModel)
    # XPath union of several nodes
    or_field = xmlmap.StringField("name|description|@id")
    # XPath function call
    substring = xmlmap.StringField("substring(name, 1, 1)")
    # element in the ``ex`` namespace
    nsfield = xmlmap.StringField("ex:field")
    # repeated element
    years = xmlmap.StringListField("year")
class MachineInfo(XmlObject):
    """MachineInfo complex type."""

    ROOT_NAME = "MACHINEINFO"

    #: machine name (``string``)
    name = xmlmap.StringField("@machinename")

    #: machine operating system version (``string``)
    os = xmlmap.StringField("@os")

    #: machine language identifier (``int``)
    lcid = xmlmap.IntegerField("@lcid")

    #: optional machine OEM name (``string``)
    oem = xmlmap.StringField("@oem", required=False)
class VideoDigitalTech(_BaseDigitalTech):
    """:class:`~eulxml.xmlmap.XmlObject` for Digital Technical Metadata."""

    ROOT_NAME = "digitaltech"

    #: date digital capture was made (string)
    date_captured = xmlmap.StringField(
        'dt:dateCaptured[@encoding="w3cdtf"]',
        required=True,
        help_text="Date digital capture was made",
    )

    #: codec quality - lossless or lossy
    codec_quality = xmlmap.StringField(
        "dt:codecQuality",
        required=False,
        choices=("lossless", "compressed"),
        help_text="Whether the data compression method was lossless or lossy",
    )

    #: duration of the video file
    duration = xmlmap.IntegerField(
        'dt:duration/dt:measure[@type="time"][@unit="seconds"]'
        '[@aspect="duration of playing time"]',
        required=True,
        help_text="Duration of video playing time",
    )

    # FIXME/TODO: note and digitization purpose could be plural

    #: general note
    note = xmlmap.StringField(
        'dt:note[@type="general"]',
        required=False,
        help_text="Additional information that may be helpful in "
                  "describing the surrogate",
    )
    note_list = xmlmap.StringListField('dt:note[@type="general"]')

    #: reason the item was digitized
    digitization_purpose = xmlmap.StringField(
        'dt:note[@type="purpose of digitization"]',
        required=False,
        help_text="The reason why the digital surrogate was created "
                  "(e.g., exhibit, patron request, preservation)",
    )
    digitization_purpose_list = xmlmap.StringListField(
        'dt:note[@type="purpose of digitization"]',
    )

    #: :class:`TransferEngineer` - person who digitized the item
    transfer_engineer = xmlmap.NodeField(
        "dt:transferEngineer",
        TransferEngineer,
        required=False,
        help_text="The person who performed the digitization or "
                  "conversion that produced the file",
    )

    #: :class:`VideoCodecCreator` - hardware & software used to digitize
    #: the item
    codec_creator = xmlmap.NodeField(
        'dt:codecCreator[@type="moving image"]',
        VideoCodecCreator,
        help_text="Hardware, software, and software version used to "
                  "create the digital file",
    )
class SearchResults(_FedoraBase):
    """:class:`~eulxml.xmlmap.XmlObject` for the results returned by
    :meth:`REST_API.findObjects`."""

    # default namespace is fedora types
    ROOT_NAME = "result"

    #: session token
    session_token = xmlmap.StringField("t:listSession/t:token")
    #: session cursor
    cursor = xmlmap.IntegerField("t:listSession/t:cursor")
    #: session expiration date
    expiration_date = DateTimeField("t:listSession/t:expirationDate")
    #: search results - list of :class:`SearchResult`
    results = xmlmap.NodeListField(
        "t:resultList/t:objectFields",
        SearchResult,
    )
class TestObject(xmlmap.XmlObject):
    """XmlObject rooted at ``foo`` that combines string, integer,
    boolean, date, node and list field mappings."""

    ROOT_NAME = "foo"

    id = xmlmap.StringField(
        "@id",
        verbose_name="My Id",
        help_text="enter an id",
    )
    int = xmlmap.IntegerField("bar[2]/baz")
    bool = xmlmap.SimpleBooleanField("boolean", "yes", "no")
    longtext = xmlmap.StringField(
        "longtext",
        normalize=True,
        required=False,
    )
    date = xmlmap.DateField("date")
    child = xmlmap.NodeField(
        "bar[1]",
        TestSubobject,
        verbose_name="Child bar1",
    )
    children = xmlmap.NodeListField("bar", TestSubobject)
    other_child = xmlmap.NodeField("plugh", OtherTestSubobject)
    my_opt = xmlmap.StringField("opt", choices=["a", "b", "c"])
    text = xmlmap.StringListField("text")
    numbers = xmlmap.IntegerListField("number")
class EventInfo(XmlObject):
    """EventInfo complex type."""

    ROOT_NAME = "EVENTINFO"

    #: report type (``int``)
    report_type = xmlmap.IntegerField("@reporttype")

    #: event type (``string``)
    type = xmlmap.StringField("@eventtype")

    #: event date (:class:`datetime.datetime`)
    time = DateField("@eventtime")

    #: optional friendly event name (``string``)
    name = xmlmap.StringField("@friendlyeventname", required=False)

    #: optional event description (``string``)
    description = xmlmap.StringField("@eventdescription", required=False)
class Items(xmlmap.XmlObject):
    """:class:`~eulxml.xmlmap.XmlObject` for the response returned by
    getItems.

    Carries a count of the number of items found and a list of
    :class:`Item` objects with details about each item.
    """

    #: raw number of items in the result; read it through :attr:`count`
    _count = xmlmap.IntegerField("@count")

    #: list of items as instances of :class:`~readux.books.digwf.Item`
    items = xmlmap.NodeListField("item", Item)

    @property
    def count(self):
        """Number of items in the result.

        In an empty result set the count is not set, so ``0`` is returned
        to simplify code that checks results.
        """
        found = self._count
        return found if found else 0
class _EadBase(xmlmap.XmlObject):
    """Common EAD namespace declarations, shared by all EAD XmlObject
    instances."""

    ROOT_NS = EAD_NAMESPACE
    ROOT_NAME = "ead"
    ROOT_NAMESPACES = {
        "e": ROOT_NS,
        "xlink": XLINK_NAMESPACE,
        "exist": "http://exist.sourceforge.net/NS/exist",
    }
    # TODO: if there are any universal EAD attributes, they should be
    # added here

    # NOTE: this is not an EAD field, but simplifies using EAD objects
    # with eXist by making exist match-count totals available at any level
    #: count of exist matches under the current field - for use with EAD
    #: and eXist-db
    match_count = xmlmap.IntegerField("count(.//exist:match)")
class Document(Base):
    """:class:`~eulxml.xmlmap.XmlObject` class for an ABBYY OCR XML
    Document.

    .. Note::

        Currently there is no support for tabular formatting elements.
    """

    ROOT_NAME = "document"

    #: pages as :class:`Page`
    pages = xmlmap.NodeListField(frns("page"), Page)
    #: integer page_count (document ``@pagesCount``)
    page_count = xmlmap.IntegerField("@pagesCount")
    #: main language of the document
    language = xmlmap.StringField("@mainLanguage")
    #: all languages included in the document
    languages = xmlmap.StringField("@languages")
class Response(xmlmap.XmlObject):
    """XML response to a SHERPA/RoMEO query."""

    # this mapping ignores header/parameters
    num_hits = xmlmap.IntegerField("header/numhits")

    # api_control values: all, colour, followup, identifier, invalid,
    # journal, publisher
    api_control = xmlmap.StringField("header/apicontrol")

    # outcome values: excessJournals, failed, manyJournals, notFound,
    # publisherFound, singleJournal, uniqueZetoc
    outcome = xmlmap.StringField("header/outcome")

    message = xmlmap.StringField("header/message")
    journals = xmlmap.NodeListField("journals/journal", Journal)
    publishers = xmlmap.NodeListField("publishers/publisher", Publisher)

    def __repr__(self):
        """Show the class name and outcome, plus the hit count when
        ``num_hits`` is not ``None``."""
        cls_name = self.__class__.__name__
        if self.num_hits is not None:
            return u'<%s: %s (%d hits)>' % (
                cls_name, self.outcome, self.num_hits)
        return u'<%s: %s>' % (cls_name, self.outcome)
class RocheTEI(teimap.Tei):
    """TEI mapping that adds chapter details, name/term lists and
    first-letter helpers to :class:`teimap.Tei`."""

    #: English title from the TEI header (``tei:title[@xml:lang="en"]``)
    title_en = xmlmap.StringField(
        'tei:teiHeader/tei:fileDesc/tei:titleStmt/tei:title[@xml:lang="en"]')
    #: integer from ``@n`` of the body ``tei:div``
    chapter = xmlmap.IntegerField('tei:text/tei:body/tei:div/@n')
    #: text of the first ``tei:p`` descendant of the nested ``tei:div``
    chapter_title = xmlmap.StringField(
        'tei:text/tei:body/tei:div/tei:div/descendant::tei:p[1]')
    #: all ``tei:placeName`` values in the document
    place_names = xmlmap.StringListField('//tei:placeName')
    #: all ``tei:persName`` values in the document
    persons = xmlmap.StringListField('//tei:persName')
    #: all ``tei:term`` values in the document
    terms = xmlmap.StringListField('//tei:term')

    @property
    def first_letter_author(self):
        """Return the first letter of the author's surname.

        This is the first character of ``self.author``; an empty or
        missing value is passed through unchanged.
        """
        # NOTE(review): assumes ``author`` (inherited, not visible here) is
        # a string that starts with the surname -- confirm.
        author = self.author
        return author[:1] if author else author

    @property
    def first_letter_title(self):
        """Return the first letter of the title.

        This is the first character of ``self.title``; an empty or missing
        value is passed through unchanged.
        """
        # NOTE(review): assumes ``title`` (inherited, not visible here) is
        # a string -- confirm.
        title = self.title
        return title[:1] if title else title
class PremisObject(premis.Object):
    """Extension of :class:`premis.Object` that maps the
    ``p:objectCharacteristics``, ``p:environment`` and ``p:relationship``
    children."""

    #: integer from ``p:objectCharacteristics/p:compositionLevel``
    composition_level = xmlmap.IntegerField(
        "p:objectCharacteristics/p:compositionLevel",
    )
    #: list of :class:`PremisFixity` nodes
    checksums = xmlmap.NodeListField(
        "p:objectCharacteristics/p:fixity",
        PremisFixity,
    )
    #: :class:`PremisObjectFormat` node
    format = xmlmap.NodeField(
        "p:objectCharacteristics/p:format",
        PremisObjectFormat,
    )
    #: :class:`PremisObjectFormat` from the last ``p:objectCharacteristics``
    latest_format = xmlmap.NodeField(
        "p:objectCharacteristics[position() = last()]/p:format",
        PremisObjectFormat,
    )
    #: :class:`PremisCreatingApplication` node
    creating_application = xmlmap.NodeField(
        "p:objectCharacteristics/p:creatingApplication",
        PremisCreatingApplication,
    )
    #: :class:`PremisEnvironment` whose note is "Original environment"
    original_environment = xmlmap.NodeField(
        'p:environment[p:environmentNote="Original environment"]',
        PremisEnvironment,
    )
    #: list of :class:`PremisObjectCharacteristics` nodes
    characteristics = xmlmap.NodeListField(
        "p:objectCharacteristics",
        PremisObjectCharacteristics,
    )
    #: list of :class:`PremisRelationship` nodes
    relationships = xmlmap.NodeListField(
        "p:relationship",
        PremisRelationship,
    )