def parse_xml_to_array(xml_file, strict=False, normalize_form=None):
    """Parse an XML file and hand back all of its records.

    A fresh ``XmlHandler`` is built from ``strict`` and ``normalize_form``,
    ``parse_xml`` drives it over ``xml_file``, and the handler's ``records``
    attribute is returned.

    Pass ``strict=True`` if the parser should explicitly check the
    namespaces for the MARCSlim namespace.

    ``normalize_form`` may be one of 'NFC', 'NFKC', 'NFD' or 'NFKD'; see the
    documentation of ``unicodedata.normalize`` for what each form means.
    """
    record_collector = XmlHandler(strict, normalize_form)
    parse_xml(xml_file, record_collector)
    return record_collector.records
def map_xml(function, *files):
    """Call ``function`` for every record parsed out of the given files.

    The callable is installed as the handler's ``process_record`` hook, so
    it receives each extracted record as parsing proceeds. All files share
    one handler instance and are parsed in the order given.

    Example::

        def do_it(r):
            print(r)

        map_xml(do_it, 'marc.xml')
    """
    dispatcher = XmlHandler()
    # Replace the handler's per-record hook with the caller's callable.
    dispatcher.process_record = function
    for source in files:
        parse_xml(source, dispatcher)
def __call__(self, element):
    """Turn the first child of ``element`` into a pymarc record.

    The child is serialized with ``tostring``, parsed through a default
    ``marcxml.XmlHandler``, and the first collected record is returned.
    """
    record_node = element[0]
    # Trace output: text of the record node's second child.
    # NOTE(review): looks like leftover debugging output -- confirm before
    # removing, since callers may see it on stdout.
    print(record_node[1].text)
    sax_handler = marcxml.XmlHandler()
    serialized = tostring(record_node)
    marcxml.parse_xml(StringIO(serialized), sax_handler)
    return sax_handler.records[0]
def __call__(self, element):
    """Parse the first child of ``element`` as MARCXML and return a record.

    The handler is created with ``strict=True`` and ``normalize_form='NFC'``.
    The first child of ``element`` is serialized as UTF-8, parsed, and the
    first record collected by the handler is returned.

    Indexing ``handler.records[0]`` fails if the handler collected nothing
    (presumably an IndexError -- ``records`` looks like a list; confirm).
    """
    # Local import so this method does not depend on the module's import list.
    from io import BytesIO

    handler = marcxml.XmlHandler(strict=True, normalize_form='NFC')
    # Serializing with an explicit byte encoding yields encoded bytes
    # (assuming ``tostring`` is lxml.etree.tostring), so it must be wrapped
    # in a byte stream: a text StringIO rejects bytes on Python 3, while
    # BytesIO works on both Python 2 and 3.
    payload = tostring(element[0], encoding='UTF-8')
    marcxml.parse_xml(BytesIO(payload), handler)
    return handler.records[0]
def __call__(self, element):
    """Parse the first child of ``element`` as MARCXML and return a record.

    The child is serialized as UTF-8, run through a default
    ``marcxml.XmlHandler``, and the first record the handler collected is
    returned.

    Indexing ``handler.records[0]`` fails if the handler collected nothing
    (presumably an IndexError -- ``records`` looks like a list; confirm).
    """
    # Local import so this method does not depend on the module's import list.
    from io import BytesIO

    handler = marcxml.XmlHandler()
    # The UTF-8 serialization is a byte string (assuming ``tostring`` is
    # lxml.etree.tostring); a text StringIO rejects bytes on Python 3, so
    # feed the parser a byte stream instead. BytesIO also works on Python 2.
    xml_bytes = tostring(element[0], encoding='UTF-8')
    marcxml.parse_xml(BytesIO(xml_bytes), handler)
    return handler.records[0]
def MarcXML(xml):
    """Return the first pymarc record parsed from the XML node ``xml``.

    The node is serialized to UTF-8 with ``tostring``, wrapped in an
    in-memory byte stream, and parsed with a default ``marcxml.XmlHandler``.
    """
    sax_handler = marcxml.XmlHandler()
    stream = io.BytesIO(tostring(xml, encoding='UTF-8'))
    marcxml.parse_xml(stream, sax_handler)
    first_record = sax_handler.records[0]
    return first_record
def __call__(self, element):
    """Parse the first child of ``element`` as MARCXML and return a record.

    The handler is created with ``strict=True`` and ``normalize_form="NFC"``.
    The first child of ``element`` is serialized as UTF-8, parsed, and the
    first record collected by the handler is returned.

    Indexing ``handler.records[0]`` fails if the handler collected nothing
    (presumably an IndexError -- ``records`` looks like a list; confirm).
    """
    # Local import so this method does not depend on the module's import list.
    from io import BytesIO

    handler = marcxml.XmlHandler(strict=True, normalize_form="NFC")
    # An explicit byte encoding makes the serializer return bytes (assuming
    # ``tostring`` is lxml.etree.tostring). A text StringIO raises TypeError
    # for bytes on Python 3, so a byte stream is the correct wrapper.
    encoded_record = tostring(element[0], encoding="UTF-8")
    marcxml.parse_xml(BytesIO(encoded_record), handler)
    return handler.records[0]
def __call__(self, element):
    """Build a pymarc record from the first child of ``element``.

    The child is serialized with ``tostring`` and parsed through a default
    ``marcxml.XmlHandler``; the first collected record is returned.
    """
    sax_handler = marcxml.XmlHandler()
    # An earlier variant prepended an explicit
    # '<?xml version="1.0" encoding="UTF-8"?>' declaration to the serialized
    # node before parsing; the node is now parsed as serialized.
    serialized_node = tostring(element[0])
    xml_stream = StringIO(serialized_node)
    marcxml.parse_xml(xml_stream, sax_handler)
    return sax_handler.records[0]
from OAIClient import *
from oaipmh import metadata
from oaipmh.datestamp import datestamp_to_datetime
from cStringIO import StringIO
from lxml.etree import tostring, XPathEvaluator
from pymarc import marcxml
from pymarc.record import *
import datetime
import sys
import properties


class MARCXMLReader(object):
    """Returns the PyMARC record from the OAI structure for MARC XML"""

    def __call__(self, element):
        """Parse the first child of ``element`` and return the first record.

        The child is serialized with lxml's ``tostring`` and fed to pymarc's
        ``parse_xml`` through a default ``XmlHandler``.
        """
        handler = marcxml.XmlHandler()
        marcxml.parse_xml(StringIO(tostring(element[0])), handler)
        return handler.records[0]


# Command line: <from date> <until date> <comma-separated sigel list>.
if len(sys.argv) != 4:
    # Parenthesized form prints the same text on Python 2 and stays valid
    # syntax on Python 3.
    print("Usage: deletebib.py <YYYYMMDD> <YYYYMMDD> <sigel1>[,sigel2,sigel3,...,sigeln]")
    # sys.exit is always available; the bare exit() builtin is only injected
    # by the site module and is missing when that module is not loaded.
    sys.exit(1)

marcxml_reader = MARCXMLReader()
oaipmh_host = 'http://data.libris.kb.se'
from_time = sys.argv[1]
until = sys.argv[2]
sigels = sys.argv[3].split(',')
# Credentials are read from the local ``properties`` module.
user = properties.user
password = properties.password
def __call__(self, element):
    """Round-trip the first child of ``element`` through pymarc.

    The child is serialized with ``tostring``, parsed with a default
    ``marcxml.XmlHandler``, and the first collected record is converted back
    with ``marcxml.record_to_xml(..., namespace=True)``; that result is
    returned.
    """
    collector = marcxml.XmlHandler()
    raw_xml = tostring(element[0])
    marcxml.parse_xml(StringIO(raw_xml), collector)
    first_record = collector.records[0]
    return marcxml.record_to_xml(first_record, namespace=True)