def _walkZeeRex(self, session, node):
    """Walk a ZeeRex DOM subtree and copy its settings onto this object.

    Elements are matched on ``node.localName``:

    * ``indexInfo`` -- skipped together with its whole subtree.
    * ``serverInfo`` -- stores the ``version`` attribute, then walks every
      child node.
    * ``database``, ``host``, ``port``, ``title`` -- the flattened text is
      stored as ``databaseName``, ``host``, ``port`` (an int) and ``title``.
    * ``contact`` -- the flattened text is appended to ``self.contacts``.
    * ``schema`` -- recorded in ``recordNamespaces`` and ``schemaLocations``
      and, when it carries a ``transformer`` attribute in
      ``self.c3Namespace``, in ``transformerHash`` as well.

    Any other node is descended into, element children only.

    Raises:
        ConfigFileException: a schema names a transformer for which
            ``self.get_object`` returns None.
    """
    tag = node.localName
    if tag in ['indexInfo']:
        # Ignore
        return
    elif tag == 'serverInfo':
        self.version = node.getAttribute('version')
        # No nodeType filter here: non-element children simply end up in
        # the final branch below.
        for c in node.childNodes:
            self._walkZeeRex(session, c)
    elif tag == 'database':
        self.databaseName = str(flattenTexts(node))
    elif tag == 'host':
        self.host = str(flattenTexts(node))
    elif tag == 'port':
        self.port = int(flattenTexts(node))
    elif tag == 'title':
        self.title = str(flattenTexts(node))
    elif tag == 'contact':
        self.contacts.append(str(flattenTexts(node)))
    elif tag == 'schema':
        identifier = node.getAttribute('identifier')
        location = node.getAttribute('location')
        name = node.getAttribute('name')
        txrid = node.getAttributeNS(self.c3Namespace, 'transformer')
        if txrid:
            txr = self.get_object(session, txrid)
            # Identity test: an object with a permissive __eq__ must not be
            # mistaken for a missing transformer.
            if txr is None:
                raise ConfigFileException(
                    "No transformer to map to for %s" % (txrid))
            self.transformerHash[identifier] = txr
        self.recordNamespaces[name] = identifier
        self.schemaLocations[identifier] = location
    else:
        for c in node.childNodes:
            if c.nodeType == elementType:
                self._walkZeeRex(session, c)
def _handleLxmlConfigNode(self, session, node):
    """Handle one lxml configuration element.

    * A tag listed in ``self.simpleNodes`` sets the attribute named after
      the tag (with any ``{namespace}`` prefix removed) to the stripped,
      flattened text of the element.
    * ``flags`` reads each ``<flag>`` child, which holds an ``<object>``
      and a ``<value>``, and records the object under that flag value in
      ``self.flags``.
    * ``history`` and ``hostmask`` are recognised but not processed.

    Tags are accepted both bare and qualified with ``CONFIG_NS``.

    Raises:
        ConfigFileException: a flag value is neither in ``self.allFlags``
            nor prefixed ``c3fn``, or a flag lacks its object or value
            element.
    """
    if node.tag in self.simpleNodes:
        # find() returns -1 when there is no namespace, so the slice then
        # starts at 0 and keeps the whole tag.
        attrName = node.tag[node.tag.find("}") + 1:]
        setattr(self, attrName, flattenTexts(node).strip())
    elif node.tag in ["flags", "{%s}flags" % CONFIG_NS]:
        # Extract Rights info
        # <flags> <flag> <object> <value> </flag> </flags>
        for c in node.iterchildren(tag=etree.Element):
            if c.tag in ["flag", "{%s}flag" % CONFIG_NS]:
                obj = None
                flag = None
                for c2 in c.iterchildren(tag=etree.Element):
                    if c2.tag in ["object", "{%s}object" % CONFIG_NS]:
                        obj = flattenTexts(c2).strip()
                    elif c2.tag in ["value", "{%s}value" % CONFIG_NS]:
                        flag = flattenTexts(c2).strip()
                        if flag not in self.allFlags and flag[:4] != "c3fn":
                            msg = "Unknown flag: %s" % flag
                            raise ConfigFileException(msg)
                if obj is None or flag is None:
                    msg = ("Missing object or value element for flag for "
                           "user %s" % self.username)
                    # The message used to be built and then discarded.
                    raise ConfigFileException(msg)
                f = self.flags.get(flag, [])
                # An empty <object> registers the flag with no new entry.
                if obj:
                    f.append(obj)
                self.flags[flag] = f
    elif node.tag in ["history", "{%s}history" % CONFIG_NS]:
        # Extract user history
        pass
    elif node.tag in ["hostmask", "{%s}hostmask" % CONFIG_NS]:
        # Extract allowed hostmask list
        pass
def _walkZeeRex(self, session, node):
    """Copy settings from a ZeeRex DOM subtree onto this object.

    ``indexInfo`` subtrees are skipped. ``serverInfo`` stores its
    ``version`` attribute and walks all of its children. ``database``,
    ``host``, ``port`` and ``title`` store their flattened text
    (``port`` as an int), ``contact`` appends to ``self.contacts`` and
    ``schema`` fills ``transformerHash``, ``recordNamespaces`` and
    ``schemaLocations``. Anything else is descended into, element
    children only.

    Raises ConfigFileException when a schema's transformer cannot be
    resolved through ``self.get_object``.
    """
    tag = node.localName

    if tag in ['indexInfo']:
        # Ignore
        return

    if tag == 'serverInfo':
        self.version = node.getAttribute('version')
        for child in node.childNodes:
            self._walkZeeRex(session, child)
        return

    # Elements whose text is stored directly: attribute name + converter.
    textFields = {
        'database': ('databaseName', str),
        'host': ('host', str),
        'port': ('port', int),
        'title': ('title', str),
    }
    if tag in textFields:
        attrName, convert = textFields[tag]
        setattr(self, attrName, convert(flattenTexts(node)))
        return

    if tag == 'contact':
        self.contacts.append(str(flattenTexts(node)))
        return

    if tag == 'schema':
        schemaId = node.getAttribute('identifier')
        schemaLocation = node.getAttribute('location')
        shortName = node.getAttribute('name')
        txrid = node.getAttributeNS(self.c3Namespace, 'transformer')
        if txrid:
            transformer = self.get_object(session, txrid)
            if transformer is None:
                raise ConfigFileException(
                    "No transformer to map to for %s" % (txrid))
            self.transformerHash[schemaId] = transformer
        self.recordNamespaces[shortName] = schemaId
        self.schemaLocations[schemaId] = schemaLocation
        return

    for child in node.childNodes:
        if child.nodeType == elementType:
            self._walkZeeRex(session, child)
def _handleLxmlConfigNode(self, session, node):
    """Handle one lxml configuration element (un-namespaced tags only).

    * A tag listed in ``self.simpleNodes`` sets the attribute of the same
      name to the stripped, flattened text of the element.
    * ``flags`` reads each ``<flag>`` child, which holds an ``<object>``
      and a ``<value>``, and records the object under that flag value in
      ``self.flags``.
    * ``history`` and ``hostmask`` are recognised but not processed.

    Raises:
        ConfigFileException: a flag value is neither in ``self.allFlags``
            nor prefixed ``c3fn``, or a flag lacks its object or value
            element.
    """
    if node.tag in self.simpleNodes:
        setattr(self, node.tag, flattenTexts(node).strip())
    elif node.tag == "flags":
        # Extract Rights info
        # <flags> <flag> <object> <value> </flag> </flags>
        for c in node.iterchildren(tag=etree.Element):
            if c.tag == "flag":
                obj = None
                flag = None
                for c2 in c.iterchildren(tag=etree.Element):
                    if c2.tag == "object":
                        obj = flattenTexts(c2).strip()
                    elif c2.tag == "value":
                        flag = flattenTexts(c2).strip()
                        if flag not in self.allFlags and flag[:4] != "c3fn":
                            raise ConfigFileException(
                                "Unknown flag: %s" % flag)
                # Compare with None by identity rather than equality.
                if obj is None or flag is None:
                    raise ConfigFileException(
                        "Missing object or value element for flag for "
                        "user %s" % self.username)
                f = self.flags.get(flag, [])
                # An empty <object> registers the flag with no new entry.
                if obj:
                    f.append(obj)
                self.flags[flag] = f
    elif node.tag == "history":
        # Extract user history
        pass
    elif node.tag == "hostmask":
        # Extract allowed hostmask list
        pass
def _handleLxmlConfigNode(self, session, node):
    """Handle one lxml configuration element.

    * A tag listed in ``self.simpleNodes`` sets the attribute named after
      the tag (with any ``{namespace}`` prefix removed) to the stripped,
      flattened text of the element.
    * ``flags`` reads each ``<flag>`` child, which holds an ``<object>``
      and a ``<value>``, and records the object under that flag value in
      ``self.flags``.
    * ``history`` and ``hostmask`` are recognised but not processed.

    Tags are accepted both bare and qualified with ``CONFIG_NS``.

    Raises:
        ConfigFileException: a flag value is neither in ``self.allFlags``
            nor prefixed ``c3fn``, or a flag lacks its object or value
            element.
    """
    if node.tag in self.simpleNodes:
        # Strip a leading '{namespace}'; with no '}' the slice starts at 0.
        setattr(self,
                node.tag[node.tag.find('}') + 1:],
                flattenTexts(node).strip())
    elif node.tag in ["flags", '{%s}flags' % CONFIG_NS]:
        # Extract Rights info
        # <flags> <flag> <object> <value> </flag> </flags>
        for c in node.iterchildren(tag=etree.Element):
            if c.tag in ["flag", '{%s}flag' % CONFIG_NS]:
                obj = None
                flag = None
                for c2 in c.iterchildren(tag=etree.Element):
                    if c2.tag in ["object", '{%s}object' % CONFIG_NS]:
                        obj = flattenTexts(c2).strip()
                    elif c2.tag in ["value", '{%s}value' % CONFIG_NS]:
                        flag = flattenTexts(c2).strip()
                        if (flag not in self.allFlags and
                                flag[:4] != "c3fn"):
                            msg = "Unknown flag: %s" % flag
                            raise ConfigFileException(msg)
                if obj is None or flag is None:
                    msg = ("Missing object or value element for flag for "
                           "user %s" % self.username)
                    # Pass the message along; it was previously dropped.
                    raise ConfigFileException(msg)
                f = self.flags.get(flag, [])
                # An empty <object> registers the flag with no new entry.
                if obj:
                    f.append(obj)
                self.flags[flag] = f
    elif node.tag in ["history", '{%s}history' % CONFIG_NS]:
        # Extract user history
        pass
    elif node.tag in ["hostmask", '{%s}hostmask' % CONFIG_NS]:
        # Extract allowed hostmask list
        pass
def _handleLxmlConfigNode(self, session, node):
    """Build ``self.maps`` and ``self.keyMap`` from a ``<cluster>`` element.

    Each ``<map>`` child is turned into a list made of its ``<xpath>``
    texts (stripped) and its ``<process>`` objects, in document order. A
    ``<process>`` with a ``ref`` attribute is resolved through
    ``self.get_object``; one without is retagged ``workflow`` and wrapped
    in a ``CachingWorkflow``. Lists shorter than three items get a default
    extractor entry appended. The first three items of a map typed
    ``key`` become ``self.keyMap``; those of every other map are collected
    into ``self.maps``.

    Elements other than ``<cluster>`` are left untouched.
    """
    if node.tag != "cluster":
        return

    collected = []
    for mapNode in node.iterchildren(tag=etree.Element):
        if mapNode.tag != "map":
            continue
        mapType = mapNode.attrib.get('type', '')
        entry = []
        for part in mapNode.iterchildren(tag=etree.Element):
            if part.tag == "xpath":
                entry.append(flattenTexts(part).strip())
            elif part.tag == "process":
                # turn xpath chain to workflow
                ref = part.attrib.get('ref', None)
                if ref is None:
                    part.tag = 'workflow'
                    process = CachingWorkflow(session, part, self)
                    process._handleLxmlConfigNode(session, part)
                else:
                    process = self.get_object(session, ref)
                entry.append(process)
        if len(entry) < 3:
            # default ExactExtractor
            entry.append([['extractor', 'SimpleExtractor']])
        triple = [entry[0], entry[1], entry[2]]
        if mapType == u'key':
            self.keyMap = triple
        else:
            collected.append(triple)
    self.maps = collected
def _handleLxmlConfigNode(self, session, node):
    """Read the ``<map>`` children of a ``<cluster>`` element.

    For every ``<map>`` the stripped ``<xpath>`` texts and the
    ``<process>`` objects are gathered in document order. A referenced
    process comes from ``self.get_object``; an inline one is retagged
    ``workflow`` and handed to ``CachingWorkflow``. When fewer than three
    items were gathered, a default extractor entry is added. A map typed
    ``key`` sets ``self.keyMap``; every other map is added to
    ``self.maps``.

    Nothing happens for elements other than ``<cluster>``.
    """
    if node.tag == "cluster":
        regularMaps = []
        mapNodes = [kid for kid in node.iterchildren(tag=etree.Element)
                    if kid.tag == "map"]
        for mapNode in mapNodes:
            kind = mapNode.attrib.get('type', '')
            chain = []
            for step in mapNode.iterchildren(tag=etree.Element):
                stepTag = step.tag
                if stepTag == "xpath":
                    chain.append(flattenTexts(step).strip())
                    continue
                if stepTag != "process":
                    continue
                # turn xpath chain to workflow
                ref = step.attrib.get('ref', None)
                if ref is not None:
                    workflow = self.get_object(session, ref)
                else:
                    step.tag = 'workflow'
                    workflow = CachingWorkflow(session, step, self)
                    workflow._handleLxmlConfigNode(session, step)
                chain.append(workflow)
            if len(chain) < 3:
                # default ExactExtractor
                chain.append([['extractor', 'SimpleExtractor']])
            firstThree = [chain[0], chain[1], chain[2]]
            if kind == u'key':
                self.keyMap = firstThree
            else:
                regularMaps.append(firstThree)
        self.maps = regularMaps
def _walkZeeRex(self, session, node):
    """Copy basic server details from a ZeeRex DOM subtree onto this object.

    ``databaseInfo`` and ``metaInfo`` subtrees are skipped. ``serverInfo``
    stores its ``version`` attribute and walks all of its children.
    ``database``, ``host`` and ``port`` store their flattened text as
    ``databaseUrl``, ``host`` and ``port`` (an int). Anything else is
    descended into, element children only.
    """
    tag = node.localName

    if tag in ['databaseInfo', 'metaInfo']:
        # Ignore
        return

    if tag == 'serverInfo':
        self.version = node.getAttribute('version')
        for child in node.childNodes:
            self._walkZeeRex(session, child)
        return

    # Elements whose text is stored directly: attribute name + converter.
    textFields = {
        'database': ('databaseUrl', str),
        'host': ('host', str),
        'port': ('port', int),
    }
    if tag in textFields:
        attrName, convert = textFields[tag]
        setattr(self, attrName, convert(flattenTexts(node)))
        return

    for child in node.childNodes:
        if child.nodeType == elementType:
            self._walkZeeRex(session, child)
def _handleLxmlConfigNode(self, session, node):
    """Build ``self.relations`` from a ``<relations>`` element.

    Each ``<relation name="...">`` child becomes an entry in
    ``self.relations`` whose value is a list of three-item field
    descriptions:

    * ``<object>`` adds ``[text, 'VARCHAR', text]``;
    * ``<field name="...">`` adds ``[name, text, '']``.

    Elements other than ``<relations>`` are left untouched.

    Raises:
        ConfigFileException: a relation or a field has no ``name``
            attribute.
    """
    if node.tag == 'relations':
        self.relations = {}
        for rel in node.iterchildren(tag=etree.Element):
            if rel.tag == 'relation':
                relName = rel.attrib.get('name', None)
                if relName is None:
                    raise ConfigFileException(
                        'Name not supplied for relation')
                fields = []
                for fld in rel.iterchildren(tag=etree.Element):
                    if fld.tag == 'object':
                        oid = flattenTexts(fld)
                        fields.append([oid, 'VARCHAR', oid])
                    elif fld.tag == 'field':
                        fname = fld.attrib.get('name', None)
                        if fname is None:
                            # The exception used to be built but never
                            # raised, so nameless fields were accepted.
                            raise ConfigFileException(
                                'Name not supplied for field')
                        ftype = flattenTexts(fld)
                        fields.append([fname, ftype, ''])
                self.relations[relName] = fields
def _walkZeeRex(self, session, node):
    """Walk a ZeeRex DOM subtree and copy its settings onto this object.

    Elements are matched on ``node.localName``:

    * ``databaseInfo``, ``metaInfo``, ``indexInfo`` -- skipped with their
      whole subtree.
    * ``serverInfo`` -- stores the ``version`` attribute, then walks every
      child node.
    * ``database``, ``host``, ``port`` -- flattened text stored as
      ``databaseUrl``, ``host`` and ``port`` (an int).
    * ``schema`` -- recorded in ``recordNamespaces`` and, when a
      ``transformer`` attribute in ``self.c3Namespace`` is present, in
      ``transformerHash``.
    * ``supports`` with ``type="operation"`` -- maps the element text to
      the workflow named by the ``workflow`` attribute in
      ``self.c3Namespace``, stored in ``workflowHash``.
    * ``default`` -- sets ``default<Type>`` where ``<Type>`` is the
      ``type`` attribute with its first letter capitalised.
    * ``setting`` -- sets the attribute named by ``type``.

    For ``default`` and ``setting`` an all-digit text becomes an int and
    ``'false'`` / ``'true'`` become 0 / 1. Any other node is descended
    into, element children only.

    Raises:
        ConfigFileException: a named transformer or workflow cannot be
            resolved through ``self.get_object``.
    """
    tag = node.localName
    if tag in ['databaseInfo', 'metaInfo', 'indexInfo']:
        # Ignore
        return
    elif tag == 'serverInfo':
        self.version = node.getAttribute('version')
        for c in node.childNodes:
            self._walkZeeRex(session, c)
    elif tag == 'database':
        self.databaseUrl = str(flattenTexts(node))
    elif tag == 'host':
        self.host = str(flattenTexts(node))
    elif tag == 'port':
        self.port = int(flattenTexts(node))
    elif tag == 'schema':
        identifier = node.getAttribute('identifier')
        name = node.getAttribute('name')
        xsl = node.getAttributeNS(self.c3Namespace, 'transformer')
        if xsl:
            txr = self.get_object(session, xsl)
            # Identity test: an object with a permissive __eq__ must not be
            # mistaken for a missing transformer.
            if txr is None:
                raise ConfigFileException(
                    "No transformer to map to for %s" % (xsl))
            self.transformerHash[identifier] = txr
        self.recordNamespaces[name] = identifier
    elif tag == "supports":
        stype = node.getAttribute('type')
        data = flattenTexts(node)
        if stype == 'operation':
            wflw = node.getAttributeNS(self.c3Namespace, 'workflow')
            if wflw:
                flow = self.get_object(session, wflw)
                if flow is None:
                    raise ConfigFileException(
                        "No workflow to map to for %s" % wflw)
                # Reuse the object just fetched instead of a second lookup.
                self.workflowHash[data] = flow
    elif tag == 'default':
        dtype = node.getAttribute('type')
        pname = "default" + dtype[0].capitalize() + dtype[1:]
        data = flattenTexts(node)
        if data.isdigit():
            data = int(data)
        elif data == 'false':
            data = 0
        elif data == 'true':
            data = 1
        setattr(self, pname, data)
    elif tag == 'setting':
        dtype = node.getAttribute('type')
        data = flattenTexts(node)
        if data.isdigit():
            data = int(data)
        elif data == 'false':
            data = 0
        elif data == 'true':
            data = 1
        setattr(self, dtype, data)
    else:
        for c in node.childNodes:
            if c.nodeType == elementType:
                self._walkZeeRex(session, c)
def _walkZeeRex(self, session, node):
    """Copy settings from a ZeeRex DOM subtree onto this object.

    ``databaseInfo``, ``metaInfo`` and ``indexInfo`` subtrees are skipped.
    ``serverInfo`` stores its ``version`` and walks all children.
    ``database``, ``host`` and ``port`` store their flattened text
    (``port`` as an int). ``schema`` fills ``transformerHash`` and
    ``recordNamespaces``. ``supports`` of type ``operation`` fills
    ``workflowHash``. ``default`` and ``setting`` set an attribute from
    the element text, with all-digit text turned into an int and
    ``false`` / ``true`` into 0 / 1. Anything else is descended into,
    element children only.

    Raises ConfigFileException when a named transformer or workflow cannot
    be resolved through ``self.get_object``.
    """
    tag = node.localName

    if tag in ['databaseInfo', 'metaInfo', 'indexInfo']:
        # Ignore
        return

    if tag == 'serverInfo':
        self.version = node.getAttribute('version')
        for child in node.childNodes:
            self._walkZeeRex(session, child)
        return

    if tag == 'database':
        self.databaseUrl = str(flattenTexts(node))
        return

    if tag == 'host':
        self.host = str(flattenTexts(node))
        return

    if tag == 'port':
        self.port = int(flattenTexts(node))
        return

    if tag == 'schema':
        schemaId = node.getAttribute('identifier')
        shortName = node.getAttribute('name')
        xsl = node.getAttributeNS(self.c3Namespace, 'transformer')
        if xsl:
            transformer = self.get_object(session, xsl)
            if transformer is None:
                raise ConfigFileException(
                    'No transformer to map to for %s' % (xsl))
            self.transformerHash[schemaId] = transformer
        self.recordNamespaces[shortName] = schemaId
        return

    if tag == 'supports':
        supportType = node.getAttribute('type')
        text = flattenTexts(node)
        if supportType != 'operation':
            return
        wflw = node.getAttributeNS(self.c3Namespace, 'workflow')
        if not wflw:
            return
        if self.get_object(session, wflw) is None:
            raise ConfigFileException('No workflow to map to for %s' % wflw)
        self.workflowHash[text] = self.get_object(session, wflw)
        return

    if tag in ('default', 'setting'):
        dtype = node.getAttribute('type')
        if tag == 'default':
            target = 'default' + dtype[0].capitalize() + dtype[1:]
        else:
            target = dtype
        text = flattenTexts(node)
        if text.isdigit():
            value = int(text)
        else:
            # Only the literal words are translated; other text is kept.
            value = {'false': 0, 'true': 1}.get(text, text)
        setattr(self, target, value)
        return

    for child in node.childNodes:
        if child.nodeType == elementType:
            self._walkZeeRex(session, child)
def _handleLxmlConfigNode(self, session, node):
    """Build ``self.relations`` from a ``<relations>`` element.

    Each ``<relation name="...">`` child becomes an entry in
    ``self.relations`` whose value is a list of three-item field
    descriptions:

    * ``<object>`` adds ``[text, 'VARCHAR', text]``;
    * ``<field name="...">`` adds ``[name, text, '']``.

    Tags are accepted both bare and qualified with ``CONFIG_NS``.
    Elements other than ``<relations>`` are left untouched.

    Raises:
        ConfigFileException: a relation or a field has no ``name``
            attribute.
    """
    if node.tag in ['relations', '{%s}relations' % CONFIG_NS]:
        self.relations = {}
        for rel in node.iterchildren(tag=etree.Element):
            if rel.tag in ['relation', '{%s}relation' % CONFIG_NS]:
                relName = rel.attrib.get('name', None)
                if relName is None:
                    raise ConfigFileException('Name not supplied for '
                                              'relation')
                fields = []
                for fld in rel.iterchildren(tag=etree.Element):
                    if fld.tag in ['object', '{%s}object' % CONFIG_NS]:
                        oid = flattenTexts(fld)
                        fields.append([oid, 'VARCHAR', oid])
                    elif fld.tag in ['field', '{%s}field' % CONFIG_NS]:
                        fname = fld.attrib.get('name', None)
                        if fname is None:
                            # The exception used to be built but never
                            # raised, so nameless fields were accepted.
                            raise ConfigFileException('Name not supplied '
                                                      'for field')
                        ftype = flattenTexts(fld)
                        fields.append([fname, ftype, ''])
                self.relations[relName] = fields
def dataFromRecordXPaths(session, rec, xps, nTerms=1, joiner=u'; '):
    """Extract data from ``rec`` and return it as a single unicode object.

    The XPaths in ``xps`` are tried in priority order, each evaluated with
    ``rec.process_xpath`` and the module-level ``namespaceUriHash``.
    Evaluation stops as soon as at least ``nTerms`` matches have been
    gathered. At most ``nTerms`` matches are kept; each is flattened to
    text and the results are joined with ``joiner``.
    """
    matches = []
    for xpath in xps:
        matches += rec.process_xpath(session, xpath, namespaceUriHash)
        if len(matches) >= nTerms:
            break
    wanted = matches[:nTerms]
    return joiner.join(flattenTexts(match) for match in wanted)
def __init__(self, data, xml="", docId=None, wordCount=0, byteCount=0):
    """Store the parsed data and its bookkeeping attributes.

    Args:
        data: the parsed representation, stored as ``self.dom``.
        xml: serialised form, stored as ``self.xml``.
        docId: identifier, stored as ``self.id``.
        wordCount: word count to record. When falsy, the count is
            computed from the whitespace-separated flattened text of
            ``data``, falling back to 0 if that fails.
        byteCount: stored as ``self.byteCount``.
    """
    self.dom = data
    self.xml = xml
    self.id = docId
    self.parent = ('', '', -1)
    self.context = None
    self.metadata = {}
    if wordCount:
        self.wordCount = wordCount
    else:
        try:
            # Sometimes this blows up
            self.wordCount = len(flattenTexts(data).split())
        except Exception:
            # Best effort only. Catching Exception rather than everything
            # lets KeyboardInterrupt and SystemExit propagate.
            self.wordCount = 0
    self.byteCount = byteCount
def _backwalkTitles(rec, xpath):
    """Collect unit titles from ``xpath`` upwards until a ``dsc`` step.

    Starting from the full ``xpath``, the text of ``did/unittitle`` under
    the current path is read and the last step is then dropped; this
    repeats until the last remaining step is ``dsc``. Titles are returned
    outermost first.

    Uses the ``session`` name from the enclosing scope.

    Raises:
        IndexError: a level has no ``did/unittitle`` match. The ``did``
            element of that level is dumped first to aid debugging.
    """
    titles = []
    xpathParts = xpath.split('/')
    while xpathParts[-1] != 'dsc':
        currentPath = '/'.join(xpathParts)
        try:
            tn = rec.process_xpath(session,
                                   currentPath + '/did/unittitle')[0]
            t = flattenTexts(tn)
            titles.append(t.strip())
        except IndexError:
            # Call form of print: identical output on Python 2 for a single
            # argument, and valid syntax on Python 3.
            print(etree.dump(rec.process_xpath(session,
                                               currentPath + '/did')[0]))
            raise
        xpathParts.pop(-1)
    titles.reverse()
    return titles
def _handleLxmlConfigNode(self, session, node):
    """Read ``self.tagset`` and ``self.maps`` from a ``<transform>`` element.

    ``tagset`` comes from the attribute of the same name (default ``''``).
    Each ``<map>`` child contributes a two-item list made of the flattened
    text of its first and second ``<xpath>`` children.

    Elements other than ``<transform>`` are left untouched.
    """
    if node.tag != "transform":
        return

    self.tagset = node.attrib.get('tagset', '')
    pairs = []
    for mapNode in node.iterchildren(tag=etree.Element):
        if mapNode.tag != 'map':
            continue
        xpaths = [flattenTexts(part)
                  for part in mapNode.iterchildren(tag=etree.Element)
                  if part.tag == "xpath"]
        source = xpaths[0]
        if source[0] != "#":
            first = source
        else:
            # special case to process
            first = source
        pairs.append([first, xpaths[1]])
    self.maps = pairs
def _handleLxmlConfigNode(self, session, node):
    """Read ``self.tagset`` and ``self.maps`` from a ``<transform>`` element.

    ``tagset`` comes from the attribute of the same name (default ``''``).
    Each ``<map>`` child contributes a two-item list made of the flattened
    text of its first and second ``<xpath>`` children.

    Tags are accepted both bare and qualified with ``CONFIG_NS``.
    Elements other than ``<transform>`` are left untouched.
    """
    if node.tag not in ["transform", '{%s}transform' % CONFIG_NS]:
        return

    self.tagset = node.attrib.get('tagset', '')
    pairs = []
    for mapNode in node.iterchildren(tag=etree.Element):
        if mapNode.tag not in ['map', '{%s}map' % CONFIG_NS]:
            continue
        xpaths = []
        for part in mapNode.iterchildren(tag=etree.Element):
            if part.tag in ["xpath", '{%s}xpath' % CONFIG_NS]:
                xpaths.append(flattenTexts(part))
        source = xpaths[0]
        if source[0] != "#":
            first = source
        else:
            # special case to process
            first = source
        pairs.append([first, xpaths[1]])
    self.maps = pairs
def _handleLxmlLog(self, node):
    """Return the generated source line for a log element.

    The flattened element text is used as the message expression: text
    starting with a double quote is used verbatim, anything else is
    turned into a string literal with ``repr``. A ``ref`` attribute is
    added to ``self.objrefs`` and selects ``self.objcache[<ref>]`` as the
    logger; otherwise ``self.defaultLogger`` is used. A numeric ``level``
    attribute produces a ``log_lvl`` call, a non-numeric one a
    ``log_<level>`` call, and no level a plain ``log`` call.

    Returns:
        A one-item list holding the generated line of code.
    """
    text = flattenTexts(node)
    # startswith() also copes with empty text, where text[0] would raise
    # IndexError.
    if not text.startswith('"'):
        text = repr(text)
    ref = node.attrib.get('ref', '')
    lvl = node.attrib.get('level', '')
    if ref:
        self.objrefs.add(ref)
        obj = "self.objcache[%s]" % ref
    else:
        obj = "self.defaultLogger"
    if lvl:
        if lvl.isdigit():
            return ["%s.log_lvl(session, %s, str(%s).strip())"
                    % (obj, lvl, text)]
        else:
            return ["%s.log_%s(session, str(%s).strip())"
                    % (obj, lvl, text)]
    else:
        return ["%s.log(session, str(%s).strip())" % (obj, text)]
def _handleConfigNode(self, session, node):
    """Read ``self.tagset`` and ``self.maps`` from a ``transform`` DOM node.

    ``tagset`` comes from the un-namespaced attribute of the same name.
    Each ``map`` element child contributes a two-item list made of the
    flattened text of its first and second ``xpath`` element children.

    Nodes with any other local name are left untouched.
    """
    if node.localName != "transform":
        return

    self.tagset = node.getAttributeNS(None, 'tagset')
    pairs = []
    for mapNode in node.childNodes:
        if not (mapNode.nodeType == elementType and
                mapNode.localName == "map"):
            continue
        xpaths = [flattenTexts(part)
                  for part in mapNode.childNodes
                  if (part.nodeType == elementType and
                      part.localName == "xpath")]
        source = xpaths[0]
        if source[0] != "#":
            # vxp = verifyXPaths([map[0]])
            first = source
        else:
            # special case to process
            first = source
        pairs.append([first, xpaths[1]])
    self.maps = pairs
def _handleConfigNode(self, session, node):
    """Build ``self.maps`` and ``self.keyMap`` from a ``cluster`` DOM node.

    Each ``map`` element child is turned into a list made of its ``xpath``
    texts and its ``process`` objects, in document order. A ``process``
    with a ``ref`` attribute is resolved through ``self.get_object``; one
    without is renamed to ``workflow`` (or rebuilt as a new ``workflow``
    element when the DOM refuses the rename) and wrapped in a
    ``CachingWorkflow``. Lists shorter than three items get a default
    extractor entry appended. The first three items of a map typed
    ``key`` become ``self.keyMap``; those of every other map are collected
    into ``self.maps``.

    Nodes with any other local name are left untouched.
    """
    if node.localName == "cluster":
        maps = []
        for child in node.childNodes:
            if child.nodeType == elementType and child.localName == "map":
                t = child.getAttributeNS(None, 'type')
                entry = []
                for xpchild in child.childNodes:
                    if (xpchild.nodeType == elementType and
                            xpchild.localName == "xpath"):
                        entry.append(flattenTexts(xpchild))
                    elif (xpchild.nodeType == elementType and
                          xpchild.localName == "process"):
                        # turn xpath chain to workflow
                        ref = xpchild.getAttributeNS(None, 'ref')
                        if ref:
                            process = self.get_object(session, ref)
                        else:
                            try:
                                xpchild.localName = 'workflow'
                            except Exception:
                                # 4suite dom sets read only
                                newTop = (xpchild.ownerDocument
                                          .createElementNS(None, 'workflow'))
                                # Iterate over a copy: appendChild() moves
                                # each node out of the live child list, and
                                # mutating that list mid-loop can skip
                                # children.
                                for kid in list(xpchild.childNodes):
                                    newTop.appendChild(kid)
                                xpchild = newTop
                            process = CachingWorkflow(session, xpchild, self)
                            process._handleConfigNode(session, xpchild)
                        entry.append(process)
                # XXX FIX ME
                # vxp = verifyXPaths([map[0]])
                vxp = [entry[0]]
                if len(entry) < 3:
                    # default ExactExtractor
                    entry.append([['extractor', 'SimpleExtractor']])
                if t == u'key':
                    self.keyMap = [vxp[0], entry[1], entry[2]]
                else:
                    maps.append([vxp[0], entry[1], entry[2]])
        self.maps = maps
def _handleLxmlLog(self, node):
    """Return the generated source lines for a log element.

    The first line binds ``object`` to the logger: the object named by
    the ``ref`` attribute when present, otherwise the ``defaultLogger``
    path. The second line is the log call. The flattened element text is
    the message expression: text starting with a double quote is used
    verbatim, anything else is turned into a string literal with
    ``repr``. A numeric ``level`` attribute produces a ``log_lvl`` call,
    a non-numeric one a ``log_<level>`` call, and no level a plain
    ``log`` call.

    Returns:
        A two-item list of generated lines of code.
    """
    code = []
    ref = node.attrib.get('ref', '')
    if ref:
        code.append("object = db.get_object(session, '%s')" % ref)
    else:
        code.append("object = db.get_path(session, 'defaultLogger')")
    text = flattenTexts(node)
    # startswith() also copes with empty text, where text[0] would raise
    # IndexError.
    if not text.startswith('"'):
        text = repr(text)
    lvl = node.attrib.get('level', '')
    if lvl:
        if lvl.isdigit():
            code.append("object.log_lvl(session, %s, str(%s).strip())"
                        % (int(lvl), text))
        else:
            code.append("object.log_%s(session, str(%s).strip())"
                        % (lvl, text))
    else:
        code.append("object.log(session, str(%s).strip())" % text)
    return code
def _handleLog(self, node):
    """Return the generated source line for a log DOM node.

    The flattened node text is the message expression: text starting with
    a double quote is used verbatim, anything else goes through ``repr``.
    A ``ref`` attribute is added to ``self.objrefs`` and selects
    ``self.objcache[<ref>]`` as the logger; otherwise
    ``self.defaultLogger`` is used. The ``level`` attribute chooses
    between ``log_lvl`` (numeric), ``log_<level>`` (non-numeric) and a
    plain ``log`` call (absent).

    Returns a one-item list holding the generated line.
    """
    message = flattenTexts(node)
    if not message.startswith('"'):
        message = repr(message)

    ref = node.getAttributeNS(None, 'ref')
    level = node.getAttributeNS(None, 'level')

    if ref:
        self.objrefs.add(ref)
        logger = "self.objcache[%s]" % ref
    else:
        logger = "self.defaultLogger"

    if not level:
        return ["%s.log(session, str(%s).strip())" % (logger, message)]

    if level.isdigit():
        template = "%s.log_lvl(session, %s, str(%s).strip())"
    else:
        template = "%s.log_%s(session, str(%s).strip())"
    return [template % (logger, level, message)]
def _handleLog(self, node):
    """Return the generated source lines for a log DOM node.

    The first line binds ``object`` to the logger: the object named by
    the ``ref`` attribute when present, otherwise the ``defaultLogger``
    path. The second line is the log call, whose message expression is
    the flattened node text (verbatim when it starts with a double quote,
    otherwise passed through ``repr``). The ``level`` attribute chooses
    between ``log_lvl`` (numeric), ``log_<level>`` (non-numeric) and a
    plain ``log`` call (absent).

    Returns a two-item list of generated lines.
    """
    ref = node.getAttributeNS(None, 'ref')
    if ref:
        lookup = "object = db.get_object(session, '%s')" % ref
    else:
        lookup = "object = db.get_path(session, 'defaultLogger')"

    message = flattenTexts(node)
    if not message.startswith('"'):
        message = repr(message)

    level = node.getAttributeNS(None, 'level')
    if not level:
        call = "object.log(session, str(%s).strip())" % message
    elif level.isdigit():
        call = ("object.log_lvl(session, %s, str(%s).strip())"
                % (int(level), message))
    else:
        call = "object.log_%s(session, str(%s).strip())" % (level, message)

    return [lookup, call]
def _handleConfigNode(self, session, node):
    """Build ``self.maps`` and ``self.keyMap`` from a ``cluster`` DOM node.

    Each ``map`` element child is turned into a list made of its ``xpath``
    texts and its ``process`` objects, in document order. A ``process``
    with a ``ref`` attribute is resolved through ``self.get_object``; one
    without is renamed to ``workflow`` (or rebuilt as a new ``workflow``
    element when the DOM refuses the rename) and wrapped in a
    ``CachingWorkflow``. Lists shorter than three items get a default
    extractor entry appended. The first three items of a map typed
    ``key`` become ``self.keyMap``; those of every other map are collected
    into ``self.maps``.

    Nodes with any other local name are left untouched.
    """
    if node.localName != "cluster":
        return

    maps = []
    for child in node.childNodes:
        if not (child.nodeType == elementType and
                child.localName == "map"):
            continue
        t = child.getAttributeNS(None, 'type')
        entry = []
        for xpchild in child.childNodes:
            if xpchild.nodeType != elementType:
                continue
            if xpchild.localName == "xpath":
                entry.append(flattenTexts(xpchild))
            elif xpchild.localName == "process":
                # turn xpath chain to workflow
                ref = xpchild.getAttributeNS(None, 'ref')
                if ref:
                    process = self.get_object(session, ref)
                else:
                    try:
                        xpchild.localName = 'workflow'
                    except Exception:
                        # 4suite dom sets read only
                        newTop = xpchild.ownerDocument.createElementNS(
                            None, 'workflow')
                        # Iterate over a copy: appendChild() moves each
                        # node out of the live child list, and mutating
                        # that list mid-loop can skip children.
                        for kid in list(xpchild.childNodes):
                            newTop.appendChild(kid)
                        xpchild = newTop
                    process = CachingWorkflow(session, xpchild, self)
                    process._handleConfigNode(session, xpchild)
                entry.append(process)
        # XXX FIX ME
        # vxp = verifyXPaths([map[0]])
        vxp = [entry[0]]
        if len(entry) < 3:
            # default ExactExtractor
            entry.append([['extractor', 'SimpleExtractor']])
        if t == u'key':
            self.keyMap = [vxp[0], entry[1], entry[2]]
        else:
            maps.append([vxp[0], entry[1], entry[2]])
    self.maps = maps
def _handleLog(self, node):
    """Return the generated source line for a log DOM node.

    The message expression is the flattened node text, used verbatim when
    it starts with a double quote and passed through ``repr`` otherwise.
    With a ``ref`` attribute the reference is added to ``self.objrefs``
    and the logger expression is ``self.objcache[<ref>]``; without one it
    is ``self.defaultLogger``. A numeric ``level`` attribute gives a
    ``log_lvl`` call, a non-numeric one a ``log_<level>`` call, and none a
    plain ``log`` call.

    Returns a one-item list holding the generated line.
    """
    text = flattenTexts(node)
    text = text if text.startswith('"') else repr(text)

    ref = node.getAttributeNS(None, 'ref')
    lvl = node.getAttributeNS(None, 'level')

    if not ref:
        obj = "self.defaultLogger"
    else:
        self.objrefs.add(ref)
        obj = "self.objcache[%s]" % ref

    if lvl and lvl.isdigit():
        line = "%s.log_lvl(session, %s, str(%s).strip())" % (obj, lvl, text)
    elif lvl:
        line = "%s.log_%s(session, str(%s).strip())" % (obj, lvl, text)
    else:
        line = "%s.log(session, str(%s).strip())" % (obj, text)
    return [line]
def _walkZeeRex(self, session, node):
    """Walk a ZeeRex DOM subtree describing a Z39.50 configuration.

    Elements are matched on ``node.localName``:

    * ``databaseInfo``, ``metaInfo`` -- skipped with their whole subtree.
    * ``serverInfo`` -- stores the ``version`` attribute, then walks every
      child node.
    * ``database``, ``host``, ``port`` -- flattened text stored as
      ``databaseName``, ``host`` and ``port`` (an int).
    * ``recordSyntax`` -- maps each ``elementSet`` name (lower-cased) to
      its transformer, or None when it names none, and stores that
      mapping in ``transformerHash`` under the ``identifier`` attribute.
    * ``set`` -- binds a lower-cased short name to an identifier in
      ``self.prefixes``.
    * ``index`` -- resolves the index object and registers it, with its
      default attributes, in ``self.indexHash`` under every ``map`` child.
    * ``map`` -- returns a list of ``(oid, type, value)`` tuples built
      from the ``attr`` children.
    * ``default`` -- sets ``default<Type>``; for type ``attribute`` the
      value is taken from a ``map`` child instead of the text.
    * ``setting`` -- sets the attribute named by ``type``.

    Any other node is descended into, element children only.

    Returns:
        The attribute list for a ``map`` node; None otherwise.

    Raises:
        ConfigFileException: a referenced transformer or index cannot be
            resolved, a set lacks its name or identifier or rebinds a
            short name, or an ``attr`` is missing or has an invalid set,
            type or value.
    """
    if node.localName in ['databaseInfo', 'metaInfo']:
        # Ignore
        return
    elif node.localName == 'serverInfo':
        self.version = node.getAttribute('version')
        for c in node.childNodes:
            self._walkZeeRex(session, c)
    elif node.localName == 'database':
        self.databaseName = str(flattenTexts(node))
    elif node.localName == 'host':
        self.host = str(flattenTexts(node))
    elif node.localName == 'port':
        self.port = int(flattenTexts(node))
    elif node.localName == 'recordSyntax':
        syntaxId = node.getAttribute('identifier')
        # id is string dotted OID
        thash = {}
        for c in node.childNodes:
            if (c.nodeType == elementType and c.localName == 'elementSet'):
                name = c.getAttribute('name')
                xsl = c.getAttributeNS(self.c3Namespace, 'transformer')
                if xsl:
                    txr = self.get_object(session, xsl)
                    # Identity test, so a permissive __eq__ cannot make a
                    # real transformer look missing.
                    if txr is None:
                        raise ConfigFileException(
                            "[%s] No transformer to map to for %s"
                            % (self.id, xsl))
                else:
                    txr = None
                thash[name.lower()] = txr
        self.transformerHash[syntaxId] = thash
    elif node.localName == 'set':
        name = node.getAttribute('name')
        name = name.lower()
        uri = node.getAttribute('identifier')
        if not name or not uri:
            raise ConfigFileException(
                'Missing name or identifier for attribute set mappting.')
        if (name in self.prefixes and uri != self.prefixes[name]):
            raise ConfigFileException(
                'Multiple OIDs bound to same short name: %s -> %s'
                % (name, uri))
        self.prefixes[str(name)] = str(uri)
    elif node.localName == 'index':
        # Process indexes
        idxName = node.getAttributeNS(self.c3Namespace, 'index')
        indexObject = self.get_object(session, idxName)
        if indexObject is None:
            raise ConfigFileException(
                "Could not find Index object %s" % (idxName))
        maps = []
        defaults = []
        supports = []
        for c in node.childNodes:
            if c.nodeType == elementType:
                if c.localName == 'map':
                    maps.append(self._walkZeeRex(session, c))
                elif c.localName == 'configInfo':
                    for c2 in c.childNodes:
                        if c2.nodeType == elementType:
                            if c2.localName == "default":
                                # Get default attributes
                                for c3 in c2.childNodes:
                                    if (c3.nodeType == elementType and
                                            c3.localName == 'map'):
                                        defaults = self._walkZeeRex(
                                            session, c3)
                            elif c2.localName == "supports":
                                # Get other supported attributes
                                for c3 in c2.childNodes:
                                    if (c3.nodeType == elementType and
                                            c3.localName == 'map'):
                                        # Can't use c3:index to redirect
                                        # here Too complicated
                                        data = self._walkZeeRex(session, c3)
                                        supports.append(data)
        # FIXME: This is wrong. It doesn't respect combinations.
        for s in supports:
            defaults.extend(s)
        for m in maps:
            # Attribute lists are turned into tuples to serve as dict keys.
            curr = self.indexHash.get(tuple(m), [])
            curr.append((defaults, indexObject))
            self.indexHash[tuple(m)] = curr
    elif (node.localName == 'map'):
        attrs = []
        for c in node.childNodes:
            if (c.nodeType == elementType and c.localName == 'attr'):
                short = c.getAttribute('set')
                short = short.lower()
                if not short:
                    short = 'bib1'
                oid = self.prefixes.get(short, None)
                if not oid:
                    raise ConfigFileException(
                        'No mapping for attribute set %s in Zeerex file'
                        % short)
                attrType = c.getAttribute('type')
                if not attrType:
                    raise ConfigFileException(
                        'No type attribute for Z39.50 attr mapping in '
                        'Zeerex')
                attrType = int(attrType)
                if attrType < 1:
                    raise ConfigFileException(
                        'Invalid type attribute for Z39.50 attr mapping '
                        'in Zeerex: %s' % attrType)
                attrVal = flattenTexts(c).strip()
                if not attrVal:
                    raise ConfigFileException(
                        'No value given for attribute in Z39.50 mapping')
                if attrVal.isdigit():
                    attrVal = int(attrVal)
                else:
                    attrVal = attrVal.lower()
                attrs.append((oid, attrType, attrVal))
        return attrs
    elif (node.localName == 'default'):
        dtype = node.getAttribute('type')
        pname = "default" + dtype[0].capitalize() + dtype[1:]
        if dtype == 'attribute':
            # Get map instead of text
            for c in node.childNodes:
                if c.nodeType == elementType and c.localName == 'map':
                    data = self._walkZeeRex(session, c)
        else:
            data = flattenTexts(node)
            if (data.isdigit()):
                data = int(data)
            elif data == 'false':
                data = 0
            elif data == 'true':
                data = 1
            elif data.lower() in self.prefixes.keys():
                data = self.prefixes[data.lower()]
        setattr(self, pname, data)
    elif (node.localName == 'setting'):
        dtype = node.getAttribute('type')
        data = flattenTexts(node)
        if (data.isdigit()):
            data = int(data)
        elif data == 'false':
            data = 0
        elif data == 'true':
            data = 1
        setattr(self, dtype, data)
    else:
        for c in node.childNodes:
            if c.nodeType == elementType:
                self._walkZeeRex(session, c)
def _walkZeeRex(self, session, node):
    """Walk a ZeeRex DOM subtree and copy its settings onto this object.

    Elements are matched on ``node.localName``:

    * ``databaseInfo``, ``metaInfo`` -- skipped with their whole subtree.
    * ``serverInfo`` -- stores the ``version`` attribute, then walks every
      child node.
    * ``database``, ``host``, ``port`` -- flattened text stored as
      ``databaseUrl``, ``host`` and ``port`` (an int).
    * ``schema`` -- recorded in ``recordNamespaces`` and, when a
      ``transformer`` attribute in ``self.c3Namespace`` is present, in
      ``transformerHash``.
    * ``set`` -- binds a short name to an identifier in ``self.prefixes``.
    * ``index`` -- registers the index object in ``self.indexHash`` under
      every ``map`` child, and again per ``supports`` entry found under
      ``configInfo``.
    * ``map`` -- returns a ``(uri, index name)`` tuple for the first
      ``name`` child.
    * ``default`` / ``setting`` -- set an attribute from the element text
      (all-digit text becomes an int, ``false`` / ``true`` become 0 / 1).
    * ``supports`` -- for the extension types listed below, registers a
      handler function in ``self.sruExtensionMap``.

    Any other node is descended into, element children only.

    Returns:
        A ``(uri, index)`` tuple for a ``map`` node that has a ``name``
        child; None otherwise.

    Raises:
        ConfigFileException: a referenced transformer, index, prefix,
            extension type or handler function cannot be resolved, or a
            short name is rebound to a different URI.
    """
    if node.localName in ['databaseInfo', 'metaInfo']:
        # Ignore
        return
    elif node.localName == 'serverInfo':
        self.version = node.getAttribute('version')
        # All child nodes are walked here, without a nodeType filter.
        for c in node.childNodes:
            self._walkZeeRex(session, c)
    elif node.localName == 'database':
        self.databaseUrl = str(flattenTexts(node))
    elif node.localName == 'host':
        self.host = str(flattenTexts(node))
    elif node.localName == 'port':
        self.port = int(flattenTexts(node))
    elif node.localName == 'schema':
        id = node.getAttribute('identifier')
        name = node.getAttribute('name')
        xsl = node.getAttributeNS(self.c3Namespace, 'transformer')
        if (xsl):
            txr = self.get_object(session, xsl)
            if (txr is None):
                raise ConfigFileException("No transformer to map to for "
                                          "%s" % (xsl))
            self.transformerHash[id] = txr
        self.recordNamespaces[name] = id
    elif node.localName == 'set':
        name = node.getAttribute('name')
        uri = node.getAttribute('identifier')
        # A short name may be declared again only with the same URI.
        if (name in self.prefixes and uri != self.prefixes[name]):
            raise ConfigFileException('Multiple URIs bound to same short '
                                      'name: %s -> %s' % (name, uri))
        self.prefixes[str(name)] = str(uri)
    elif node.localName == 'index':
        # Process indexes
        idxName = node.getAttributeNS(self.c3Namespace, 'index')
        indexObject = self.get_object(session, idxName)
        if indexObject is None:
            raise(ConfigFileException("[%s] No Index to map to for "
                                      "%s" % (self.id, idxName)))
        maps = []
        # Each <map> child is resolved by the 'map' branch below.
        for c in node.childNodes:
            if (c.nodeType == elementType and c.localName == 'map'):
                maps.append(self._walkZeeRex(session, c))
        for m in maps:
            self.indexHash[m] = indexObject
        # Need to generate all relations and modifiers
        for c in node.childNodes:
            if (c.nodeType == elementType and c.localName == 'configInfo'):
                for c2 in c.childNodes:
                    if (
                        c2.nodeType == elementType and
                        c2.localName == 'supports'
                    ):
                        # A <supports> entry may name its own index;
                        # without one the enclosing index is used.
                        idxName2 = c2.getAttributeNS(self.c3Namespace,
                                                     'index')
                        if (not idxName2):
                            indexObject2 = indexObject
                        else:
                            indexObject2 = self.get_object(session,
                                                           idxName2)
                            if indexObject2 is None:
                                raise ConfigFileException(
                                    "[%s] No Index to map to for "
                                    "%s" % (self.id, idxName2)
                                )
                        st = str(c2.getAttribute('type'))
                        val = str(flattenTexts(c2))
                        # Key: (uri, index name, (supports type, value)).
                        for m in maps:
                            self.indexHash[(m[0], m[1], (st, val))] = indexObject2
    elif (node.localName == 'map'):
        for c in node.childNodes:
            if (c.nodeType == elementType and c.localName == 'name'):
                short = c.getAttribute('set')
                index = flattenTexts(c)
                index = index.lower()
                uri = self.resolvePrefix(short)
                if (not uri):
                    raise ConfigFileException("No mapping for %s in "
                                              "Zeerex" % (short))
                # Only the first <name> child is used.
                return (str(uri), str(index))
    elif (node.localName == 'default'):
        dtype = node.getAttribute('type')
        # XXX: would dtype.title() be nicer!?
        pname = "default" + dtype[0].capitalize() + dtype[1:]
        data = flattenTexts(node)
        if (data.isdigit()):
            data = int(data)
        elif data == 'false':
            data = 0
        elif data == 'true':
            data = 1
        setattr(self, pname, data)
    elif (node.localName == 'setting'):
        dtype = node.getAttribute('type')
        data = flattenTexts(node)
        if (data.isdigit()):
            data = int(data)
        elif data == 'false':
            data = 0
        elif data == 'true':
            data = 1
        setattr(self, dtype, data)
    elif (node.localName == 'supports'):
        stype = node.getAttribute('type')
        if stype in ['extraData', 'extraSearchData', 'extraScanData',
                     'extraExplainData', 'extension']:
            # function, xnType, sruName
            xn = node.getAttributeNS(self.c3Namespace, 'type')
            if (not xn in ['record', 'term', 'searchRetrieve', 'scan',
                           'explain', 'response']):
                raise ConfigFileException('Unknown extension type %s' % xn)
            sru = node.getAttributeNS(self.c3Namespace, 'sruName')
            fn = node.getAttributeNS(self.c3Namespace, 'function')
            # The element text is split on single spaces into a tuple.
            data = flattenTexts(node)
            data = data.strip()
            data = tuple(data.split(' '))
            if fn.find('.') > -1:
                # new version
                # A dotted name is imported through dynamic.importObject.
                try:
                    fn = dynamic.importObject(session, fn)
                except ImportError:
                    raise ConfigFileException("Cannot find handler "
                                              "function %s (in %s)" %
                                              (fn, repr(sys.path)))
                self.sruExtensionMap[sru] = (xn, fn, data)
            else:
                # An undotted name is looked up on srwExtensions.
                if (hasattr(srwExtensions, fn)):
                    fn = getattr(srwExtensions, fn)
                else:
                    raise ConfigFileException('Cannot find handler '
                                              'function %s in '
                                              'srwExtensions.' % fn)
                xform = node.getAttributeNS(self.c3Namespace, 'sruFunction')
                if not xform:
                    sruFunction = srwExtensions.simpleRequestXform
                elif hasattr(srwExtensions, xform):
                    sruFunction = getattr(srwExtensions, xform)
                else:
                    raise ConfigFileException('Cannot find transformation '
                                              'function %s in '
                                              'srwExtensions.' % xform)
                # The handler is also stored on the per-type hash, e.g.
                # recordExtensionHash, keyed by the data tuple.
                hashAttr = xn + "ExtensionHash"
                curr = getattr(self, hashAttr)
                curr[data] = fn
                setattr(self, hashAttr, curr)
                # NOTE(review): data is assumed to have at least two
                # items here -- confirm against the config files.
                self.sruExtensionMap[sru] = (data[0], data[1], sruFunction)
    else:
        for c in node.childNodes:
            if c.nodeType == elementType:
                self._walkZeeRex(session, c)
def _walkZeeRex(self, session, node):
    """Copy settings from a ZeeRex DOM subtree onto this object.

    ``databaseInfo``, ``metaInfo`` and ``indexInfo`` subtrees are skipped.
    ``serverInfo`` stores its ``version`` and walks all children.
    ``database``, ``host`` and ``port`` store their flattened text
    (``port`` as an int). ``schema`` fills ``transformerHash`` and
    ``recordNamespaces``. ``supports`` of type ``operation`` fills
    ``workflowHash``. ``default`` and ``setting`` set an attribute from
    the element text. Anything else is descended into, element children
    only.

    Raises ConfigFileException when a named transformer or workflow cannot
    be resolved through ``self.get_object``.
    """

    def coerce(text):
        # All-digit text becomes an int; 'false' / 'true' become 0 / 1.
        if text.isdigit():
            return int(text)
        if text == 'false':
            return 0
        if text == 'true':
            return 1
        return text

    localName = node.localName

    if localName in ['databaseInfo', 'metaInfo', 'indexInfo']:
        # Ignore
        return
    if localName == 'serverInfo':
        self.version = node.getAttribute('version')
        for kid in node.childNodes:
            self._walkZeeRex(session, kid)
    elif localName == 'database':
        self.databaseUrl = str(flattenTexts(node))
    elif localName == 'host':
        self.host = str(flattenTexts(node))
    elif localName == 'port':
        self.port = int(flattenTexts(node))
    elif localName == 'schema':
        schemaId = node.getAttribute('identifier')
        shortName = node.getAttribute('name')
        txrName = node.getAttributeNS(self.c3Namespace, 'transformer')
        if txrName:
            txr = self.get_object(session, txrName)
            if txr is None:
                raise ConfigFileException(
                    "No transformer to map to for %s" % (txrName))
            self.transformerHash[schemaId] = txr
        self.recordNamespaces[shortName] = schemaId
    elif localName == "supports":
        supportType = node.getAttribute('type')
        operation = flattenTexts(node)
        if supportType == 'operation':
            flowName = node.getAttributeNS(self.c3Namespace, 'workflow')
            if flowName:
                if self.get_object(session, flowName) is None:
                    raise ConfigFileException(
                        "No workflow to map to for %s" % flowName)
                self.workflowHash[operation] = self.get_object(session,
                                                               flowName)
    elif localName == 'default':
        dtype = node.getAttribute('type')
        pname = "default" + dtype[0].capitalize() + dtype[1:]
        setattr(self, pname, coerce(flattenTexts(node)))
    elif localName == 'setting':
        dtype = node.getAttribute('type')
        setattr(self, dtype, coerce(flattenTexts(node)))
    else:
        for kid in node.childNodes:
            if kid.nodeType == elementType:
                self._walkZeeRex(session, kid)
def _walkZeeRex(self, session, node): if node.localName in ['databaseInfo', 'metaInfo']: # Ignore return elif node.localName == 'serverInfo': self.version = node.getAttribute('version') for c in node.childNodes: self._walkZeeRex(session, c) elif node.localName == 'database': self.databaseName = str(flattenTexts(node)) elif node.localName == 'host': self.host = str(flattenTexts(node)) elif node.localName == 'port': self.port = int(flattenTexts(node)) elif node.localName == 'recordSyntax': id = node.getAttribute('identifier') # id is string dotted OID thash = {} for c in node.childNodes: if (c.nodeType == elementType and c.localName == 'elementSet'): name = c.getAttribute('name') xsl = c.getAttributeNS(self.c3Namespace, 'transformer') if (xsl): txr = self.get_object(session, xsl) if (txr is None): raise ConfigFileException( "[%s] No transformer to map to for %s" % (self.id, xsl)) else: txr = None thash[name.lower()] = txr self.transformerHash[id] = thash elif node.localName == 'set': name = node.getAttribute('name') name = name.lower() uri = node.getAttribute('identifier') if not name or not uri: raise ConfigFileException( 'Missing name or identifier for attribute set mappting.') if (name in self.prefixes and uri != self.prefixes[name]): raise (ConfigFileException( 'Multiple OIDs bound to same short name: %s -> %s' % (name, uri))) self.prefixes[str(name)] = str(uri) elif node.localName == 'index': # Process indexes idxName = node.getAttributeNS(self.c3Namespace, 'index') indexObject = self.get_object(session, idxName) if indexObject is None: raise (ConfigFileException("Could not find Index object %s" % (idxName))) maps = [] defaults = [] supports = [] for c in node.childNodes: if c.nodeType == elementType: if c.localName == 'map': maps.append(self._walkZeeRex(session, c)) elif c.localName == 'configInfo': for c2 in c.childNodes: if c2.nodeType == elementType: if c2.localName == "default": # Get default attributes for c3 in c2.childNodes: if c3.nodeType == elementType 
and c3.localName == 'map': defaults = self._walkZeeRex( session, c3) elif c2.localName == "supports": # Get other supported attributes for c3 in c2.childNodes: if c3.nodeType == elementType and c3.localName == 'map': # Can't use c3:index to redirect here Too complicated data = self._walkZeeRex( session, c3) supports.append(data) # FIXME: This is wrong. It doesn't respect combinations. for s in supports: defaults.extend(s) for m in maps: curr = self.indexHash.get(tuple(m), []) curr.append((defaults, indexObject)) self.indexHash[tuple(m)] = curr elif (node.localName == 'map'): attrs = [] for c in node.childNodes: if (c.nodeType == elementType and c.localName == 'attr'): short = c.getAttribute('set') short = short.lower() if not short: short = 'bib1' oid = self.prefixes.get(short, None) if not oid: raise ConfigFileException( 'No mapping for attribute set %s in Zeerex file' % short) type = c.getAttribute('type') if not type: raise ConfigFileException( 'No type attribute for Z39.50 attr mapping in Zeerex' ) type = int(type) if type < 1: raise ConfigFileException( 'Invalid type attribute for Z39.50 attr mapping in Zeerex: %s' % type) attrVal = flattenTexts(c).strip() if not attrVal: raise ConfigFileException( 'No value given for attribute in Z39.50 mapping') if attrVal.isdigit(): attrVal = int(attrVal) else: attrVal = attrVal.lower() attrs.append((oid, type, attrVal)) return attrs elif (node.localName == 'default'): dtype = node.getAttribute('type') pname = "default" + dtype[0].capitalize() + dtype[1:] if dtype == 'attribute': # Get map instead of text for c in node.childNodes: if c.nodeType == elementType and c.localName == 'map': data = self._walkZeeRex(session, c) else: data = flattenTexts(node) if (data.isdigit()): data = int(data) elif data == 'false': data = 0 elif data == 'true': data = 1 elif data.lower() in self.prefixes.keys(): data = self.prefixes[data.lower()] setattr(self, pname, data) elif (node.localName == 'setting'): dtype = node.getAttribute('type') 
data = flattenTexts(node) if (data.isdigit()): data = int(data) elif data == 'false': data = 0 elif data == 'true': data = 1 setattr(self, dtype, data) else: for c in node.childNodes: if c.nodeType == elementType: self._walkZeeRex(session, c)
def _walkZeeRex(self, session, node): if node.localName in ["databaseInfo", "metaInfo"]: # Ignore return elif node.localName == "serverInfo": self.version = node.getAttribute("version") for c in node.childNodes: self._walkZeeRex(session, c) elif node.localName == "database": self.databaseUrl = str(flattenTexts(node)) elif node.localName == "host": self.host = str(flattenTexts(node)) elif node.localName == "port": self.port = int(flattenTexts(node)) elif node.localName == "schema": id = node.getAttribute("identifier") name = node.getAttribute("name") xsl = node.getAttributeNS(self.c3Namespace, "transformer") if xsl: txr = self.get_object(session, xsl) if txr is None: raise ConfigFileException("No transformer to map to for " "%s" % (xsl)) self.transformerHash[id] = txr self.recordNamespaces[name] = id elif node.localName == "set": name = node.getAttribute("name") uri = node.getAttribute("identifier") if name in self.prefixes and uri != self.prefixes[name]: raise ConfigFileException("Multiple URIs bound to same short " "name: %s -> %s" % (name, uri)) self.prefixes[str(name)] = str(uri) elif node.localName == "index": # Process indexes idxName = node.getAttributeNS(self.c3Namespace, "index") indexObject = self.get_object(session, idxName) if indexObject is None: raise (ConfigFileException("[%s] No Index to map to for " "%s" % (self.id, idxName))) maps = [] for c in node.childNodes: if c.nodeType == elementType and c.localName == "map": maps.append(self._walkZeeRex(session, c)) for m in maps: self.indexHash[m] = indexObject # Need to generate all relations and modifiers for c in node.childNodes: if c.nodeType == elementType and c.localName == "configInfo": for c2 in c.childNodes: if c2.nodeType == elementType and c2.localName == "supports": idxName2 = c2.getAttributeNS(self.c3Namespace, "index") if not idxName2: indexObject2 = indexObject else: indexObject2 = self.get_object(session, idxName2) if indexObject2 is None: raise ConfigFileException("[%s] No Index to map to 
for " "%s" % (self.id, idxName2)) st = str(c2.getAttribute("type")) val = str(flattenTexts(c2)) for m in maps: self.indexHash[(m[0], m[1], (st, val))] = indexObject2 elif node.localName == "map": for c in node.childNodes: if c.nodeType == elementType and c.localName == "name": short = c.getAttribute("set") index = flattenTexts(c) index = index.lower() uri = self.resolvePrefix(short) if not uri: raise ConfigFileException("No mapping for %s in " "Zeerex" % (short)) return (str(uri), str(index)) elif node.localName == "default": dtype = node.getAttribute("type") # XXX: would dtype.title() be nicer!? pname = "default" + dtype[0].capitalize() + dtype[1:] data = flattenTexts(node) if data.isdigit(): data = int(data) elif data == "false": data = 0 elif data == "true": data = 1 setattr(self, pname, data) elif node.localName == "setting": dtype = node.getAttribute("type") data = flattenTexts(node) if data.isdigit(): data = int(data) elif data == "false": data = 0 elif data == "true": data = 1 setattr(self, dtype, data) elif node.localName == "supports": stype = node.getAttribute("type") if stype in ["extraData", "extraSearchData", "extraScanData", "extraExplainData", "extension"]: # function, xnType, sruName xn = node.getAttributeNS(self.c3Namespace, "type") if not xn in ["record", "term", "searchRetrieve", "scan", "explain", "response"]: raise ConfigFileException("Unknown extension type %s" % xn) sru = node.getAttributeNS(self.c3Namespace, "sruName") fn = node.getAttributeNS(self.c3Namespace, "function") data = flattenTexts(node) data = data.strip() data = tuple(data.split(" ")) if fn.find(".") > -1: # new version try: fn = dynamic.importObject(session, fn) except ImportError: raise ConfigFileException("Cannot find handler " "function %s (in %s)" % (fn, repr(sys.path))) self.sruExtensionMap[sru] = (xn, fn, data) else: if hasattr(srwExtensions, fn): fn = getattr(srwExtensions, fn) else: raise ConfigFileException("Cannot find handler " "function %s in " "srwExtensions." 
% fn) xform = node.getAttributeNS(self.c3Namespace, "sruFunction") if not xform: sruFunction = srwExtensions.simpleRequestXform elif hasattr(srwExtensions, xform): sruFunction = getattr(srwExtensions, xform) else: raise ConfigFileException( "Cannot find transformation " "function %s in " "srwExtensions." % xform ) hashAttr = xn + "ExtensionHash" curr = getattr(self, hashAttr) curr[data] = fn setattr(self, hashAttr, curr) self.sruExtensionMap[sru] = (data[0], data[1], sruFunction) else: for c in node.childNodes: if c.nodeType == elementType: self._walkZeeRex(session, c)