def list_sets(self):
    """Fetch the repository's sets with a single OAI-PMH ListSets request.

    The request is sent through ``self.h.request`` and the response body is
    streamed through ``pushtree``.  Returns a list with one dict per
    ``o:set`` element, each holding the keys ``setSpec``, ``setName`` and
    ``setDescription``.
    """
    # e.g. http://dspace.mit.edu/oai/request?verb=ListSets
    url = self.root + '?' + urllib.urlencode({'verb' : 'ListSets'})
    self.logger.debug('OAI request URL: {0}'.format(url))

    started = time.time()
    resp, content = self.h.request(url)
    finished = time.time()
    self.logger.debug('Retrieved in {0}s'.format(finished - started))

    # Candidate locations of the description text, tried in this order; the
    # first one that yields a non-empty value is used.  The last entry is
    # the string value of the whole setDescription element.
    description_paths = [
        u'string(o:setDescription/oai_dc:dc/dc:description)',
        u'string(o:setDescription/o:oclcdc/dc:description)',
        u'string(o:setDescription/dc:description)',
        u'string(o:setDescription)',
    ]
    collected = []

    def receive_nodes(n):
        # Invoked by pushtree once for every matching o:set element.
        def select(xpath):
            return n.xml_select(xpath, prefixes=PREFIXES)

        spec = select(u'string(o:setSpec)')
        name = select(u'string(o:setName)')
        # TODO better solution is to traverse setDescription amara tree
        for candidate in description_paths:
            description = select(candidate)
            if description:
                break
        collected.append({
            'setSpec': spec,
            'setName': name,
            'setDescription': description,
        })

    pushtree(content, u"o:OAI-PMH/o:ListSets/o:set", receive_nodes,
             namespaces=PREFIXES)
    return collected
def compare_matches(self, xpath):
    # Cross-check pushtree against xml_select for the same pattern: the
    # set of "x" attribute values of the nodes selected by "//" + xpath in
    # TREEDOC must equal the set of values left in self.results after
    # pushing TREE1 through pushtree.
    # NOTE(review): presumably self.callback records each node's "x"
    # attribute in self.results -- the callback is not visible here.
    del self.results[:]  # clear in place so the callback's list is reused
    select_ids = set(node.xml_attributes["x"]
                     for node in TREEDOC.xml_select("//" + xpath))
    pushtree(TREE1, xpath, self.callback)
    push_ids = set(self.results)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(select_ids, push_ids)
def testsimpleelement(self):
    # Pushing the pattern "a" over self.infile must deliver exactly two
    # nodes, whose xml_name values match expected_names in order.
    pushtree(self.infile, "a", self.callback)
    self.assertEqual(len(self.results), 2)
    expected_names = [
        (u'http://spam.com/', u'a'),   # XXX this should not be expected?
        (None, u'a'),
    ]
    for node, ename in zip(self.results, expected_names):
        self.assertEqual(node.xml_name, ename)
def test_1():
    # Push every node matching "a" out of XML1 and compare each serialized
    # node with the corresponding expected fragment.
    wanted = ['<a>0</a>', '<a>1</a>', '<a>10</a>', '<a>11</a>']
    matched = []
    pushtree(XML1, u"a", matched.append)
    # NOTE: zip() stops at the shorter sequence, so fewer matches than
    # expected fragments go unnoticed by this loop.
    for node, fragment in zip(matched, wanted):
        treecompare.check_xml(node.xml_encode(), XMLDECL + fragment)
def test_predicate2():
    # Push nodes matching the positional pattern "doc/one[2]" out of XML5
    # and compare each serialized node with the expected fragment.
    wanted = ['<one>repeat</one>']
    matched = []
    pushtree(XML5, u"doc/one[2]", matched.append)
    # NOTE: zip() stops at the shorter sequence, so this loop checks
    # nothing when pushtree delivers no nodes.
    for node, fragment in zip(matched, wanted):
        treecompare.check_xml(node.xml_encode(), XMLDECL + fragment)
def test_4():
    # Push nodes matching the prefixed pattern "x:a" out of XML4, with the
    # prefix "x" bound to urn:bogus:x, and compare each serialized node
    # with the expected fragment.
    wanted = ['<x:a xmlns:x="urn:bogus:x">0</x:a>',
              '<x:a xmlns:x="urn:bogus:x">1</x:a>']
    matched = []
    bindings = {"x": "urn:bogus:x"}
    pushtree(XML4, u"x:a", matched.append, namespaces=bindings)
    # NOTE: zip() stops at the shorter sequence, so fewer matches than
    # expected fragments go unnoticed by this loop.
    for node, fragment in zip(matched, wanted):
        treecompare.check_xml(node.xml_encode(), XMLDECL + fragment)
def test_2():
    # Push nodes matching "a" out of XML2 with urn:bogus:x registered under
    # the None key of the namespaces mapping, and compare each serialized
    # node with the expected fragment.
    wanted = ['<a xmlns="urn:bogus:x">0</a>',
              '<a xmlns="urn:bogus:x">1</a>',
              '<a xmlns="urn:bogus:x">10</a>',
              '<a xmlns="urn:bogus:x">11</a>']
    matched = []
    bindings = {None: "urn:bogus:x"}
    pushtree(XML2, u"a", matched.append, namespaces=bindings)
    # NOTE: zip() stops at the shorter sequence, so fewer matches than
    # expected fragments go unnoticed by this loop.
    for node, fragment in zip(matched, wanted):
        treecompare.check_xml(node.xml_encode(), XMLDECL + fragment)
def list_sets(self):
    """Return the repository's sets as a list of (setSpec, setName) tuples.

    Sends one ListSets request through ``self.h.request`` and, for every
    ``o:set`` element in the response, records the values selected by the
    ``string(o:setSpec)`` and ``string(o:setName)`` XPaths.
    """
    # e.g. http://dspace.mit.edu/oai/request?verb=ListSets
    url = self.root + '?' + urllib.urlencode({'verb' : 'ListSets'})
    self.logger.debug('OAI request URL: {0}'.format(url))

    began = time.time()
    resp, content = self.h.request(url)
    ended = time.time()
    self.logger.debug('Retrieved in {0}s'.format(ended - began))

    pairs = []

    def receive_nodes(n):
        # Invoked by pushtree once for every matching o:set element.
        spec = n.xml_select(u'string(o:setSpec)', prefixes=PREFIXES)
        name = n.xml_select(u'string(o:setName)', prefixes=PREFIXES)
        pairs.append((spec, name))

    pushtree(content, u"o:OAI-PMH/o:ListSets/o:set", receive_nodes,
             namespaces=PREFIXES)
    return pairs
def produce_csv(doc, csvWriter, justCount, projectVars):
    """Stream the ``result`` elements of *doc* into CSV rows.

    doc         -- XML source handed to ``pushtree``.
    csvWriter   -- object whose ``writerow(list)`` receives one list per
                   result.
    justCount   -- when true, results are only counted and nothing is
                   written.
    projectVars -- list of column names.  When empty it is filled in place
                   from the ``variable`` children of the ``head`` element;
                   ``None`` is treated as an empty list.

    Returns the ``Counter`` instance created here.  Its ``counter``
    attribute is incremented per result when *justCount* is true, and its
    ``skipCounter`` attribute is incremented for every written row from
    which non-ASCII characters were dropped.
    """
    if projectVars is None:
        # Rebind before the closures below are created so they share it.
        projectVars = []
    if not projectVars:
        def receive_header(node):
            for var in node.variable:
                projectVars.append(U(var.name))
        pushtree(doc, u'head', receive_header, entity_factory=entity_base)

    # NOTE: this Counter must expose `counter` and `skipCounter`
    # attributes; it is defined outside this block.
    cnt = Counter()

    @coroutine
    def receive_nodes(cnt):
        # NOTE(review): assumes @coroutine advances the generator to its
        # first yield so that target.send can be used as the callback.
        while True:
            node = yield
            if justCount:
                cnt.counter += 1
                continue

            rt = []
            badChars = False
            bindings = {}
            for binding in node.binding:
                try:
                    newterm = U(binding).encode('ascii')
                except UnicodeEncodeError:
                    # Drop the characters that cannot be encoded and
                    # remember that this row was altered.
                    newterm = U(binding).encode('ascii', 'ignore')
                    badChars = True
                    sys.stderr.write("Skipping character\n")
                if newterm:
                    if projectVars:
                        bindings[binding.name] = newterm
                    else:
                        # No known columns: emit values in document order.
                        # (Was `newTerm`, an undefined name.)
                        rt.append(newterm)
            # With known columns, emit one cell per column and leave the
            # cell empty where the result has no binding for it.
            for head in projectVars:
                rt.append(bindings.get(head, ''))
            if badChars:
                cnt.skipCounter += 1
            csvWriter.writerow(rt)

    target = receive_nodes(cnt)
    pushtree(doc, u'result', target.send, entity_factory=entity_base)
    target.close()
    return cnt
def test_predicate1():
    # Intended check: pushing the pattern b[x='4'] over TREE1 yields the
    # single <b x='4'> subtree.  The test currently bails out with
    # KnownFailure, so everything after the raise is unreachable and is
    # kept only as the intended test body.
    wanted = ['''<b x='4'> <d x='5' /> <e x='6' /> <d x='7' /> <b x='8' /> <c x='9' /> </b>''']
    matched = []
    raise KnownFailure("No predicates support. See: http://trac.xml3k.org/ticket/23")
    pushtree(TREE1, u"b[x='4']", matched.append)
    for node, fragment in zip(matched, wanted):
        treecompare.check_xml(node.xml_encode(), XMLDECL + fragment)
def list_sets(self):
    """Fetch all sets of the repository, following resumption tokens.

    Issues ListSets requests until a response carries no resumptionToken
    and returns a list with one dict per ``o:set`` element, each holding
    the keys ``setSpec``, ``setName`` and ``setDescription``.

    Raises:
        OAIHTTPError: the request failed, either with an HTTP error status
            or because no response could be obtained.
        OAIError: the parsed response contains an ``error`` element
            instead of ``ListSets``.
        OAIParseError: the parsed response contains neither.
    """
    sets = []

    # Candidate locations of the description text, tried in this order; the
    # first one that yields a non-empty value is used.
    paths = [
        u'string(o:setDescription/oai_dc:dc/dc:description)',
        u'string(o:setDescription/o:oclcdc/dc:description)',
        u'string(o:setDescription/dc:description)',
        u'string(o:setDescription)',
    ]

    def receive_nodes(n):
        # Invoked by pushtree once for every matching o:set element.
        setSpec = n.xml_select(u'string(o:setSpec)', prefixes=PREFIXES)
        setName = n.xml_select(u'string(o:setName)', prefixes=PREFIXES)
        # TODO better solution is to traverse setDescription amara tree
        for p in paths:
            setDescription = n.xml_select(p, prefixes=PREFIXES)
            if setDescription:
                break
        sets.append({
            'setSpec': setSpec,
            'setName': setName,
            'setDescription': setDescription,
        })

    resumptionToken = ''
    # e.g. http://dspace.mit.edu/oai/request?verb=ListSets
    params = {'verb': 'ListSets'}
    while True:
        if resumptionToken:
            params['resumptionToken'] = resumptionToken
        url = self.root + '?' + urllib.urlencode(params)
        self.logger.debug('OAI request URL: {0}'.format(url))

        start_t = time.time()
        try:
            response = urllib2.urlopen(url)
            try:
                content = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
        except urllib2.HTTPError as e:
            # HTTPError is a subclass of URLError, so it has to be caught
            # first or this handler can never run.
            raise OAIHTTPError("list_sets got status %d: %s" %
                               (e.code, e.reason))
        except urllib2.URLError as e:
            raise OAIHTTPError("list_sets could not make request: %s" %
                               e.reason)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

        pushtree(content, u"o:OAI-PMH/o:ListSets/o:set", receive_nodes,
                 namespaces=PREFIXES)

        try:
            xml_content = XML_PARSE(content)
            resumptionToken = \
                xml_content["OAI-PMH"]["ListSets"].get("resumptionToken", "")
        except KeyError:
            # No ListSets element: report the server's error if there is
            # one, otherwise the response is unparseable for our purposes.
            try:
                error = xml_content["OAI-PMH"]["error"]
                raise OAIError(error)
            except KeyError:
                raise OAIParseError("Could not parse %s:\n%s" %
                                    (url, xml_content))

        # NOTE(review): presumably XML_PARSE yields a dict with the text
        # under "#text" when the element carries attributes -- confirm.
        if isinstance(resumptionToken, dict):
            resumptionToken = resumptionToken.get("#text", "")

        # An empty or missing token marks the last page.
        if not resumptionToken:
            break

    return sets
def list_sets(self):
    """Fetch all sets of the repository, following resumption tokens.

    Issues ListSets requests until a response carries no resumptionToken
    and returns a list with one dict per ``o:set`` element, each holding
    the keys ``setSpec``, ``setName`` and ``setDescription``.

    Raises:
        OAIHTTPError: the request failed, either with an HTTP error status
            or because no response could be obtained.
        OAIError: the parsed response contains an ``error`` element
            instead of ``ListSets``.
        OAIParseError: the parsed response contains neither.
    """
    sets = []

    # Candidate locations of the description text, tried in this order; the
    # first one that yields a non-empty value is used.
    paths = [
        u'string(o:setDescription/oai_dc:dc/dc:description)',
        u'string(o:setDescription/o:oclcdc/dc:description)',
        u'string(o:setDescription/dc:description)',
        u'string(o:setDescription)',
    ]

    def receive_nodes(n):
        # Invoked by pushtree once for every matching o:set element.
        setSpec = n.xml_select(u'string(o:setSpec)', prefixes=PREFIXES)
        setName = n.xml_select(u'string(o:setName)', prefixes=PREFIXES)
        # TODO better solution is to traverse setDescription amara tree
        for p in paths:
            setDescription = n.xml_select(p, prefixes=PREFIXES)
            if setDescription:
                break
        sets.append({
            'setSpec': setSpec,
            'setName': setName,
            'setDescription': setDescription,
        })

    resumptionToken = ''
    # e.g. http://dspace.mit.edu/oai/request?verb=ListSets
    params = {'verb' : 'ListSets'}
    while True:
        if resumptionToken:
            params['resumptionToken'] = resumptionToken
        url = self.root + '?' + urllib.urlencode(params)
        self.logger.debug('OAI request URL: {0}'.format(url))

        start_t = time.time()
        try:
            response = urllib2.urlopen(url)
            try:
                content = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
        except urllib2.HTTPError as e:
            # HTTPError is a subclass of URLError, so it has to be caught
            # first or this handler can never run.
            raise OAIHTTPError("list_sets got status %d: %s" %
                               (e.code, e.reason))
        except urllib2.URLError as e:
            raise OAIHTTPError("list_sets could not make request: %s" %
                               e.reason)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

        pushtree(content, u"o:OAI-PMH/o:ListSets/o:set", receive_nodes,
                 namespaces=PREFIXES)

        try:
            xml_content = XML_PARSE(content)
            resumptionToken = \
                xml_content["OAI-PMH"]["ListSets"].get("resumptionToken", "")
        except KeyError:
            # No ListSets element: report the server's error if there is
            # one, otherwise the response is unparseable for our purposes.
            try:
                error = xml_content["OAI-PMH"]["error"]
                raise OAIError(error)
            except KeyError:
                raise OAIParseError("Could not parse %s:\n%s" %
                                    (url, xml_content))

        # NOTE(review): presumably XML_PARSE yields a dict with the text
        # under "#text" when the element carries attributes -- confirm.
        if isinstance(resumptionToken, dict):
            resumptionToken = resumptionToken.get("#text", "")

        # An empty or missing token marks the last page.
        if not resumptionToken:
            break

    return sets
def testnamespaces(self):
    # Pushing "/a//q:a", with prefix q bound to http://spam.com/, must
    # deliver exactly one node: the "a" element in that namespace.
    # This is currently broken.  Possible bug in matching code
    pushtree(self.infile, "/a//q:a", self.callback,
             namespaces={"q": "http://spam.com/"})
    self.assertEqual(len(self.results), 1)
    self.assertEqual(self.results[0].xml_name, (u'http://spam.com/', u'a'))
def testattribute(self):
    # Pushing the attribute pattern "a/*/*/@b" must deliver exactly one
    # node, whose xml_name is the "a" element in the http://spam.com/
    # namespace.
    pushtree(self.infile, "a/*/*/@b", self.callback)
    self.assertEqual(len(self.results), 1)
    self.assertEqual(self.results[0].xml_name, (u'http://spam.com/', u'a'))
def testnestedelement(self):
    # Pushing the nested pattern "a/c" must deliver exactly one node: the
    # "c" element with no namespace.
    pushtree(self.infile, "a/c", self.callback)
    self.assertEqual(len(self.results), 1)
    self.assertEqual(self.results[0].xml_name, (None, u'c'))