Ejemplo n.º 1
0
    def list_sets(self):
        """Request the OAI ``ListSets`` verb and collect the sets it reports.

        A single ``verb=ListSets`` request is sent to ``self.root`` through
        ``self.h.request``; the request URL and the elapsed time are logged at
        debug level.

        Returns:
            A list with one dict per node delivered by ``pushtree`` for the
            pattern ``o:OAI-PMH/o:ListSets/o:set``; each dict has the keys
            ``setSpec``, ``setName`` and ``setDescription``.
        """
        # Candidate XPath expressions for the description, tried in order;
        # the first one that yields a non-empty value is used.
        description_paths = [
            u'string(o:setDescription/oai_dc:dc/dc:description)',
            u'string(o:setDescription/o:oclcdc/dc:description)',
            u'string(o:setDescription/dc:description)',
            u'string(o:setDescription)'
        ]
        collected = []

        def on_set(node):
            spec = node.xml_select(u'string(o:setSpec)', prefixes=PREFIXES)
            name = node.xml_select(u'string(o:setName)', prefixes=PREFIXES)
            #TODO better solution is to traverse setDescription amara tree
            for candidate in description_paths:
                description = node.xml_select(candidate, prefixes=PREFIXES)
                if description:
                    break
            collected.append({
                'setSpec': spec,
                'setName': name,
                'setDescription': description,
            })

        #e.g. http://dspace.mit.edu/oai/request?verb=ListSets
        url = self.root + '?' + urllib.urlencode({'verb' : 'ListSets'})
        self.logger.debug('OAI request URL: {0}'.format(url))

        start_t = time.time()
        _, content = self.h.request(url)
        elapsed = time.time() - start_t
        self.logger.debug('Retrieved in {0}s'.format(elapsed))

        pushtree(content, u"o:OAI-PMH/o:ListSets/o:set", on_set, namespaces=PREFIXES)
        return collected
Ejemplo n.º 2
0
    def list_sets(self):
        """Ask the repository for its sets via the OAI ``ListSets`` verb.

        Builds ``<self.root>?verb=ListSets``, fetches it with
        ``self.h.request`` (logging the URL and the retrieval time at debug
        level) and feeds the body to ``pushtree``.

        Returns:
            A list of dicts keyed by ``setSpec``, ``setName`` and
            ``setDescription``, one per node passed to the callback.
        """
        found_sets = []

        # Description lookups, tried in order; the first non-empty result is
        # kept for the set.
        xpath_candidates = [
            u'string(o:setDescription/oai_dc:dc/dc:description)',
            u'string(o:setDescription/o:oclcdc/dc:description)',
            u'string(o:setDescription/dc:description)',
            u'string(o:setDescription)'
        ]

        def handle_set_node(set_node):
            #TODO better solution is to traverse setDescription amara tree
            for xpath in xpath_candidates:
                set_description = set_node.xml_select(xpath, prefixes=PREFIXES)
                if set_description:
                    break
            record = {
                'setSpec': set_node.xml_select(u'string(o:setSpec)', prefixes=PREFIXES),
                'setName': set_node.xml_select(u'string(o:setName)', prefixes=PREFIXES),
                'setDescription': set_description,
            }
            found_sets.append(record)

        #e.g. http://dspace.mit.edu/oai/request?verb=ListSets
        query = urllib.urlencode({'verb' : 'ListSets'})
        url = self.root + '?' + query
        self.logger.debug('OAI request URL: {0}'.format(url))

        began = time.time()
        response_pair = self.h.request(url)
        finished = time.time()
        _headers, content = response_pair
        self.logger.debug('Retrieved in {0}s'.format(finished - began))

        pushtree(content, u"o:OAI-PMH/o:ListSets/o:set", handle_set_node, namespaces=PREFIXES)
        return found_sets
Ejemplo n.º 3
0
 def compare_matches(self, xpath):
     # Check that pushtree() reports the same nodes for `xpath` as a plain
     # xml_select("//" + xpath) over TREEDOC does, compared as sets.
     # NOTE(review): presumably self.callback stores each node's "x"
     # attribute in self.results -- confirm against the callback definition.
     del self.results[:]  # empty the list in place before this run
     select_ids = set(node.xml_attributes["x"]
                      for node in TREEDOC.xml_select("//" + xpath))
     pushtree(TREE1, xpath, self.callback)
     push_ids = set(self.results)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(select_ids, push_ids)
Ejemplo n.º 4
0
 def testsimpleelement(self):
     # Push nodes matching the pattern "a" from self.infile and check that
     # exactly two arrive, with the expected (namespace, local-name) pairs
     # in order.
     pushtree(self.infile, "a", self.callback)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(self.results), 2)
     expected_names = [
         (u'http://spam.com/', u'a'), # XXX this should not be expected?
         (None, u'a')
     ]
     for node, ename in zip(self.results, expected_names):
         self.assertEqual(node.xml_name, ename)
Ejemplo n.º 5
0
def test_1():
    # Push every node matching u"a" out of XML1 and compare each serialized
    # node with the expected fragment at the same position.
    # NOTE(review): zip() stops at the shorter sequence, so fewer matches
    # than expected fragments would go unnoticed here.
    matched = []
    pushtree(XML1, u"a", matched.append)

    expected_fragments = ['<a>0</a>', '<a>1</a>', '<a>10</a>', '<a>11</a>']
    for node, fragment in zip(matched, expected_fragments):
        treecompare.check_xml(node.xml_encode(), XMLDECL+fragment)
Ejemplo n.º 6
0
def test_predicate2():
    # Push the nodes matching the positional pattern u"doc/one[2]" out of
    # XML5 and compare each serialized node with its expected fragment.
    # NOTE(review): zip() stops at the shorter sequence, so an empty match
    # list would pass silently.
    matched = []
    pushtree(XML5, u"doc/one[2]", matched.append)

    expected_fragments = ['<one>repeat</one>']
    for node, fragment in zip(matched, expected_fragments):
        treecompare.check_xml(node.xml_encode(), XMLDECL+fragment)
Ejemplo n.º 7
0
def test_4():
    # Push nodes matching the prefixed pattern u"x:a" out of XML4, with the
    # prefix "x" bound to urn:bogus:x, and compare each serialized node with
    # its expected fragment.
    # NOTE(review): zip() stops at the shorter sequence, so missing matches
    # are not detected here.
    matched = []
    prefix_map = {"x": "urn:bogus:x"}
    pushtree(XML4, u"x:a", matched.append, namespaces = prefix_map)

    expected_fragments = [
        '<x:a xmlns:x="urn:bogus:x">0</x:a>',
        '<x:a xmlns:x="urn:bogus:x">1</x:a>',
    ]
    for node, fragment in zip(matched, expected_fragments):
        treecompare.check_xml(node.xml_encode(), XMLDECL+fragment)
Ejemplo n.º 8
0
def test_2():
    # Push nodes matching u"a" out of XML2, with the None (default) prefix
    # bound to urn:bogus:x, and compare each serialized node with its
    # expected fragment.
    # NOTE(review): zip() stops at the shorter sequence, so missing matches
    # are not detected here.
    matched = []
    default_ns = {None: "urn:bogus:x"}
    pushtree(XML2, u"a", matched.append, namespaces = default_ns)

    expected_fragments = [
        '<a xmlns="urn:bogus:x">0</a>',
        '<a xmlns="urn:bogus:x">1</a>',
        '<a xmlns="urn:bogus:x">10</a>',
        '<a xmlns="urn:bogus:x">11</a>',
    ]
    for node, fragment in zip(matched, expected_fragments):
        treecompare.check_xml(node.xml_encode(), XMLDECL+fragment)
Ejemplo n.º 9
0
Archivo: oai.py Proyecto: dpla/zen
    def list_sets(self):
        """Request the OAI ``ListSets`` verb and return the reported sets.

        Sends one ``verb=ListSets`` request to ``self.root`` through
        ``self.h.request`` and logs the URL and retrieval time at debug level.

        Returns:
            A list of ``(setSpec, setName)`` tuples, one per node delivered by
            ``pushtree`` for the pattern ``o:OAI-PMH/o:ListSets/o:set``.
        """
        pairs = []

        def on_set(node):
            spec = node.xml_select(u'string(o:setSpec)', prefixes=PREFIXES)
            name = node.xml_select(u'string(o:setName)', prefixes=PREFIXES)
            pairs.append((spec, name))

        #e.g. http://dspace.mit.edu/oai/request?verb=ListSets
        url = self.root + '?' + urllib.urlencode({'verb' : 'ListSets'})
        self.logger.debug('OAI request URL: {0}'.format(url))

        start_t = time.time()
        _, content = self.h.request(url)
        elapsed = time.time() - start_t
        self.logger.debug('Retrieved in {0}s'.format(elapsed))

        pushtree(content, u"o:OAI-PMH/o:ListSets/o:set", on_set, namespaces=PREFIXES)
        return pairs
Ejemplo n.º 10
0
def produce_csv(doc, csvWriter, justCount, projectVars):
    """Stream the ``result`` nodes of *doc* into *csvWriter*, or count them.

    Args:
        doc: Source handed to ``pushtree`` (its format is not visible here;
            presumably a SPARQL XML results document -- TODO confirm).
        csvWriter: Object whose ``writerow`` receives one list per result.
        justCount: When true, no rows are written; ``cnt.counter`` is
            incremented once per result instead.
        projectVars: List of column names. When it is empty it is filled in
            place from the ``name`` of every ``variable`` under ``head``.

    Returns:
        The ``Counter`` instance whose ``counter`` / ``skipCounter``
        attributes were updated while processing.
    """
    if not projectVars:
        def receive_header(node):
            for var in node.variable:
                projectVars.append(U(var.name))

        pushtree(doc, u'head', receive_header, entity_factory=entity_base)

    cnt = Counter()

    @coroutine
    def receive_nodes(cnt):
        while True:
            node = yield
            if justCount:
                cnt.counter += 1
                continue

            rt = []
            badChars = False
            bindings = {}
            for binding in node.binding:
                try:
                    newterm = U(binding).encode('ascii')
                except UnicodeEncodeError:
                    # Drop the non-ASCII characters and remember that this
                    # row lost data.
                    newterm = U(binding).encode('ascii', 'ignore')
                    badChars = True
                    print >> sys.stderr, "Skipping character"
                if newterm:
                    if projectVars:
                        bindings[binding.name] = newterm
                    else:
                        # Fixed: this used the undefined name `newTerm`,
                        # which raised NameError when no column names were
                        # available.
                        rt.append(newterm)

            # With known column names, emit values in column order and use
            # '' for any column that has no binding in this result.
            for head in projectVars:
                rt.append(bindings.get(head, ''))
            if badChars:
                cnt.skipCounter += 1
            csvWriter.writerow(rt)

    target = receive_nodes(cnt)
    pushtree(doc, u'result', target.send, entity_factory=entity_base)
    target.close()
    return cnt
Ejemplo n.º 11
0
def test_predicate1():
    # Marked as a known failure: the raise below fires before any work is
    # done, so the rest of the body is currently unreachable. It is kept as
    # the intended check for the attribute predicate pattern.
    raise KnownFailure("No predicates support.  See: http://trac.xml3k.org/ticket/23")

    expected_fragments = ['''<b x='4'>
        <d x='5' />
        <e x='6' />
        <d x='7' />
        <b x='8' />
        <c x='9' />
      </b>''']
    matched = []
    pushtree(TREE1, u"b[x='4']", matched.append)

    for node, fragment in zip(matched, expected_fragments):
        treecompare.check_xml(node.xml_encode(), XMLDECL+fragment)
Ejemplo n.º 12
0
    def list_sets(self):
        """Return every set the repository reports for the ``ListSets`` verb.

        Requests ``verb=ListSets`` from ``self.root`` and keeps re-requesting
        with the ``resumptionToken`` found in each parsed response until a
        response carries no token.

        Returns:
            A list of dicts with the keys ``setSpec``, ``setName`` and
            ``setDescription``, accumulated over all responses.

        Raises:
            OAIHTTPError: the request failed, either with an HTTP error
                status or without reaching the server.
            OAIError: the parsed response lacks ``ListSets`` but has an
                ``error`` entry under ``OAI-PMH``.
            OAIParseError: the parsed response has neither.
        """
        sets = []
        resumptionToken = ''
        #e.g. http://dspace.mit.edu/oai/request?verb=ListSets
        params = {'verb': 'ListSets'}

        # Description lookups, tried in order; the first non-empty result is
        # kept. Defined once because they do not change between pages.
        paths = [
            u'string(o:setDescription/oai_dc:dc/dc:description)',
            u'string(o:setDescription/o:oclcdc/dc:description)',
            u'string(o:setDescription/dc:description)',
            u'string(o:setDescription)'
        ]

        def receive_nodes(n):
            setSpec = n.xml_select(u'string(o:setSpec)', prefixes=PREFIXES)
            setName = n.xml_select(u'string(o:setName)', prefixes=PREFIXES)
            #TODO better solution is to traverse setDescription amara tree
            for p in paths:
                setDescription = n.xml_select(p, prefixes=PREFIXES)
                if setDescription:
                    break
            sets.append({
                'setSpec': setSpec,
                'setName': setName,
                'setDescription': setDescription,
            })

        while True:
            if resumptionToken:
                params['resumptionToken'] = resumptionToken

            url = self.root + '?' + urllib.urlencode(params)
            self.logger.debug('OAI request URL: {0}'.format(url))
            start_t = time.time()
            try:
                content = urllib2.urlopen(url).read()
            except urllib2.HTTPError as e:
                # HTTPError is a subclass of URLError, so it must be caught
                # first; in the previous order this handler was unreachable.
                raise OAIHTTPError("list_sets got status %d: %s" %
                                   (e.code, e.reason))
            except urllib2.URLError as e:
                raise OAIHTTPError("list_sets could not make request: %s" %
                                   e.reason)
            retrieved_t = time.time()
            self.logger.debug('Retrieved in {0}s'.format(retrieved_t -
                                                         start_t))

            pushtree(content,
                     u"o:OAI-PMH/o:ListSets/o:set",
                     receive_nodes,
                     namespaces=PREFIXES)
            try:
                xml_content = XML_PARSE(content)

                resumptionToken = \
                    xml_content["OAI-PMH"]["ListSets"].get("resumptionToken","")
            except KeyError:
                # No ListSets entry: report the repository's error if there
                # is one, otherwise the response is not understood.
                try:
                    error = xml_content["OAI-PMH"]["error"]
                    raise OAIError(error)
                except KeyError:
                    raise OAIParseError("Could not parse %s:\n%s" %
                                        (url, xml_content))
            # The token may be parsed as a dict, in which case its text is
            # under "#text".
            if isinstance(resumptionToken, dict):
                resumptionToken = resumptionToken.get("#text", "")

            # No token left means there is nothing further to request.
            if not resumptionToken:
                break

        return sets
Ejemplo n.º 13
0
    def list_sets(self):
        """Collect all sets exposed through the OAI ``ListSets`` verb.

        Requests ``verb=ListSets`` from ``self.root`` and repeats the request
        with the ``resumptionToken`` read from each parsed response, stopping
        once a response yields no token.

        Returns:
            A list of dicts keyed by ``setSpec``, ``setName`` and
            ``setDescription``, accumulated over all responses.

        Raises:
            OAIHTTPError: the request failed, either with an HTTP error
                status or without reaching the server.
            OAIError: the parsed response lacks ``ListSets`` but has an
                ``error`` entry under ``OAI-PMH``.
            OAIParseError: the parsed response has neither.
        """
        sets = []
        resumptionToken = ''
        #e.g. http://dspace.mit.edu/oai/request?verb=ListSets
        params = {'verb' : 'ListSets'}

        # Description lookups, tried in order; the first non-empty result is
        # kept. Hoisted out of the loop because they never change.
        paths = [
            u'string(o:setDescription/oai_dc:dc/dc:description)',
            u'string(o:setDescription/o:oclcdc/dc:description)',
            u'string(o:setDescription/dc:description)',
            u'string(o:setDescription)'
        ]

        def receive_nodes(n):
            setSpec = n.xml_select(u'string(o:setSpec)', prefixes=PREFIXES)
            setName = n.xml_select(u'string(o:setName)', prefixes=PREFIXES)
            #TODO better solution is to traverse setDescription amara tree
            for p in paths:
                setDescription = n.xml_select(p, prefixes=PREFIXES)
                if setDescription:
                    break
            sets.append({
                'setSpec': setSpec,
                'setName': setName,
                'setDescription': setDescription,
            })

        while True:
            if resumptionToken:
                params['resumptionToken'] = resumptionToken

            qstr = urllib.urlencode(params)
            url = self.root + '?' + qstr
            self.logger.debug('OAI request URL: {0}'.format(url))
            start_t = time.time()
            try:
                content = urllib2.urlopen(url).read()
            except urllib2.HTTPError as e:
                # HTTPError derives from URLError; catching URLError first
                # (as before) made this status-specific handler unreachable.
                raise OAIHTTPError("list_sets got status %d: %s" %
                                   (e.code, e.reason))
            except urllib2.URLError as e:
                raise OAIHTTPError("list_sets could not make request: %s" %
                                   e.reason)
            retrieved_t = time.time()
            self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

            pushtree(content, u"o:OAI-PMH/o:ListSets/o:set", receive_nodes, namespaces=PREFIXES)
            try:
                xml_content = XML_PARSE(content)

                resumptionToken = \
                    xml_content["OAI-PMH"]["ListSets"].get("resumptionToken","")
            except KeyError:
                # No ListSets entry: surface the repository's error if it
                # sent one, otherwise the response is not understood.
                try:
                    error = xml_content["OAI-PMH"]["error"]
                    raise OAIError(error)
                except KeyError:
                    raise OAIParseError("Could not parse %s:\n%s" % (url, xml_content))
            # The token may be parsed as a dict, in which case its text is
            # under "#text".
            if isinstance(resumptionToken, dict):
                resumptionToken = resumptionToken.get("#text", "")

            # An empty token ends the loop.
            if not resumptionToken:
                break

        return sets
Ejemplo n.º 14
0
 def testnamespaces(self):
     # Push nodes matching "/a//q:a" with the prefix "q" bound to
     # http://spam.com/ and expect a single node with that namespace.
     # This is currently broken.  Possible bug in matching code
     pushtree(self.infile, "/a//q:a", self.callback,
              namespaces = {"q": "http://spam.com/"})
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(self.results), 1)
     self.assertEqual(self.results[0].xml_name, (u'http://spam.com/', u'a'))
Ejemplo n.º 15
0
 def testattribute(self):
     # Push nodes matching the attribute pattern "a/*/*/@b" and expect one
     # result whose xml_name is (u'http://spam.com/', u'a').
     pushtree(self.infile, "a/*/*/@b", self.callback)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(self.results), 1)
     self.assertEqual(self.results[0].xml_name, (u'http://spam.com/', u'a'))
Ejemplo n.º 16
0
 def testnestedelement(self):
     # Push nodes matching the nested pattern "a/c" and expect a single,
     # un-namespaced "c" element.
     pushtree(self.infile, "a/c", self.callback)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(self.results), 1)
     self.assertEqual(self.results[0].xml_name, (None, u'c'))