def test_verbs(self):
    """oairepository - testing verbs"""
    # Each entry pairs the verb name expected in the response with a
    # deferred call to the matching handler, so every handler is still
    # invoked and checked one after the other.
    server = oai_repository_server
    verb_calls = (
        ("Identify", lambda: server.oaiidentify("", None)),
        ("ListIdentifiers", lambda: server.oailistidentifiers("")),
        ("ListRecords", lambda: server.oailistrecords("")),
        ("ListMetadataFormats", lambda: server.oailistmetadataformats("")),
        ("ListSets", lambda: server.oailistsets("")),
        ("GetRecord", lambda: server.oaigetrecord("")),
    )
    for verb, produce_response in verb_calls:
        self.assertNotEqual(None, re.search(verb, produce_response()))
Example #2
0
 def test_verbs(self):
     """oairepository - testing verbs"""
     # Each entry pairs the verb name expected in the response with a
     # deferred call to the matching handler, so every handler is still
     # invoked and checked one after the other.
     server = oai_repository_server
     verb_calls = (
         ("Identify", lambda: server.oaiidentify("", None)),
         ("ListIdentifiers", lambda: server.oailistidentifiers("")),
         ("ListRecords", lambda: server.oailistrecords("")),
         ("ListMetadataFormats", lambda: server.oailistmetadataformats("")),
         ("ListSets", lambda: server.oailistsets("")),
         ("GetRecord", lambda: server.oaigetrecord("")),
     )
     for verb, produce_response in verb_calls:
         self.assertNotEqual(None, re.search(verb, produce_response()))
    def test_from_and_until(self):
        """oairepository - testing selective harvesting with 'from' and 'until' parameters

        Takes the first (identifier, datestamp) pair found in the
        ListIdentifiers output and checks that oaigetsysnolist() includes
        or excludes the corresponding record id for several combinations
        of 'fromdate' and 'untildate'.
        """

        # List available records, get datestamps and play with them
        identifiers = oai_repository_server.oailistidentifiers("")
        datestamps = re.findall(r'<identifier>(?P<id>.*)</identifier>\s*<datestamp>(?P<date>.*)</datestamp>', identifiers)

        # There must be some datestamps. Check this before indexing so that
        # an empty listing is reported as a test failure, not an IndexError.
        self.assertNotEqual([], datestamps)

        # Take one oai id together with its datestamp
        sample_oai_id, sample_datestamp = datestamps[0]
        sample_id = search_engine.perform_request_search(p=sample_oai_id,
                                                         f=CFG_OAI_ID_FIELD)[0] # Find corresponding system number id

        # We must be able to retrieve an id with the date we have just found
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(fromdate=sample_datestamp))
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(untildate=sample_datestamp))
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(untildate=sample_datestamp,
                                                                           fromdate=sample_datestamp))

        # Same, with short format date. Eg 2007-12-13
        short_datestamp = sample_datestamp.split('T')[0]
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(fromdate=short_datestamp))
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(untildate=short_datestamp))
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(fromdate=short_datestamp,
                                                                           untildate=short_datestamp))

        # At later date (year after) we should not find our id again
        sample_datestamp_year = int(sample_datestamp[0:4])
        sample_datestamp_rest = sample_datestamp[4:]
        later_datestamp = str(sample_datestamp_year + 1) + sample_datestamp_rest
        self.assertTrue(sample_id not in oai_repository_server.oaigetsysnolist(fromdate=later_datestamp))

        # At earlier date (year before) we should not find our id again
        earlier_datestamp = str(sample_datestamp_year - 1) + sample_datestamp_rest
        self.assertTrue(sample_id not in oai_repository_server.oaigetsysnolist(untildate=earlier_datestamp))

        # From earliest date to latest date must include all oai records.
        # Map each parsed time to its original datestamp string, then pick
        # the extremes with min()/max() instead of sorting the key list.
        datestamp_format = "%Y-%m-%dT%H:%M:%SZ"
        datestamp_by_time = dict((time.mktime(time.strptime(stamp, datestamp_format)), stamp)
                                 for _oai_id, stamp in datestamps)
        earliest_datestamp = datestamp_by_time[min(datestamp_by_time)]
        latest_datestamp = datestamp_by_time[max(datestamp_by_time)]
        self.assertEqual(len(oai_repository_server.oaigetsysnolist()),
                         len(oai_repository_server.oaigetsysnolist(fromdate=earliest_datestamp,
                                                                   untildate=latest_datestamp)))
    def test_from_and_until(self):
        """oairepository - testing selective harvesting with 'from' and 'until' parameters

        Takes the first (identifier, datestamp) pair found in the
        ListIdentifiers output and checks that oaigetsysnolist() includes
        or excludes the corresponding record id for several combinations
        of 'fromdate' and 'untildate'.
        """

        # List available records, get datestamps and play with them
        identifiers = oai_repository_server.oailistidentifiers("")
        datestamps = re.findall(r'<identifier>(?P<id>.*)</identifier>\s*<datestamp>(?P<date>.*)</datestamp>', identifiers)

        # There must be some datestamps. Check this before indexing so that
        # an empty listing is reported as a test failure, not an IndexError.
        self.assertNotEqual([], datestamps)

        # Take one oai id together with its datestamp
        sample_oai_id, sample_datestamp = datestamps[0]
        sample_id = search_engine.perform_request_search(p=sample_oai_id,
                                                         f=CFG_OAI_ID_FIELD)[0] # Find corresponding system number id

        # We must be able to retrieve an id with the date we have just found
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(fromdate=sample_datestamp))
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(untildate=sample_datestamp))
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(untildate=sample_datestamp,
                                                                           fromdate=sample_datestamp))

        # Same, with short format date. Eg 2007-12-13
        short_datestamp = sample_datestamp.split('T')[0]
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(fromdate=short_datestamp))
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(untildate=short_datestamp))
        self.assertTrue(sample_id in oai_repository_server.oaigetsysnolist(fromdate=short_datestamp,
                                                                           untildate=short_datestamp))

        # At later date (year after) we should not find our id again
        sample_datestamp_year = int(sample_datestamp[0:4])
        sample_datestamp_rest = sample_datestamp[4:]
        later_datestamp = str(sample_datestamp_year + 1) + sample_datestamp_rest
        self.assertTrue(sample_id not in oai_repository_server.oaigetsysnolist(fromdate=later_datestamp))

        # At earlier date (year before) we should not find our id again
        earlier_datestamp = str(sample_datestamp_year - 1) + sample_datestamp_rest
        self.assertTrue(sample_id not in oai_repository_server.oaigetsysnolist(untildate=earlier_datestamp))

        # From earliest date to latest date must include all oai records.
        # Map each parsed time to its original datestamp string, then pick
        # the extremes with min()/max() instead of sorting the key list.
        datestamp_format = "%Y-%m-%dT%H:%M:%SZ"
        datestamp_by_time = dict((time.mktime(time.strptime(stamp, datestamp_format)), stamp)
                                 for _oai_id, stamp in datestamps)
        earliest_datestamp = datestamp_by_time[min(datestamp_by_time)]
        latest_datestamp = datestamp_by_time[max(datestamp_by_time)]
        self.assertEqual(len(oai_repository_server.oaigetsysnolist()),
                         len(oai_repository_server.oaigetsysnolist(fromdate=earliest_datestamp,
                                                                   untildate=latest_datestamp)))
    def __call__(self, req, form):
        """OAI repository interface.

        Washes the request arguments, validates them with
        oai_repository_server.check_argd(), throttles requests based on
        the modification time of a timestamp file under CFG_CACHEDIR, and
        writes the XML response for the requested verb to 'req'.

        req  -- request object; this method sets its headers_out, status
                and content_type, reads its uri, and calls
                send_http_header() and write() on it.
        form -- submitted request fields; must support 'in' and keys().

        Returns a "Retry after N seconds" message when the request is
        throttled (HTTP 503), otherwise a single newline once the
        response has been written.
        """

        # Clean input arguments. The protocol specifies that an error
        # has to be returned if the same argument is specified several
        # times. Eg:
        # oai2d?verb=ListIdentifiers&metadataPrefix=marcxml&metadataPrefix=marcxml
        # So keep the arguments as list for now so that check_argd can
        # return an error if needed (check_argd also transforms these
        # lists into strings)
        argd = wash_urlargd(form, {'verb': (list, []),
                                   'metadataPrefix': (list, []),
                                   'from': (list, []),
                                   'until': (list, []),
                                   'set': (list, []),
                                   'identifier': (list, []),
                                   'resumptionToken': (list, []),
                                   })

        ## wash_urlargd(..) function cleaned everything, but also added
        ## unwanted parameters. Remove them now. Iterate over a snapshot
        ## of the keys because entries are deleted inside the loop.
        for param in list(argd.keys()):
            if param not in form and param != 'verb':
                del argd[param]

        ## wash_urlargd(..) function also removed unknown parameters
        ## that we would like to keep in order to send back an error
        ## as required by the protocol. But we do not need that value,
        ## so set it to empty string.
        for param in form.keys():
            if param not in argd:
                argd[param] = ''

        ## But still remove 'ln' parameter that was automatically added.
        if 'ln' in argd:
            del argd['ln']

        ## check request for OAI compliancy
        ## also transform all the list arguments into string
        oai_error = oai_repository_server.check_argd(argd)

        ## check availability (OAI requests for Identify, ListSets and
        ## ListMetadataFormats are served immediately, otherwise we
        ## shall wait for CFG_OAI_SLEEP seconds between requests):
        timestamp_path = "%s/RTdata/RTdata" % CFG_CACHEDIR
        if os.path.exists(timestamp_path) and (argd['verb'] not in ["Identify", "ListMetadataFormats", "ListSets"]):
            time_gap = int(time.time() - os.path.getmtime(timestamp_path))
            if time_gap < CFG_OAI_SLEEP:
                retry_after = CFG_OAI_SLEEP - time_gap
                req.headers_out["Status-Code"] = "503"
                req.headers_out["Retry-After"] = "%d" % retry_after
                req.status = apache.HTTP_SERVICE_UNAVAILABLE
                return "Retry after %d seconds" % retry_after
        ## record the time of this request for the next availability check
        command = "touch %s" % timestamp_path
        os.system(command)

        ## construct args (argd string equivalent) for the
        ## oai_repository_server business logic (later it may be good if it
        ## takes argd directly):
        args = urllib.urlencode(argd)

        ## create OAI response

        req.content_type = "text/xml"
        req.send_http_header()

        if oai_error == "":
            verb = argd['verb']

            ## OAI Identify
            if verb == "Identify":
                req.write(oai_repository_server.oaiidentify(args, script_url=req.uri))

            ## OAI ListSets
            elif verb == "ListSets":
                req.write(oai_repository_server.oailistsets(args))

            ## OAI ListIdentifiers
            elif verb == "ListIdentifiers":
                req.write(oai_repository_server.oailistidentifiers(args))

            ## OAI ListRecords
            elif verb == "ListRecords":
                req.write(oai_repository_server.oailistrecords(args))

            ## OAI GetRecord
            elif verb == "GetRecord":
                req.write(oai_repository_server.oaigetrecord(args))

            ## OAI ListMetadataFormats
            elif verb == "ListMetadataFormats":
                req.write(oai_repository_server.oailistmetadataformats(args))

            ## Unknown verb
            else:
                req.write(oai_repository_server.oai_error("badVerb","Illegal OAI verb"))

        ## OAI error
        else:
            req.write(oai_repository_server.oai_header(args,""))
            req.write(oai_error)
            req.write(oai_repository_server.oai_footer(""))

        return "\n"
    def __call__(self, req, form):
        """OAI repository interface.

        Washes the request arguments, validates them with
        oai_repository_server.check_argd(), throttles requests based on
        the modification time of a timestamp file under CFG_CACHEDIR, and
        writes the XML response for the requested verb to 'req'.

        req  -- request object; this method sets its headers_out, status
                and content_type, reads its uri, and calls
                send_http_header() and write() on it.
        form -- submitted request fields; must support 'in' and keys().

        Returns a "Retry after N seconds" message when the request is
        throttled (HTTP 503), otherwise a single newline once the
        response has been written.
        """

        # Clean input arguments. The protocol specifies that an error
        # has to be returned if the same argument is specified several
        # times. Eg:
        # oai2d?verb=ListIdentifiers&metadataPrefix=marcxml&metadataPrefix=marcxml
        # So keep the arguments as list for now so that check_argd can
        # return an error if needed (check_argd also transforms these
        # lists into strings)
        argd = wash_urlargd(
            form, {
                'verb': (list, []),
                'metadataPrefix': (list, []),
                'from': (list, []),
                'until': (list, []),
                'set': (list, []),
                'identifier': (list, []),
                'resumptionToken': (list, []),
            })

        ## wash_urlargd(..) function cleaned everything, but also added
        ## unwanted parameters. Remove them now. Iterate over a snapshot
        ## of the keys because entries are deleted inside the loop.
        for param in list(argd.keys()):
            if param not in form and param != 'verb':
                del argd[param]

        ## wash_urlargd(..) function also removed unknown parameters
        ## that we would like to keep in order to send back an error
        ## as required by the protocol. But we do not need that value,
        ## so set it to empty string.
        for param in form.keys():
            if param not in argd:
                argd[param] = ''

        ## But still remove 'ln' parameter that was automatically added.
        if 'ln' in argd:
            del argd['ln']

        ## check request for OAI compliancy
        ## also transform all the list arguments into string
        oai_error = oai_repository_server.check_argd(argd)

        ## check availability (OAI requests for Identify, ListSets and
        ## ListMetadataFormats are served immediately, otherwise we
        ## shall wait for CFG_OAI_SLEEP seconds between requests):
        timestamp_path = "%s/RTdata/RTdata" % CFG_CACHEDIR
        if os.path.exists(timestamp_path) and (argd['verb'] not in [
                "Identify", "ListMetadataFormats", "ListSets"
        ]):
            time_gap = int(time.time() - os.path.getmtime(timestamp_path))
            if time_gap < CFG_OAI_SLEEP:
                retry_after = CFG_OAI_SLEEP - time_gap
                req.headers_out["Status-Code"] = "503"
                req.headers_out["Retry-After"] = "%d" % retry_after
                req.status = apache.HTTP_SERVICE_UNAVAILABLE
                return "Retry after %d seconds" % retry_after
        ## record the time of this request for the next availability check
        command = "touch %s" % timestamp_path
        os.system(command)

        ## construct args (argd string equivalent) for the
        ## oai_repository_server business logic (later it may be good if it
        ## takes argd directly):
        args = urllib.urlencode(argd)

        ## create OAI response

        req.content_type = "text/xml"
        req.send_http_header()

        if oai_error == "":
            verb = argd['verb']

            ## OAI Identify
            if verb == "Identify":
                req.write(
                    oai_repository_server.oaiidentify(args,
                                                      script_url=req.uri))

            ## OAI ListSets
            elif verb == "ListSets":
                req.write(oai_repository_server.oailistsets(args))

            ## OAI ListIdentifiers
            elif verb == "ListIdentifiers":
                req.write(oai_repository_server.oailistidentifiers(args))

            ## OAI ListRecords
            elif verb == "ListRecords":
                req.write(oai_repository_server.oailistrecords(args))

            ## OAI GetRecord
            elif verb == "GetRecord":
                req.write(oai_repository_server.oaigetrecord(args))

            ## OAI ListMetadataFormats
            elif verb == "ListMetadataFormats":
                req.write(oai_repository_server.oailistmetadataformats(args))

            ## Unknown verb
            else:
                req.write(
                    oai_repository_server.oai_error("badVerb",
                                                    "Illegal OAI verb"))

        ## OAI error
        else:
            req.write(oai_repository_server.oai_header(args, ""))
            req.write(oai_error)
            req.write(oai_repository_server.oai_footer(""))

        return "\n"