コード例 #1
0
ファイル: harvest.py プロジェクト: benetech/LearningRegistry
    def list_identifiers(self, h, body, params, verb='GET'):
        """Stream a ListIdentifiers-style JSON response.

        Yields the serialized response envelope in chunks: the part before the
        (empty) ``listidentifiers`` array, then one header object per harvested
        identifier, then the closing part — optionally with a flow-control
        resumption token appended.

        :param h: harvest model object providing ``list_identifiers``.
        :param body: request body forwarded to ``get_base_response``.
        :param params: request arguments; may contain ``from``, ``until`` and
            ``resumption_token``.
        :param verb: HTTP verb echoed into the base response.
        """
        data = self.get_base_response(verb, body)
        # Echo the optional date-range arguments back in the request envelope.
        # 'in' replaces dict.has_key(), which was removed in Python 3 and
        # works identically in Python 2.
        if 'from' in params:
            data['request']['from'] = params['from']
        if 'until' in params:
            data['request']['until'] = params['until']
        from_date, until_date = self._test_time_params(params)
        data['listidentifiers'] = []
        # Split the serialized envelope at the empty list's '[' so identifier
        # entries can be streamed between the two halves.
        base_response = json.dumps(data).split('[')
        self._getServiceDocment(False)
        resumption_token = None
        if self.enable_flow_control and 'resumption_token' in params:
            resumption_token = rt.parse_token(self.service_id, params['resumption_token'])
        yield base_response[0] + '['
        first = True
        count = 0
        lastID = None
        lastKey = None
        for d in h.list_identifiers(from_date, until_date, resumption_token=resumption_token, limit=self.limit):
            count += 1
            lastID = d['id']
            lastKey = d['key']
            if not first:
                yield ',\n'
            first = False
            return_value = {"header": {'identifier': d['id'], 'datestamp': helpers.convertToISO8601Zformat(datetime.today()), 'status': 'active'}}
            yield json.dumps(return_value)
        if self.enable_flow_control and self.limit <= count:
            # A full page was emitted: append a resumption token inside the
            # closing half of the envelope so the client can continue paging.
            token = rt.get_token(serviceid=self.service_id, startkey=lastKey, endkey=helpers.convertToISO8601Zformat(until_date), startkey_docid=lastID, from_date=helpers.convertToISO8601Zformat(from_date), until_date=helpers.convertToISO8601Zformat(until_date))
            resp = base_response[1]
            yield resp[:-1] + (',"resumption_token":"%s"' % token) + resp[-1:]
        else:
            yield base_response[1]
コード例 #2
0
ファイル: obtain.py プロジェクト: science/LearningRegistry
    def format_data(self, full_docs, data, currentResumptionToken, keys=[]):
        """Stream a ``{"documents": [...]}`` JSON body for the given view rows.

        Rows are grouped by ``key``; with *full_docs* each group carries its
        documents, otherwise only the ``doc_ID`` is emitted.  When flow control
        is enabled a resumption token (or null) is appended.
        """
        yield '{"documents":['
        num_sent = 0
        group_key = None
        group_chunks = None
        rows_seen = 0
        tail_key = None
        tail_id = None
        if data is not None:
            started = False
            for row in data:
                # Rows lacking an 'id' attribute are ignored entirely.
                if not hasattr(row, 'id'):
                    continue
                tail_key = row.key
                tail_id = row.id
                rows_seen += 1
                if full_docs:
                    if row.key == group_key:
                        # Same group: append the document to the open array.
                        yield ',\n' + json.dumps(row.doc)
                    else:
                        group_key = row.key
                        if started:
                            # Close the previous group's document array first.
                            yield ']' + group_chunks[1] + ',\n'
                        group_chunks = json.dumps({'doc_ID': row.key, 'document': []}).split(']')
                        yield group_chunks[0] + json.dumps(row.doc)
                        started = True
                elif row.key != group_key:
                    group_key = row.key
                    if started:
                        yield ',\n'
                    started = True
                    yield json.dumps({'doc_ID': row.key})
        if full_docs and group_chunks is not None:
            # Terminate the final open group.
            yield ']' + group_chunks[1]
        if not self.enable_flow_control:
            yield "]}"
        elif rows_seen < self.limit:
            # Short page: signal exhaustion with a null token.
            yield '], "resumption_token":%s}' % 'null'
        else:
            token_params = {
                "startkey": tail_key,
                "endkey": None,
                "startkey_docid": tail_id
            }
            # Prefer explicitly supplied keys; otherwise carry forward any
            # keys from the token currently being resumed.
            if len(keys) > 0:
                token_params["keys"] = keys
            elif currentResumptionToken and "keys" in currentResumptionToken and len(currentResumptionToken["keys"]) > 0:
                token_params["keys"] = currentResumptionToken["keys"]
            token = h.fixUtf8(rt.get_token(self.service_id, **token_params))
            yield '], "resumption_token":"%s"}' % token
コード例 #3
0
 def listGeneral(self, h, body, params, includeDocs, verb='GET'):
     """Stream a ListRecords or ListIdentifiers JSON response.

     Validates the requested date range, then streams the response envelope
     with one record/header per harvested row, finishing with a resumption
     token (real, null, or absent) according to flow-control state.

     :param h: harvest model providing ``list_records``/``list_identifiers``.
     :param body: request body forwarded to ``get_base_response``.
     :param params: request arguments (``from``, ``until``,
         ``resumption_token``).
     :param includeDocs: True to emit full records, False for headers only.
     :param verb: HTTP verb echoed into the base response.
     """
     data = self.get_base_response(verb, body)
     try:
         from_date, until_date = self._test_time_params(params)
     except Exception as ex:
         log.error(ex)
         data['OK'] = False
         data['error'] = 'badArgument'
         yield json.dumps(data)
         return
     data['request']['from'] = from_date
     data['request']['until'] = until_date
     if from_date > until_date:
         # Inverted range: report badArgument and emit the error envelope.
         data['OK'] = False
         data['error'] = 'badArgument'
         yield json.dumps(data)
     else:
         self._getServiceDocment(includeDocs)
         resumption_token = None
         count = 0
         lastID = None
         lastKey = None
         # 'in' replaces dict.has_key(), which was removed in Python 3.
         if self.enable_flow_control and 'resumption_token' in params:
             resumption_token = rt.parse_token(self.service_id, params['resumption_token'])
         if includeDocs:
             data['listrecords'] = []
             viewResults = h.list_records(from_date, until_date, resumption_token=resumption_token, limit=self.limit)
             debug_map = lambda doc: {'record': {"header": {'identifier': doc['id'], 'datestamp': doc['key'] + "Z", 'status': 'active'}, 'resource_data': doc['doc']}}
         else:
             data['listidentifiers'] = []
             viewResults = h.list_identifiers(from_date, until_date, resumption_token=resumption_token, limit=self.limit)
             debug_map = lambda doc: {"header": {'identifier': doc['id'], 'datestamp': doc['key'] + "Z", 'status': 'active'}}
         # Split the envelope at the empty list's '[' so rows stream between
         # the two halves.
         base_response = json.dumps(data).split('[')
         yield base_response[0] + '['
         first = True
         # Renamed loop variable: the original reused 'data', shadowing the
         # response envelope above.
         for row in viewResults:
             lastID = row['id']
             lastKey = row['key']
             count += 1
             if not first:
                 yield ',\n'
             first = False
             yield json.dumps(debug_map(row))
         if self.enable_flow_control and self.limit <= count:
             # Full page: emit a real resumption token inside the closing half.
             token = rt.get_token(serviceid=self.service_id, startkey=lastKey, endkey=helpers.convertToISO8601Zformat(until_date), startkey_docid=lastID, from_date=helpers.convertToISO8601Zformat(from_date), until_date=helpers.convertToISO8601Zformat(until_date))
             resp = base_response[1]
             yield resp[:-1] + (',"resumption_token":"%s"' % token) + resp[-1:]
         elif self.limit > count:
             # Short page: emit a null token to signal exhaustion.
             resp = base_response[1]
             yield resp[:-1] + (',"resumption_token":"%s"' % 'null') + resp[-1:]
         else:
             yield base_response[1]
コード例 #4
0
    def format_data(self, full_docs, data, currentResumptionToken, keys=()):
        """Stream the rows in *data* as a ``{"documents": [...]}`` JSON body.

        Rows are grouped by ``key``: with *full_docs* each ``doc_ID`` group
        carries its documents in a nested ``document`` array, otherwise only
        the ``doc_ID`` objects are emitted.  With flow control enabled the
        body ends with a resumption token (or ``null`` on a short page).

        :param full_docs: include full documents, not just doc IDs.
        :param data: iterable of view rows (``key``/``id``/``doc``) or None.
        :param currentResumptionToken: token being resumed; its ``keys`` are
            carried forward when no explicit *keys* are given.
        :param keys: explicit key filter to embed in the next token.
            Default changed from the mutable ``[]`` to ``()`` (never mutated
            here; behavior for all callers is unchanged).
        """
        yield '{"documents":['
        # Removed unused local 'num_sent' from the original.
        currentID = None
        byIDResponseChunks = None
        count = 0
        lastStartKey = None
        lastId = None
        if data is not None:
            firstID = True
            for doc in data:
                # Rows without an 'id' attribute (e.g. reduce rows) are skipped.
                if hasattr(doc, 'id'):
                    lastStartKey = doc.key
                    lastId = doc.id
                    count += 1
                    if full_docs:
                        if doc.key != currentID:
                            currentID = doc.key
                            if not firstID:
                                # Close the previous doc_ID group first.
                                yield ']' + byIDResponseChunks[1] + ',\n'
                            byIDResponseChunks = json.dumps({'doc_ID': doc.key, 'document': []}).split(']')
                            yield byIDResponseChunks[0] + json.dumps(doc.doc)
                            firstID = False
                        else:
                            yield ',\n' + json.dumps(doc.doc)
                    else:
                        if doc.key != currentID:
                            currentID = doc.key
                            if not firstID:
                                yield ',\n'
                            firstID = False
                            yield json.dumps({'doc_ID': doc.key})
        if full_docs and byIDResponseChunks is not None:
            # Terminate the final open group.
            yield ']' + byIDResponseChunks[1]
        if not self.enable_flow_control:
            yield "]}"
        elif count < self.limit:
            yield '], "resumption_token":%s}' % 'null'
        else:
            token_params = {
                "startkey": lastStartKey,
                "endkey": None,
                "startkey_docid": lastId
            }
            # Prefer explicit keys; otherwise carry forward the resumed
            # token's keys.
            if len(keys) > 0:
                token_params["keys"] = keys
            elif currentResumptionToken and "keys" in currentResumptionToken and len(currentResumptionToken["keys"]) > 0:
                token_params["keys"] = currentResumptionToken["keys"]
            token = h.fixUtf8(rt.get_token(self.service_id, **token_params))
            yield '], "resumption_token":"%s"}' % token
コード例 #5
0
 def format_data(self, full_docs, data, currentResumptionToken):
     """Stream a ``{"documents": [...]}`` JSON body for the given view rows.

     Rows are grouped by ``key``; with *full_docs* each group carries its
     documents, otherwise only the ``doc_ID`` is emitted.  When flow control
     is enabled a resumption token (or null) closes the body.
     """
     yield '{"documents":['
     num_sent = 0
     group_key = None
     group_chunks = None
     rows_seen = 0
     tail_key = None
     tail_id = None
     if data is not None:
         started = False
         for row in data:
             tail_key = row.key
             tail_id = row.id
             rows_seen += 1
             if full_docs:
                 if row.key == group_key:
                     # Same group: append document to the open array.
                     yield ',\n' + json.dumps(row.doc)
                 else:
                     group_key = row.key
                     if started:
                         # Close the previous group before opening a new one.
                         yield ']' + group_chunks[1] + ',\n'
                     group_chunks = json.dumps({'doc_ID': row.key, 'document': []}).split(']')
                     yield group_chunks[0] + json.dumps(row.doc)
                     started = True
             elif row.key != group_key:
                 group_key = row.key
                 if started:
                     yield ',\n'
                 started = True
                 yield json.dumps({'doc_ID': row.key})
     if full_docs and group_chunks is not None:
         # Terminate the final open group.
         yield ']' + group_chunks[1]
     if not self.enable_flow_control:
         yield "]}"
     elif rows_seen < self.limit:
         yield '], "resumption_token":%s}' % 'null'
     else:
         token = rt.get_token(self.service_id,
                              startkey=tail_key,
                              endkey=None,
                              startkey_docid=tail_id)
         yield '], "resumption_token":"%s"}' % token
コード例 #6
0
 def format_data(self, full_docs, data, currentResumptionToken):
     """Stream a ``{"documents": [...]}`` JSON body for the given view rows.

     Rows are grouped by ``key``; with *full_docs* each group carries its
     documents, otherwise only the ``doc_ID`` objects are emitted.  With
     flow control enabled the body closes with a resumption token, or a
     ``null`` token when the page was short.
     """
     yield '{"documents":['
     num_sent = 0
     currentID = ""
     byIDResponseChunks = None
     count = 0
     lastStartKey = None
     lastId = None
     if data is not None:
         firstID = True
         for doc in data:
             lastStartKey = doc.key
             lastId = doc.id
             count += 1
             if full_docs:
                 if doc.key != currentID:
                     currentID = doc.key
                     if not firstID:
                         # Close the previous doc_ID group first.
                         yield ']' + byIDResponseChunks[1] + ',\n'
                     byIDResponseChunks = json.dumps({'doc_ID': doc.key, 'document': []}).split(']')
                     yield byIDResponseChunks[0] + json.dumps(doc.doc)
                     firstID = False
                 else:
                     yield ',\n' + json.dumps(doc.doc)
             else:
                 if doc.key != currentID:
                     currentID = doc.key
                     if not firstID:
                         yield ',\n'
                     firstID = False
                     yield json.dumps({'doc_ID': doc.key})
     if full_docs and byIDResponseChunks is not None:
         # Terminate the final open group.
         yield ']' + byIDResponseChunks[1]
     # Fixed: the original tested "count < self.limit or not enable_flow_control"
     # here, which made the null-token branch below unreachable and dropped the
     # resumption_token field entirely on short flow-controlled pages.
     if not self.enable_flow_control:
         yield "]}"
     elif count < self.limit:
         yield '], "resumption_token":%s}' % 'null'
     else:
         token = rt.get_token(self.service_id, startkey=lastStartKey, endkey=None, startkey_docid=lastId)
         yield '], "resumption_token":"%s"}' % token
コード例 #7
0
ファイル: harvest.py プロジェクト: benetech/LearningRegistry
 def list_records(self, h, body, params, verb='GET'):
     """Stream a ListRecords-style JSON response.

     Yields the serialized envelope in chunks with one record per harvested
     document, then the closing half, optionally carrying a flow-control
     resumption token.

     :param h: harvest model object providing ``list_records``.
     :param body: request body forwarded to ``get_base_response``.
     :param params: request arguments (``from``, ``until``,
         ``resumption_token``).
     :param verb: HTTP verb echoed into the base response.
     """
     data = self.get_base_response(verb, body)
     # 'in' replaces dict.has_key(), which was removed in Python 3.
     if 'from' in params:
         data['request']['from'] = params['from']
     if 'until' in params:
         data['request']['until'] = params['until']
     from_date, until_date = self._test_time_params(params)
     data['listrecords'] = []
     self._getServiceDocment(False)
     resumption_token = None
     count = 0
     lastID = None
     lastKey = None
     if self.enable_flow_control and 'resumption_token' in params:
         resumption_token = rt.parse_token(self.service_id, params['resumption_token'])
     # Fixed: validate the date range *before* serializing the envelope.
     # The original set data['OK']/data['error'] after json.dumps(data),
     # so the error flags never reached the emitted response.
     bad_range = from_date > until_date
     if bad_range:
         data['OK'] = False
         data['error'] = 'badArgument'
     base_response = json.dumps(data).split('[')
     yield base_response[0] + '['

     def debug_map(doc):
         # Wrap a raw document in the record envelope with a fresh datestamp.
         return {'record': {"header": {'identifier': doc['_id'], 'datestamp': helpers.convertToISO8601Zformat(datetime.today()), 'status': 'active'}, 'resource_data': doc}}

     if not bad_range:
         first = True
         for row in h.list_records(from_date, until_date, resumption_token=resumption_token, limit=self.limit):
             lastID = row['id']
             lastKey = row['key']
             doc = row['doc']
             count += 1
             if not first:
                 yield ',\n'
             first = False
             yield json.dumps(debug_map(doc))
     if self.enable_flow_control and self.limit <= count:
         # Full page: append a resumption token inside the closing half.
         token = rt.get_token(serviceid=self.service_id, startkey=lastKey, endkey=helpers.convertToISO8601Zformat(until_date), startkey_docid=lastID, from_date=helpers.convertToISO8601Zformat(from_date), until_date=helpers.convertToISO8601Zformat(until_date))
         resp = base_response[1]
         yield resp[:-1] + (',"resumption_token":"%s"' % token) + resp[-1:]
     else:
         yield base_response[1]
コード例 #8
0
        def ListGeneric(params, showDocs=False, record_limit=None):
            """Stream an OAI-PMH ListIdentifiers/ListRecords response.

            Renders a mustache prefix, one stanza per harvested record, an
            optional resumptionToken, and a suffix.  ``oaipmherrors.Error``
            instances are rendered as OAI-PMH error XML.

            :param params: request args; expects ``metadataPrefix``, ``from``,
                ``until`` and ``verb``.
            :param showDocs: True to render full records, False for
                identifiers only.
            :param record_limit: flow-control page size, or None for no limit.
            """
            if not showDocs:
                from lr.mustache.oaipmh import ListIdentifiers as must_ListID
                mustache = must_ListID()
            else:
                from lr.mustache.oaipmh import ListRecords as must_ListRec
                mustache = must_ListRec()

            try:
                doc_index = 0
                err_count = 0
                metadataPrefix = params["metadataPrefix"]
                from_date = params["from"]
                until_date = params["until"]
                doc_err = None
                rendered_init = False
                resumptionToken = None if "resumptionToken" not in params else params[
                    'resumptionToken']
                records = o.list_identifiers_or_records(metadataPrefix,
                                                        from_date=from_date,
                                                        until_date=until_date,
                                                        rt=resumptionToken,
                                                        fc_limit=record_limit,
                                                        include_docs=showDocs)
                for ident in records:
                    doc_index += 1
                    doc_err = False

                    if OAIPMHDocumentResolver.PAYLOAD_ERROR in ident:
                        err_count += 1
                        doc_err = True
                        log.debug(
                            "Payload Error detected, doc_index: {0}, err_count: {1}"
                            .format(doc_index, err_count))

                    # Emit the response prefix exactly once, on the first
                    # non-error document.
                    if doc_index - err_count == 1:
                        rendered_init = True
                        part = mustache.prefix(
                            **self._initMustache(args=params, req=t_req))
                        yield h.fixUtf8(self._returnResponse(part, res=t_res))

                    if doc_err is False and (record_limit is None
                                             or doc_index <= record_limit):
                        part = mustache.doc(ident)
                        yield h.fixUtf8(part)
                    elif enable_flow_control:
                        from lr.lib import resumption_token
                        if doc_index - err_count > 0 and doc_index > record_limit:
                            # At least one good doc went out: emit a token
                            # for the next page and stop streaming.
                            opts = o.list_opts(
                                metadataPrefix,
                                h.convertToISO8601UTC(ident["node_timestamp"]),
                                until_date)
                            opts["startkey_docid"] = ident["doc_ID"]
                            token = resumption_token.get_token(
                                serviceid=service_id,
                                from_date=from_date,
                                until_date=until_date,
                                **opts)
                            part = mustache.resumptionToken(token)
                            yield h.fixUtf8(part)
                            break
                        elif doc_index - err_count == 0 and doc_index > record_limit:
                            # The whole page was payload errors: fetch the next
                            # page and reset the counters.
                            # NOTE(review): rebinding ``records`` here does NOT
                            # restart the active for-loop iterator — verify the
                            # paging actually advances as intended.
                            opts = o.list_opts(
                                metadataPrefix,
                                h.convertToISO8601UTC(ident["node_timestamp"]),
                                until_date)
                            opts["startkey_docid"] = ident["doc_ID"]
                            payload = resumption_token.get_payload(
                                from_date=from_date,
                                until_date=until_date,
                                **opts)
                            records = o.list_identifiers_or_records(
                                metadataPrefix,
                                from_date=from_date,
                                until_date=until_date,
                                rt=payload,
                                fc_limit=record_limit,
                                include_docs=showDocs)
                            doc_index = 0
                            err_count = 0

                if doc_index == 0 and err_count == 0:
                    raise NoRecordsMatchError(params['verb'], req=t_req)
                elif (doc_index - err_count) == 0:
                    raise CannotDisseminateFormatError(params['verb'],
                                                       req=t_req)
                else:
                    if enable_flow_control and doc_index <= record_limit:
                        yield h.fixUtf8(mustache.resumptionToken())
                    yield h.fixUtf8(mustache.suffix())

            except oaipmherrors.Error as e:
                if not rendered_init:
                    from lr.mustache.oaipmh import Error as err_stache
                    err = err_stache()
                    yield h.fixUtf8(self._returnResponse(err.xml(e),
                                                         res=t_res))
                else:
                    from lr.mustache.oaipmh import ErrorOnly as err_stache
                    err = err_stache()
                    yield h.fixUtf8(
                        self._returnResponse(err.xml(e) + mustache.suffix(),
                                             res=t_res))
            except Exception:
                # Fixed: a bare 'except:' in a generator also traps
                # GeneratorExit/KeyboardInterrupt; catch Exception so
                # generator shutdown is not swallowed and mislogged.
                log.exception("Unknown Error Occurred")
コード例 #9
0
        def ListGeneric(params, showDocs=False, record_limit=None):
            """Stream an OAI-PMH ListIdentifiers/ListRecords response.

            Renders a mustache prefix, one stanza per harvested record, an
            optional resumptionToken, and a suffix.  ``oaipmherrors.Error``
            instances are rendered as OAI-PMH error XML.

            :param params: request args; expects ``metadataPrefix``, ``from``,
                ``until`` and ``verb``.
            :param showDocs: True to render full records, False for
                identifiers only.
            :param record_limit: flow-control page size, or None for no limit.
            """
            if not showDocs:
                from lr.mustache.oaipmh import ListIdentifiers as must_ListID
                mustache = must_ListID()
            else:
                from lr.mustache.oaipmh import ListRecords as must_ListRec
                mustache = must_ListRec()

            try:
                doc_index = 0
                err_count = 0
                metadataPrefix = params["metadataPrefix"]
                from_date = params["from"]
                until_date = params["until"]
                doc_err = None
                rendered_init = False
                resumptionToken = None if "resumptionToken" not in params else params['resumptionToken']
                records = o.list_identifiers_or_records(metadataPrefix,
                                                from_date=from_date,
                                                until_date=until_date,
                                                rt=resumptionToken,
                                                fc_limit=record_limit,
                                                include_docs=showDocs)
                for ident in records:
                    doc_index += 1
                    doc_err = False

                    if OAIPMHDocumentResolver.PAYLOAD_ERROR in ident:
                        err_count += 1
                        doc_err = True
                        log.debug("Payload Error detected, doc_index: {0}, err_count: {1}".format(doc_index, err_count))

                    # Emit the response prefix exactly once, on the first
                    # non-error document.
                    if doc_index - err_count == 1:
                        rendered_init = True
                        part = mustache.prefix(**self._initMustache(args=params, req=t_req))
                        yield h.fixUtf8(self._returnResponse(part, res=t_res))

                    if doc_err is False and (record_limit is None or doc_index <= record_limit):
                        part = mustache.doc(ident)
                        yield h.fixUtf8(part)
                    elif enable_flow_control:
                        from lr.lib import resumption_token
                        if doc_index - err_count > 0 and doc_index > record_limit:
                            # At least one good doc went out: emit a token for
                            # the next page and stop streaming.
                            opts = o.list_opts(metadataPrefix, h.convertToISO8601UTC(ident["node_timestamp"]), until_date)
                            opts["startkey_docid"] = ident["doc_ID"]
                            token = resumption_token.get_token(serviceid=service_id, from_date=from_date, until_date=until_date, **opts)
                            part = mustache.resumptionToken(token)
                            yield h.fixUtf8(part)
                            break
                        elif doc_index - err_count == 0 and doc_index > record_limit:
                            # The whole page was payload errors: fetch the next
                            # page and reset counters.
                            # NOTE(review): rebinding ``records`` here does NOT
                            # restart the active for-loop iterator — verify the
                            # paging actually advances as intended.
                            opts = o.list_opts(metadataPrefix, h.convertToISO8601UTC(ident["node_timestamp"]), until_date)
                            opts["startkey_docid"] = ident["doc_ID"]
                            payload = resumption_token.get_payload(from_date=from_date, until_date=until_date, **opts)
                            records = o.list_identifiers_or_records(metadataPrefix,
                                                from_date=from_date,
                                                until_date=until_date,
                                                rt=payload,
                                                fc_limit=record_limit,
                                                include_docs=showDocs)
                            doc_index = 0
                            err_count = 0

                if doc_index == 0 and err_count == 0:
                    raise NoRecordsMatchError(params['verb'], req=t_req)
                elif (doc_index - err_count) == 0:
                    raise CannotDisseminateFormatError(params['verb'], req=t_req)
                else:
                    if enable_flow_control and doc_index <= record_limit:
                        yield h.fixUtf8(mustache.resumptionToken())
                    yield h.fixUtf8(mustache.suffix())

            except oaipmherrors.Error as e:
                if not rendered_init:
                    from lr.mustache.oaipmh import Error as err_stache
                    err = err_stache()
                    yield h.fixUtf8(self._returnResponse(err.xml(e), res=t_res))
                else:
                    from lr.mustache.oaipmh import ErrorOnly as err_stache
                    err = err_stache()
                    yield h.fixUtf8(self._returnResponse(err.xml(e) + mustache.suffix(), res=t_res))
            except Exception:
                # Fixed: a bare 'except:' in a generator also traps
                # GeneratorExit/KeyboardInterrupt; catch Exception so
                # generator shutdown is not swallowed and mislogged.
                log.exception("Unknown Error Occurred")
コード例 #10
0
ファイル: harvest.py プロジェクト: grrizzly/LearningRegistry
 def listGeneral(self, h, body, params, includeDocs, verb='GET'):
     """Stream a ListRecords or ListIdentifiers JSON response.

     Validates the requested date range, then streams the response envelope
     with one record/header per harvested row, closing with a resumption
     token (real, null, or absent) according to flow-control state.

     :param h: harvest model providing ``list_records``/``list_identifiers``.
     :param body: request body forwarded to ``get_base_response``.
     :param params: request arguments (``from``, ``until``,
         ``resumption_token``).
     :param includeDocs: True to emit full records, False for headers only.
     :param verb: HTTP verb echoed into the base response.
     """
     data = self.get_base_response(verb, body)
     try:
         from_date, until_date = self._test_time_params(params)
     except:
         data['OK'] = False
         data['error'] = 'badArgument'
         yield json.dumps(data)
         return
     data['request']['from'] = from_date
     data['request']['until'] = until_date
     if from_date > until_date:
         # Inverted range: report badArgument and emit the error envelope.
         data['OK'] = False
         data['error'] = 'badArgument'
         yield json.dumps(data)
     else:
         self._getServiceDocment(includeDocs)
         resumption_token = None
         count = 0
         lastID = None
         lastKey = None
         # 'in' replaces dict.has_key(), which was removed in Python 3.
         if self.enable_flow_control and 'resumption_token' in params:
             resumption_token = rt.parse_token(self.service_id,
                                               params['resumption_token'])
         if includeDocs:
             data['listrecords'] = []
             viewResults = h.list_records(from_date,
                                          until_date,
                                          resumption_token=resumption_token,
                                          limit=self.limit)
             debug_map = lambda doc: {
                 'record': {
                     "header": {
                         'identifier': doc['id'],
                         'datestamp': doc['key'] + "Z",
                         'status': 'active'
                     },
                     'resource_data': doc['doc']
                 }
             }
         else:
             data['listidentifiers'] = []
             viewResults = h.list_identifiers(
                 from_date,
                 until_date,
                 resumption_token=resumption_token,
                 limit=self.limit)
             debug_map = lambda doc: {
                 "header": {
                     'identifier': doc['id'],
                     'datestamp': doc['key'] + "Z",
                     'status': 'active'
                 }
             }
         # Split the envelope at the empty list's '[' so rows stream
         # between the two halves.
         base_response = json.dumps(data).split('[')
         yield base_response[0] + '['
         first = True
         # Renamed loop variable: the original reused 'data', shadowing
         # the response envelope above.
         for row in viewResults:
             lastID = row['id']
             lastKey = row['key']
             count += 1
             if not first:
                 yield ',\n'
             first = False
             yield json.dumps(debug_map(row))
         if self.enable_flow_control and self.limit <= count:
             # Full page: emit a real resumption token in the closing half.
             token = rt.get_token(
                 serviceid=self.service_id,
                 startkey=lastKey,
                 endkey=helpers.convertToISO8601Zformat(until_date),
                 startkey_docid=lastID,
                 from_date=helpers.convertToISO8601Zformat(from_date),
                 until_date=helpers.convertToISO8601Zformat(until_date))
             resp = base_response[1]
             yield resp[:-1] + (',"resumption_token":"%s"' %
                                token) + resp[-1:]
         elif self.limit > count:
             # Short page: emit a null token to signal exhaustion.
             resp = base_response[1]
             yield resp[:-1] + (',"resumption_token":"%s"' %
                                'null') + resp[-1:]
         else:
             yield base_response[1]