def test_request_authorization(self):
    """Check that the API key is sent as the authorization header"""
    url = 'http://somewhere.com/test'
    httpretty.register_uri(httpretty.POST, url)
    ckan_resource = CkanResource(url, 'somekey', {'offset': None, 'limit': None})
    ckan_resource._get_response(200, 20)
    # Snapshot the headers of the last request httpretty intercepted
    sent_headers = dict(httpretty.last_request().headers)
    assert_equals(sent_headers['authorization'], 'somekey')
def test_default_before(self):
    """
    Ensure the default "before" hook leaves the request parameters untouched.
    """
    ckan_resource = CkanResource('http://somewhere.com/test', None, 1, {})
    original_params = {'offset': 10, 'limit': 32, 'banana': True}
    params_under_test = copy.deepcopy(original_params)
    # the hook is handed the copy and is expected to be a no-op
    ckan_resource._default_before(params_under_test)
    assert_equals(original_params, params_under_test)
def create_zip(self, resource):
    """
    Build the ZIP file for the current request.

    The datastore fields are fetched and written out as headers, the records are then written
    as they are retrieved, the resource is finalized and finally zipped. The resource's work
    files are cleaned up whether or not any of these steps fail.

    NOTE(review): earlier documentation promised the ZIP file name as a return value, but
    nothing is returned from this method - confirm what callers expect.

    :param resource: the resource the headers and records are written to and which creates
                     the ZIP
    """
    schema = self.schema()
    # only forward the request parameters whose schema entry has a truthy third element
    ckan_params = {
        name: value
        for name, value in self.request_params.items()
        if schema[name][2]
    }
    api_url = self.request_params['api_url']
    api_key = self.request_params.get('key', None)
    ckan_resource = CkanResource(api_url, api_key, self.config['PAGE_SIZE'], ckan_params)

    try:
        self.log.info("Fetching fields")
        # the fields become the headers, the backend type is needed to fetch the records
        fields, backend = ckan_resource.get_fields_and_backend()
        fields = self._write_headers(resource, fields)

        self.log.info("Fetching records")
        # the records are handed straight to the writer so that they are written as they
        # are retrieved
        records = ckan_resource.get_records(backend)
        self._write_records(records, fields, resource)

        self._finalize_resource(fields, resource)
        resource.create_zip(self.config['ZIP_COMMAND'])
    finally:
        resource.clean_work_files()
def test_request_authorization(self):
    """
    Check that the API key is sent as the authorization header.
    """
    url = 'http://somewhere.com/test'
    httpretty.register_uri(httpretty.POST, url, body=EMPTY_BODY)
    ckan_resource = CkanResource(url, 'somekey', 1, {'offset': None, 'limit': None})
    # consume all the records so that the request has been issued before we inspect it
    list(ckan_resource.get_records())
    last_request = httpretty.last_request()
    assert_equals(last_request.headers['authorization'], 'somekey')
def test_request_parameters(self):
    """
    Check that the parameters given to the resource end up in the request body.
    """
    url = 'http://somewhere.com/test'
    httpretty.register_uri(httpretty.POST, url, body=EMPTY_BODY)
    ckan_resource = CkanResource(url, None, 34, {'carrot': 'cake'})
    # consume all the records so that the request has been issued before we inspect it
    list(ckan_resource.get_records())
    sent_body = json.loads(httpretty.last_request().body)
    expected_body = {'offset': 0, 'limit': 34, 'carrot': 'cake'}
    assert_equals(sent_body, expected_body)
def test_request_failure(self):
    """
    Check that a StreamError is raised when the server answers with a 500 status code.
    """
    url = 'http://somewhere.com/test'
    httpretty.register_uri(httpretty.POST, url, status=500)
    ckan_resource = CkanResource(url, None, 1, {'offset': None, 'limit': None})
    with assert_raises(StreamError):
        # consuming the records is what is expected to raise
        list(ckan_resource.get_records())
def test_request_failure(self):
    """Check that a StreamError is raised when the server answers with a 500 status code"""
    url = 'http://somewhere.com/test'
    httpretty.register_uri(httpretty.POST, url, status=500)
    ckan_resource = CkanResource(url, None, {'offset': None, 'limit': None})
    try:
        ckan_resource._get_response(200, 20)
    except StreamError:
        # This is the expected outcome
        pass
    else:
        # No exception at all means the failure went unnoticed
        assert_true(False, "Expected exception StreamError")
def test_request_url(self):
    """
    Check that the given URL is the one queried and that the default offset and the page
    size are sent in the request body.
    """
    url = 'http://somewhere.com/test'
    httpretty.register_uri(httpretty.POST, url, body=EMPTY_BODY)
    ckan_resource = CkanResource(url, None, 42, {})
    # consume all the records so that the request has been issued before we inspect it
    list(ckan_resource.get_records())
    last_request = httpretty.last_request()
    assert_equals(last_request.path, '/test')
    assert_equals(json.loads(last_request.body), {'offset': 0, 'limit': 42})
def test_request_no_limit(self):
    """
    Check that the page size is sent as the request limit when no limit is specified.
    """
    url = 'http://somewhere.com/test'
    page_size = 57
    httpretty.register_uri(httpretty.POST, url, body=EMPTY_BODY)
    ckan_resource = CkanResource(url, None, page_size, {'offset': None, 'limit': None})
    # consume all the records so that the request has been issued before we inspect it
    list(ckan_resource.get_records())
    sent_body = json.loads(httpretty.last_request().body)
    assert_equals(sent_body, {'offset': 0, 'limit': page_size})
def test_request_limit_limit_lower(self):
    """
    When a limit is requested it caps the number of records to download, whereas the page
    size caps how many records are requested at a time. The lower of the two should be sent
    as the CKAN request limit to avoid fetching far more records than needed.

    This covers the scenario where the requested limit is lower than the page size.
    """
    url = 'http://somewhere.com/test'
    page_size = 200
    # a single page holding exactly as many records as the requested limit
    page_body = json.dumps({'result': {'records': list(range(10))}})
    httpretty.register_uri(httpretty.POST, url, responses=[httpretty.Response(page_body)])
    ckan_resource = CkanResource(url, None, page_size, {'offset': 4, 'limit': 10})
    downloaded = list(ckan_resource.get_records())
    assert_equals(len(downloaded), 10)
    sent_body = json.loads(httpretty.last_request().body)
    assert_equals(sent_body, {'offset': 4, 'limit': 10})
def test_request_limit_size_lower(self):
    """
    When a limit is requested it caps the number of records to download, whereas the page
    size caps how many records are requested at a time. The lower of the two should be sent
    as the CKAN request limit to avoid fetching far more records than needed.

    This covers the scenario where the page size is lower than the requested limit.
    """
    url = 'http://somewhere.com/test'
    page_size = 2
    # three registered responses, each holding one full page of records
    page_body = json.dumps({'result': {'records': list(range(page_size))}})
    responses = [httpretty.Response(page_body) for _ in range(3)]
    httpretty.register_uri(httpretty.POST, url, responses=responses)
    ckan_resource = CkanResource(url, None, page_size, {'offset': 4, 'limit': 10})
    downloaded = list(ckan_resource.get_records())
    assert_equals(len(downloaded), 10)
    # 10 records are requested starting at offset 4 and each request fetches 2 of them, so
    # the requests should be made at offsets 4, 6, 8, 10 and 12, at which point the target
    # limit is reached - hence the last request's offset should be 12
    sent_body = json.loads(httpretty.last_request().body)
    assert_equals(sent_body, {'offset': 12, 'limit': 2})
def create_zip(self, resource):
    """Create the ZIP file matching the current request

    Fetches the fields, writes them as headers, then pages through the
    records writing each page to the resource, finalizes the resource and
    zips it. The resource's work files are cleaned up whether or not any of
    these steps fail.

    NOTE(review): earlier documentation promised the ZIP file name as a
    return value, but nothing is returned from this method - confirm what
    callers expect.

    @param resource: the resource the headers and records are written to and
                     which creates the ZIP
    """
    schema = self.schema()
    # Only forward the request parameters whose schema entry has a truthy third element
    ckan_params = {k: v for (k, v) in self.request_params.items() if schema[k][2]}
    ckan_resource = CkanResource(self.request_params['api_url'],
                                 self.request_params.get('key', None),
                                 ckan_params)
    try:
        # Read the datastore fields, and generate the package structure.
        self.log.info("Fetching fields")
        response = ckan_resource.request(0, 0)
        # Write fields to out file as headers
        fields = self._write_headers(response, resource)
        # If this is a SOLR backend we want to use a cursor
        # This is much faster than the DB search - and prevents duplicates
        cursor = None
        try:
            if response['result']['_backend'] == 'datasolr':
                cursor = '*'
                self.log.info("Search type: Solr cursor")
        except KeyError:
            self.log.info("Search type: DB")

        page = 0
        count = 0
        # A missing or empty (None, '') limit means "no limit" rather than a crash in int()
        max_count = int(self.request_params.get('limit') or 0)
        while True:
            response = ckan_resource.request(page, self.config['PAGE_SIZE'], cursor)
            result = response['result']
            records = result['records']
            # If we've run out of records, break
            if not records:
                break
            self._write_records(records, fields, resource)
            if cursor:
                cursor = result['next_cursor']
            # Start offset - not used for SOLR
            page += 1
            count += len(records)
            if max_count and count >= max_count:
                break
            try:
                total = result['total']
            except KeyError:
                # Without a total we cannot tell whether we are done, so log the response
                # and keep paging - the loop still ends on the first empty page
                self.log.info("Response has no total: %r" % (response,))
                continue
            if count >= total:
                break
        # Finalize the resource
        self._finalize_resource(fields, resource)
        # Zip the file
        resource.create_zip(self.config['ZIP_COMMAND'])
    finally:
        resource.clean_work_files()
def test_solr_before(self):
    """
    Ensure the solr "before" hook drops the offset and sets the cursor to '*'.
    """
    ckan_resource = CkanResource('http://somewhere.com/test', None, 1, {})
    params = {'offset': 12}
    # the hook is expected to modify the dict it is given in place
    ckan_resource._solr_before(params)
    assert_false('offset' in params)
    assert_equals(params['cursor'], '*')
def test_request_limit_overflow(self):
    """Check that limits are merged when making a request, in the case where the inner
    limit overflows the outer limit"""
    url = 'http://somewhere.com/test'
    httpretty.register_uri(httpretty.POST, url)
    ckan_resource = CkanResource(url, None, {'offset': 100, 'limit': 100})
    ckan_resource._get_response(200, 20)
    # The parameters are compared against the path of the last request
    requested_path = httpretty.last_request().path
    self._assert_params_equals(requested_path, {'offset': 4100, 'limit': 20})
def test_request_limit_no_outer(self):
    """Check that the inner limit is used when no outer limit is defined"""
    url = 'http://somewhere.com/test'
    httpretty.register_uri(httpretty.POST, url)
    ckan_resource = CkanResource(url, None, {'offset': None, 'limit': None})
    ckan_resource._get_response(10, 200)
    # The parameters are compared against the path of the last request
    requested_path = httpretty.last_request().path
    self._assert_params_equals(requested_path, {'offset': 2000, 'limit': 200})
def test_request_parameters(self):
    """Check that the parameters given to the resource are passed on to the request"""
    url = 'http://somewhere.com/test'
    httpretty.register_uri(httpretty.POST, url)
    ckan_resource = CkanResource(url, None, {'offset': None, 'limit': None, 'carrot': 'cake'})
    ckan_resource._get_response(10, 200)
    # The parameters are compared against the path of the last request
    requested_path = httpretty.last_request().path
    expected_params = {'offset': 2000, 'limit': 200, 'carrot': 'cake'}
    self._assert_params_equals(requested_path, expected_params)
def test_request_url(self):
    """Check that the given URL is the one queried, with limit and offset in the query"""
    url = 'http://somewhere.com/test'
    httpretty.register_uri(httpretty.POST, url)
    ckan_resource = CkanResource(url, None, {'offset': None, 'limit': None})
    ckan_resource._get_response(0, 0)
    requested_path = httpretty.last_request().path
    assert_equals(requested_path, '/test?limit=0&offset=0')
def test_solr_after(self):
    """
    Ensure the solr "after" hook copies the result's next cursor into the request parameters.
    """
    ckan_resource = CkanResource('http://somewhere.com/test', None, 1, {})
    params = {'cursor': '*'}
    response_result = {'next_cursor': 'next one!'}
    ckan_resource._solr_after(params, response_result)
    assert_equals(params['cursor'], 'next one!')
def test_versioned_datastore_before(self):
    """
    Ensure the versioned datastore "before" hook drops the offset from the request parameters.
    """
    ckan_resource = CkanResource('http://somewhere.com/test', None, 1, {})
    params = {'offset': 12}
    # the hook is expected to modify the dict it is given in place
    ckan_resource._versioned_datastore_before(params)
    assert_false('offset' in params)
def test_versioned_datastore_after(self):
    """
    Ensure the versioned datastore "after" hook copies the result's 'after' value into the
    request parameters.
    """
    ckan_resource = CkanResource('http://somewhere.com/test', None, 1, {})
    params = {'cursor': '*'}
    response_result = {'after': 'next one!'}
    ckan_resource._versioned_datastore_after(params, response_result)
    assert_equals(params['after'], 'next one!')
def test_default_after(self):
    """
    Ensure the default "after" hook moves the offset on for the next request.
    """
    ckan_resource = CkanResource('http://somewhere.com/test', None, 1, {})
    params = {'offset': 10, 'limit': 32}
    ckan_resource._default_after(params, {})
    # 42 == 10 + 32, i.e. the starting offset plus the limit
    assert_equals(params['offset'], 42)