def test_request_authorization(self):
     """
     Ensure the key passed to the resource is sent as the authorization header
     """
     url = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, url)
     resource = CkanResource(url, 'somekey', {'offset': None, 'limit': None})
     resource._get_response(200, 20)
     # snapshot the headers of the request that was just captured
     sent_headers = dict(httpretty.last_request().headers)
     assert_equals(sent_headers['authorization'], 'somekey')
 def test_default_before(self):
     """
     Ensure _default_before leaves the request parameters it is given unchanged
     """
     original = {'offset': 10, 'limit': 32, 'banana': True}
     passed_in = copy.deepcopy(original)
     resource = CkanResource('http://somewhere.com/test', None, 1, {})
     resource._default_before(passed_in)
     # the hook is expected to be a no-op, so the copy must still equal the original
     assert_equals(original, passed_in)
    def create_zip(self, resource):
        """
        Build the ZIP file for the current request.

        The datastore fields are fetched and written as headers, the records are then retrieved
        and written, and finally the resource is finalized and zipped. The resource's work files
        are cleaned up whether or not any of these steps fail.

        :param resource: the output resource the headers and records are written to
        :return: nothing, the archive is produced through ``resource.create_zip``
        """
        schema = self.schema()
        # only forward the request parameters whose schema entry has a truthy third element
        ckan_params = {
            name: value
            for name, value in self.request_params.items()
            if schema[name][2]
        }
        api_url = self.request_params['api_url']
        api_key = self.request_params.get('key', None)
        ckan_resource = CkanResource(api_url, api_key, self.config['PAGE_SIZE'], ckan_params)
        try:
            self.log.info("Fetching fields")
            # the datastore fields come back together with the backend type
            fields, backend = ckan_resource.get_fields_and_backend()
            # the fields go into the output as headers
            fields = self._write_headers(resource, fields)

            self.log.info("Fetching records")
            # ckan_resource.get_records returns a generator, so records are written as they
            # are retrieved
            records = ckan_resource.get_records(backend)
            self._write_records(records, fields, resource)
            # wrap up the resource and then zip it
            self._finalize_resource(fields, resource)
            resource.create_zip(self.config['ZIP_COMMAND'])
        finally:
            resource.clean_work_files()
 def test_request_authorization(self):
     """
     Ensure the key passed to the resource is sent as the authorization header
     """
     url = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, url, body=EMPTY_BODY)
     resource = CkanResource(url, 'somekey', 1, {'offset': None, 'limit': None})
     # exhaust the records before inspecting the captured request
     list(resource.get_records())
     captured = httpretty.last_request()
     assert_equals(captured.headers['authorization'], 'somekey')
 def test_request_parameters(self):
     """
     Ensure the parameters given to the resource end up in the request body
     """
     url = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, url, body=EMPTY_BODY)
     resource = CkanResource(url, None, 34, {'carrot': 'cake'})
     list(resource.get_records())
     sent_body = json.loads(httpretty.last_request().body)
     expected_body = {'offset': 0, 'limit': 34, 'carrot': 'cake'}
     assert_equals(sent_body, expected_body)
    def test_request_failure(self):
        """
        Ensure a StreamError is raised when the query returns a non-200 status code
        """
        url = 'http://somewhere.com/test'
        # every POST to the URL is answered with a 500
        httpretty.register_uri(httpretty.POST, url, status=500)
        params = {'offset': None, 'limit': None}
        resource = CkanResource(url, None, 1, params)

        with assert_raises(StreamError):
            list(resource.get_records())
 def test_request_failure(self):
     """
     Ensure a StreamError is raised when the query returns a non-200 status code
     """
     url = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, url, status=500)
     resource = CkanResource(url, None, {'offset': None, 'limit': None})
     try:
         resource._get_response(200, 20)
     except StreamError:
         # this is the outcome the test is looking for
         pass
     else:
         assert_true(False, "Expected exception StreamError")
 def test_request_url(self):
     """
     Ensure the request goes to the given URL with the expected offset and limit in its body
     """
     url = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, url, body=EMPTY_BODY)
     resource = CkanResource(url, None, 42, {})
     list(resource.get_records())
     captured_path = httpretty.last_request().path
     assert_equals(captured_path, '/test')
     captured_body = json.loads(httpretty.last_request().body)
     assert_equals(captured_body, {'offset': 0, 'limit': 42})
 def test_request_no_limit(self):
     """
     Ensure the page size is sent as the limit when the request parameters specify no limit
     """
     url = 'http://somewhere.com/test'
     page_size = 57
     httpretty.register_uri(httpretty.POST, url, body=EMPTY_BODY)
     params = {'offset': None, 'limit': None}
     resource = CkanResource(url, None, page_size, params)
     list(resource.get_records())
     sent_body = json.loads(httpretty.last_request().body)
     assert_equals(sent_body, {'offset': 0, 'limit': page_size})
    def test_request_limit_limit_lower(self):
        """
        When a limit is given it caps the number of records to download, while the page size
        caps how many records a single request asks for. The CKAN request limit should be the
        lower of the two so that far more records than needed are not fetched. This covers the
        case where the requested limit (10) is lower than the page size (200).
        """
        url = 'http://somewhere.com/test'
        page_size = 200
        # a single canned response holding ten records
        body = json.dumps({'result': {'records': list(range(10))}})
        httpretty.register_uri(httpretty.POST, url, responses=[httpretty.Response(body)])

        params = {'offset': 4, 'limit': 10}
        resource = CkanResource(url, None, page_size, params)
        fetched = list(resource.get_records())
        assert_equals(len(fetched), 10)
        sent_body = json.loads(httpretty.last_request().body)
        assert_equals(sent_body, {'offset': 4, 'limit': 10})
    def test_request_limit_size_lower(self):
        """
        When a limit is given it caps the number of records to download, while the page size
        caps how many records a single request asks for. The CKAN request limit should be the
        lower of the two so that far more records than needed are not fetched. This covers the
        case where the page size (2) is lower than the requested limit (10).
        """
        url = 'http://somewhere.com/test'
        page_size = 2
        # three canned responses, each holding exactly one page worth of records
        page_body = json.dumps({'result': {'records': list(range(page_size))}})
        canned = [httpretty.Response(page_body) for _ in range(3)]
        httpretty.register_uri(httpretty.POST, url, responses=canned)

        params = {'offset': 4, 'limit': 10}
        resource = CkanResource(url, None, page_size, params)
        fetched = list(resource.get_records())
        assert_equals(len(fetched), 10)
        # 10 records are wanted starting at offset 4 and each request fetches 2, so requests are
        # expected at offsets 4, 6, 8, 10 and 12, the last of which reaches the target limit
        final_body = json.loads(httpretty.last_request().body)
        assert_equals(final_body, {'offset': 12, 'limit': 2})
Esempio n. 12
0
    def create_zip(self, resource):
        """Create the ZIP file matching the current request

        Fetches the datastore fields and writes them as headers, then requests the records page
        by page and writes each page to the resource. Paging stops when a page has no records,
        when the requested limit has been reached, or when the number of records written reaches
        the total reported in the response. The resource is then finalized and zipped. The
        resource's work files are cleaned up whether or not any of this fails.

        @param resource: the output resource the headers and records are written to
        @return: nothing, the archive is produced through resource.create_zip
        @raise KeyError: if a response holding records has no 'total' entry
        """
        schema = self.schema()
        # Only pass on the request parameters whose schema entry has a truthy third element
        ckan_params = {k: v for (k, v) in self.request_params.items() if schema[k][2]}
        ckan_resource = CkanResource(self.request_params['api_url'],
                                     self.request_params.get('key', None),
                                     ckan_params)
        try:
            # Read the datastore fields, and generate the package structure.
            self.log.info("Fetching fields")
            response = ckan_resource.request(0, 0)

            # Write fields to out file as headers
            fields = self._write_headers(response, resource)

            # If this is a SOLR backend we want to use a cursor
            # This is much faster than the DB search - and prevents duplicates
            cursor = None
            try:
                if response['result']['_backend'] == 'datasolr':
                    cursor = '*'
                    self.log.info("Search type: Solr cursor")
            except KeyError:
                self.log.info("Search type: DB")

            page = 0
            count = 0
            # A limit that is missing or None means "no limit"; calling int() on None would
            # raise a TypeError
            max_count = int(self.request_params.get('limit') or 0)
            while True:
                response = ckan_resource.request(page, self.config['PAGE_SIZE'], cursor)
                result = response['result']
                records = result['records']
                # If we've run out of records, break
                if not records:
                    break
                self._write_records(records, fields, resource)
                if cursor:
                    cursor = result['next_cursor']
                # Start offset - not used for SOLR
                page += 1
                count += len(records)

                if max_count and count >= max_count:
                    break

                try:
                    total = result['total']
                except KeyError:
                    # Record the offending response in the log (instead of printing it to
                    # stdout) and let the KeyError propagate as it did before
                    self.log.info("Response has no 'total' entry: %r" % (response,))
                    raise
                if count >= total:
                    break

            # Finalize the resource
            self._finalize_resource(fields, resource)
            # Zip the file
            resource.create_zip(self.config['ZIP_COMMAND'])
        finally:
            resource.clean_work_files()
 def test_solr_before(self):
     """
     Ensure _solr_before drops the offset from the request parameters and sets the cursor to '*'
     """
     params = {'offset': 12}
     CkanResource('http://somewhere.com/test', None, 1, {})._solr_before(params)
     assert_false('offset' in params)
     assert_equals(params['cursor'], '*')
 def test_request_limit_overflow(self):
     """
     Ensure limits are merged when making a request. Test with inner limit causing overflow of
     outer limit
     """
     url = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, url)
     resource = CkanResource(url, None, {'offset': 100, 'limit': 100})
     resource._get_response(200, 20)
     expected_params = {'offset': 4100, 'limit': 20}
     self._assert_params_equals(httpretty.last_request().path, expected_params)
 def test_request_limit_no_outer(self):
     """
     Ensure inner limit is used when no outer limit is defined
     """
     url = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, url)
     resource = CkanResource(url, None, {'offset': None, 'limit': None})
     # the response itself is not inspected, only the request that was captured
     response = resource._get_response(10, 200)
     expected_params = {'offset': 2000, 'limit': 200}
     self._assert_params_equals(httpretty.last_request().path, expected_params)
 def test_request_parameters(self):
     """
     Ensure the parameters given to the resource are passed on with the request
     """
     url = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, url)
     params = {'offset': None, 'limit': None, 'carrot': 'cake'}
     resource = CkanResource(url, None, params)
     # the response itself is not inspected, only the request that was captured
     response = resource._get_response(10, 200)
     expected_params = {'offset': 2000, 'limit': 200, 'carrot': 'cake'}
     self._assert_params_equals(httpretty.last_request().path, expected_params)
 def test_request_url(self):
     """
     Ensure the request is made against the given URL
     """
     url = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, url)
     resource = CkanResource(url, None, {'offset': None, 'limit': None})
     # the response itself is not inspected, only the request that was captured
     response = resource._get_response(0, 0)
     requested_path = httpretty.last_request().path
     assert_equals(requested_path, '/test?limit=0&offset=0')
 def test_solr_after(self):
     """
     Ensure _solr_after replaces the request cursor with the result's next_cursor
     """
     params = {'cursor': '*'}
     solr_result = {'next_cursor': 'next one!'}
     CkanResource('http://somewhere.com/test', None, 1, {})._solr_after(params, solr_result)
     assert_equals(params['cursor'], 'next one!')
 def test_versioned_datastore_before(self):
     """
     Ensure _versioned_datastore_before drops the offset from the request parameters
     """
     params = {'offset': 12}
     backend_resource = CkanResource('http://somewhere.com/test', None, 1, {})
     backend_resource._versioned_datastore_before(params)
     assert_false('offset' in params)
 def test_versioned_datastore_after(self):
     """
     Ensure _versioned_datastore_after stores the result's 'after' value in the request parameters
     """
     params = {'cursor': '*'}
     datastore_result = {'after': 'next one!'}
     backend_resource = CkanResource('http://somewhere.com/test', None, 1, {})
     backend_resource._versioned_datastore_after(params, datastore_result)
     assert_equals(params['after'], 'next one!')
 def test_default_after(self):
     """
     Ensure _default_after leaves the offset at 42 when starting from offset 10 with limit 32
     """
     params = {'offset': 10, 'limit': 32}
     CkanResource('http://somewhere.com/test', None, 1, {})._default_after(params, {})
     assert_equals(params['offset'], 42)