コード例 #1
0
 def test_request_authorization(self):
     """
     Check that the key given to the resource is sent as the request's authorization header.
     """
     endpoint = 'http://somewhere.com/test'
     api_key = 'somekey'
     httpretty.register_uri(httpretty.POST, endpoint)
     resource = CkanResource(endpoint, api_key, {'offset': None, 'limit': None})
     resource._get_response(200, 20)
     # copy the recorded headers into a plain dict before looking the header up
     sent_headers = dict(httpretty.last_request().headers)
     assert_equals(sent_headers['authorization'], api_key)
コード例 #2
0
 def test_default_before(self):
     """
     Check that `_default_before` leaves the request parameters it is given unchanged.
     """
     resource = CkanResource('http://somewhere.com/test', None, 1, {})
     untouched_params = {'offset': 10, 'limit': 32, 'banana': True}
     # pass a deep copy so that any modification shows up as a difference from the original
     passed_params = copy.deepcopy(untouched_params)
     resource._default_before(passed_params)
     assert_equals(untouched_params, passed_params)
コード例 #3
0
    def create_zip(self, resource):
        """
        Build the ZIP file for the current request.

        The datastore fields are fetched and written as headers, the records are written as they
        are retrieved, and the resource is then finalized and zipped. The resource's work files
        are cleaned up whether or not these steps succeed.

        :param resource: the output resource the headers and records are written to
        """
        schema = self.schema()
        # only forward the request parameters whose schema entry has a truthy third element
        forwarded_params = {
            name: value for name, value in self.request_params.items() if schema[name][2]
        }
        api_url = self.request_params['api_url']
        api_key = self.request_params.get('key')
        page_size = self.config['PAGE_SIZE']
        ckan_resource = CkanResource(api_url, api_key, page_size, forwarded_params)
        try:
            self.log.info("Fetching fields")
            # the backend type is returned alongside the datastore fields
            fields, backend = ckan_resource.get_fields_and_backend()

            # writing the headers returns the fields to use from here on
            fields = self._write_headers(resource, fields)

            self.log.info("Fetching records")
            # get_records returns a generator, so records are written as they are retrieved
            record_stream = ckan_resource.get_records(backend)
            self._write_records(record_stream, fields, resource)

            self._finalize_resource(fields, resource)
            zip_command = self.config['ZIP_COMMAND']
            resource.create_zip(zip_command)
        finally:
            resource.clean_work_files()
コード例 #4
0
 def test_request_authorization(self):
     """
     Check that the API key is sent in the authorization header of the request.
     """
     endpoint = 'http://somewhere.com/test'
     api_key = 'somekey'
     httpretty.register_uri(httpretty.POST, endpoint, body=EMPTY_BODY)
     resource = CkanResource(endpoint, api_key, 1, {'offset': None, 'limit': None})
     # exhaust the records before inspecting the last recorded request
     list(resource.get_records())
     sent_request = httpretty.last_request()
     assert_equals(sent_request.headers['authorization'], api_key)
コード例 #5
0
 def test_request_parameters(self):
     """
     Check that extra request parameters end up in the body of the request.
     """
     endpoint = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, endpoint, body=EMPTY_BODY)
     resource = CkanResource(endpoint, None, 34, {'carrot': 'cake'})
     list(resource.get_records())
     sent_body = json.loads(httpretty.last_request().body)
     expected_body = {'offset': 0, 'limit': 34, 'carrot': 'cake'}
     assert_equals(sent_body, expected_body)
コード例 #6
0
    def test_request_failure(self):
        """
        Check that a non-200 status code from the query is raised as a StreamError.
        """
        endpoint = 'http://somewhere.com/test'
        httpretty.register_uri(httpretty.POST, endpoint, status=500)
        resource = CkanResource(endpoint, None, 1, {'offset': None, 'limit': None})

        with assert_raises(StreamError):
            # both creating and consuming the records happen inside the assertion context
            for _ in resource.get_records():
                pass
コード例 #7
0
 def test_request_failure(self):
     """Check that a non-200 status code from the query is raised as a StreamError"""
     endpoint = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, endpoint, status=500)
     resource = CkanResource(endpoint, None, {'offset': None, 'limit': None})
     try:
         resource._get_response(200, 20)
     except StreamError:
         # this is the expected outcome, nothing more to check
         return
     # reaching this point means the request did not raise
     assert_true(False, "Expected exception StreamError")
コード例 #8
0
 def test_request_url(self):
     """
     Check that the request is made to the path of the given URL and that its body carries a
     zero offset and the page size as the limit.
     """
     endpoint = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, endpoint, body=EMPTY_BODY)
     resource = CkanResource(endpoint, None, 42, {})
     list(resource.get_records())
     sent_request = httpretty.last_request()
     assert_equals(sent_request.path, '/test')
     assert_equals(json.loads(sent_request.body), {'offset': 0, 'limit': 42})
コード例 #9
0
 def test_request_no_limit(self):
     """
     Check that the page size is sent as the limit when the request parameters specify none.
     """
     endpoint = 'http://somewhere.com/test'
     page_size = 57
     unlimited_params = {'offset': None, 'limit': None}
     httpretty.register_uri(httpretty.POST, endpoint, body=EMPTY_BODY)
     resource = CkanResource(endpoint, None, page_size, unlimited_params)
     list(resource.get_records())
     sent_body = json.loads(httpretty.last_request().body)
     assert_equals(sent_body, {'offset': 0, 'limit': page_size})
コード例 #10
0
    def test_request_limit_limit_lower(self):
        """
        Scenario: the requested limit is lower than the page size.

        The requested limit caps the number of records to download, while the page size caps
        how many records are fetched per request. The CKAN request limit should be the lower of
        the two so that far more records than needed are not fetched; here that is the requested
        limit.
        """
        endpoint = 'http://somewhere.com/test'
        page_size = 200
        # a single response holding exactly as many records as the requested limit
        single_page = json.dumps({'result': {'records': list(range(10))}})
        httpretty.register_uri(httpretty.POST, endpoint,
                               responses=[httpretty.Response(single_page)])

        resource = CkanResource(endpoint, None, page_size, {'offset': 4, 'limit': 10})
        fetched = list(resource.get_records())
        assert_equals(len(fetched), 10)
        sent_body = json.loads(httpretty.last_request().body)
        assert_equals(sent_body, {'offset': 4, 'limit': 10})
コード例 #11
0
    def test_request_limit_size_lower(self):
        """
        Scenario: the page size is lower than the requested limit.

        The requested limit caps the number of records to download, while the page size caps
        how many records are fetched per request. The CKAN request limit should be the lower of
        the two so that far more records than needed are not fetched; here that is the page size.
        """
        endpoint = 'http://somewhere.com/test'
        page_size = 2
        # three registered responses, each holding one full page of records
        # NOTE(review): five requests are expected below but only three responses are registered;
        # this presumably relies on httpretty continuing to serve the last one - confirm
        responses = [
            httpretty.Response(json.dumps({'result': {'records': list(range(page_size))}}))
            for _ in range(3)
        ]
        httpretty.register_uri(httpretty.POST, endpoint, responses=responses)

        resource = CkanResource(endpoint, None, page_size, {'offset': 4, 'limit': 10})
        fetched = list(resource.get_records())
        assert_equals(len(fetched), 10)
        # 10 records are requested starting at offset 4 and each request fetches 2, so requests
        # are expected at offsets 4, 6, 8, 10 and 12, after which the target limit is reached;
        # the last request should therefore have offset 12
        last_body = json.loads(httpretty.last_request().body)
        assert_equals(last_body, {'offset': 12, 'limit': 2})
コード例 #12
0
    def create_zip(self, resource):
        """Create the ZIP file matching the current request

        Fetches the datastore fields and writes them as headers, then requests the records
        page by page and writes each page to the resource, and finally finalizes and zips the
        resource. The resource's work files are cleaned up whether or not this succeeds.

        @param resource: the output resource the headers and records are written to
        @raise KeyError: if a page of records has no 'total' entry in its result
        """
        schema = self.schema()
        # Only forward the request parameters whose schema entry has a truthy third element
        ckan_params = dict((k, v) for (k, v) in self.request_params.items()
                           if schema[k][2])
        ckan_resource = CkanResource(self.request_params['api_url'],
                                     self.request_params.get('key', None),
                                     ckan_params)
        try:
            # Read the datastore fields, and generate the package structure.
            self.log.info("Fetching fields")
            response = ckan_resource.request(0, 0)

            # Write fields to out file as headers
            fields = self._write_headers(response, resource)

            # If this is a SOLR backend we want to use a cursor
            # This is much faster than the DB search - and prevents duplicates
            cursor = None
            try:
                if response['result']['_backend'] == 'datasolr':
                    cursor = '*'
                    self.log.info("Search type: Solr cursor")
            except KeyError:
                self.log.info("Search type: DB")

            page = 0
            count = 0
            # A missing, None or empty limit means there is no cap on the number of records
            max_count = int(self.request_params.get('limit') or 0)
            while True:
                response = ckan_resource.request(page,
                                                 self.config['PAGE_SIZE'],
                                                 cursor)
                result = response['result']
                records = result['records']
                # If we've run out of records, break
                if not records:
                    break
                self._write_records(records, fields, resource)
                if cursor:
                    cursor = result['next_cursor']
                # Start offset - not used for SOLR
                page += 1
                count += len(records)

                if max_count and count >= max_count:
                    break

                try:
                    total = result['total']
                except KeyError:
                    # Record the offending response in the log (instead of printing it to
                    # stdout) and let the error propagate to the caller as before
                    self.log.error("Response has no 'total' in its result: %r", response)
                    raise
                if count >= total:
                    break

            # Finalize the resource
            self._finalize_resource(fields, resource)
            # Zip the file
            resource.create_zip(self.config['ZIP_COMMAND'])
        finally:
            resource.clean_work_files()
コード例 #13
0
 def test_solr_before(self):
     """
     Check that `_solr_before` removes the offset from the request parameters and sets the
     cursor to '*'.
     """
     endpoint = 'http://somewhere.com/test'
     resource = CkanResource(endpoint, None, 1, {})
     params = {'offset': 12}
     resource._solr_before(params)
     assert_false('offset' in params)
     assert_equals(params['cursor'], '*')
コード例 #14
0
 def test_request_limit_overflow(self):
     """
     Check that limits are merged when making a request, in the case where the inner limit
     causes an overflow of the outer limit.
     """
     endpoint = 'http://somewhere.com/test'
     httpretty.register_uri(httpretty.POST, endpoint)
     resource = CkanResource(endpoint, None, {'offset': 100, 'limit': 100})
     resource._get_response(200, 20)
     sent_path = httpretty.last_request().path
     self._assert_params_equals(sent_path, {'offset': 4100, 'limit': 20})
コード例 #15
0
 def test_request_limit_no_outer(self):
     """Ensure inner limit is used when no outer limit is defined"""
     httpretty.register_uri(httpretty.POST, 'http://somewhere.com/test')
     r = CkanResource('http://somewhere.com/test', None, {'offset': None, 'limit': None})
     # only the recorded request is inspected, so the response itself is not kept
     r._get_response(10, 200)
     self._assert_params_equals(httpretty.last_request().path, {'offset': 2000, 'limit': 200})
コード例 #16
0
 def test_request_parameters(self):
     """Ensure request parameters are passed to the request"""
     httpretty.register_uri(httpretty.POST, 'http://somewhere.com/test')
     r = CkanResource('http://somewhere.com/test', None,
                      {'offset': None, 'limit': None, 'carrot': 'cake'})
     # only the recorded request is inspected, so the response itself is not kept
     r._get_response(10, 200)
     self._assert_params_equals(httpretty.last_request().path,
                                {'offset': 2000, 'limit': 200, 'carrot': 'cake'})
コード例 #17
0
 def test_request_url(self):
     """Ensure we get a stream queried with the given URL"""
     httpretty.register_uri(httpretty.POST, 'http://somewhere.com/test')
     r = CkanResource('http://somewhere.com/test', None, {'offset': None, 'limit': None})
     # only the recorded request is inspected, so the response itself is not kept
     r._get_response(0, 0)
     assert_equals(httpretty.last_request().path, '/test?limit=0&offset=0')
コード例 #18
0
 def test_solr_after(self):
     """
     Check that `_solr_after` replaces the cursor in the request parameters with the result's
     next cursor.
     """
     endpoint = 'http://somewhere.com/test'
     resource = CkanResource(endpoint, None, 1, {})
     params = {'cursor': '*'}
     response_result = {'next_cursor': 'next one!'}
     resource._solr_after(params, response_result)
     assert_equals(params['cursor'], 'next one!')
コード例 #19
0
 def test_versioned_datastore_before(self):
     """
     Check that `_versioned_datastore_before` removes the offset from the request parameters.
     """
     endpoint = 'http://somewhere.com/test'
     resource = CkanResource(endpoint, None, 1, {})
     params = {'offset': 12}
     resource._versioned_datastore_before(params)
     assert_false('offset' in params)
コード例 #20
0
 def test_versioned_datastore_after(self):
     """
     Check that `_versioned_datastore_after` stores the result's 'after' value in the request
     parameters.
     """
     endpoint = 'http://somewhere.com/test'
     resource = CkanResource(endpoint, None, 1, {})
     params = {'cursor': '*'}
     response_result = {'after': 'next one!'}
     resource._versioned_datastore_after(params, response_result)
     assert_equals(params['after'], 'next one!')
コード例 #21
0
 def test_default_after(self):
     """
     Check that `_default_after` moves the offset on: with offset 10 and limit 32 the offset
     should become 42.
     """
     endpoint = 'http://somewhere.com/test'
     resource = CkanResource(endpoint, None, 1, {})
     params = {'offset': 10, 'limit': 32}
     empty_result = {}
     resource._default_after(params, empty_result)
     assert_equals(params['offset'], 42)