Ejemplo n.º 1
0
    def get_hash(self, resource_id):
        """Gets the hash of a datastore table.

        Args:
            resource_id (str): The datastore resource id.

        Returns:
            str: The datastore resource hash.

        Raises:
            NotFound: If `hash_table_id` isn't set or not in datastore.
            NotAuthorized: If unable to authorize ckan user.

        Examples:
            >>> CKAN(hash_table='hash_jhb34rtj34t').get_hash('rid')
            Traceback (most recent call last):
            NotFound: {u'item': u'package', u'message': u'Package \
`hash_jhb34rtj34t` was not found!'}
        """
        if not self.hash_table_pack:
            message = 'Package `%s` was not found!' % self.hash_table
            raise NotFound({'message': message, 'item': 'package'})

        if not self.hash_table_id:
            message = 'No resources found in package `%s`!' % self.hash_table
            raise NotFound({'message': message, 'item': 'resource'})

        kwargs = {
            'resource_id': self.hash_table_id,
            'filters': {
                'datastore_id': resource_id
            },
            'fields': 'hash',
            'limit': 1
        }

        err_msg = 'Resource `%s` was not found' % resource_id
        alt_msg = 'Hash table `%s` was not found' % self.hash_table_id

        try:
            result = self.datastore_search(**kwargs)
            resource_hash = result['records'][0]['hash']
        except NotFound:
            message = '%s in datastore!' % alt_msg
            raise NotFound({'message': message, 'item': 'datastore'})
        except ValidationError as err:
            if err.error_dict.get('resource_id') == ['Not found: Resource']:
                raise NotFound('%s in filestore.' % err_msg)
            else:
                raise err
        except IndexError:
            print('%s in hash table.' % err_msg)
            resource_hash = None

        if self.verbose:
            print('Resource `%s` hash is `%s`.' % (resource_id, resource_hash))

        return resource_hash
Ejemplo n.º 2
0
    def fetch_resource(self, resource_id, user_agent=None, stream=True):
        """Downloads a single filestore resource.

        The resource metadata is looked up first; its `perma_link` (or,
        failing that, its `url`) is then requested.

        Args:
            resource_id (str): The filestore resource id.

        Kwargs:
            user_agent (str): The user agent (default: `self.user_agent`).
            stream (bool): Stream content (default: True).

        Returns:
            obj: requests.Response object.

        Raises:
            NotFound: If unable to find the resource.
            NotAuthorized: If access to fetch resource is denied.
            ValidationError: Any validation failure other than a missing
                resource.

        Examples:
            >>> CKAN(quiet=True).fetch_resource('rid')
            Traceback (most recent call last):
            NotFound: Resource `rid` was not found in filestore.
        """
        agent = user_agent or self.user_agent
        missing = 'Resource `%s` was not found in filestore.' % resource_id

        try:
            resource = self.resource_show(id=resource_id)
        except NotFound:
            # Re-raise with the message used throughout this class.
            raise NotFound(missing)
        except ValidationError as err:
            if err.error_dict.get('resource_id') != ['Not found: Resource']:
                raise err

            raise NotFound(missing)

        url = resource.get('perma_link') or resource.get('url')

        if self.verbose:
            print('Downloading url %s...' % url)

        response = requests.get(
            url, stream=stream, headers={'User-Agent': agent})
        denied = 'Access to fetch resource %s was denied.' % resource_id

        # A denial shows up either as a 403 flagged in the `x-ckan-error`
        # header of a redirect hop or as a plain 401 on the final response.
        forbidden = any(
            '403' in hop.headers.get('x-ckan-error', '')
            for hop in response.history)

        if forbidden or response.status_code == 401:
            raise NotAuthorized(denied)

        return response
Ejemplo n.º 3
0
 def test_no_choices_on_not_found(self, LocalCKAN):
     # When the datastore search raises NotFound the helper must return an
     # empty choice list after attempting exactly one search.
     client = Mock()
     client.action.datastore_search.side_effect = NotFound()
     LocalCKAN.return_value = client

     field = {'datastore_choices_resource': 'not-found'}
     assert_equals(scheming_datastore_choices(field), [])

     client.action.datastore_search.assert_called_once()
Ejemplo n.º 4
0
 def test_no_choices_on_not_authorized(self, LocalCKAN):
     # The helper must return an empty choice list when the datastore
     # search fails, after attempting exactly one search.
     # NOTE(review): the test name says "not authorized" but the side
     # effect raised here is NotFound() -- presumably NotAuthorized() was
     # intended; confirm and import it before changing.
     client = Mock()
     client.action.datastore_search.side_effect = NotFound()
     LocalCKAN.return_value = client

     field = {"datastore_choices_resource": "not-allowed"}
     choices = scheming_datastore_choices(field)
     assert (choices == [])

     client.action.datastore_search.assert_called_once()
Ejemplo n.º 5
0
    def get_package_id(self, resource_id):
        """Finds the package that owns a filestore resource.

        The package is taken from the resource's revision when the revision
        lists any packages; otherwise the resource's own `package_id` is
        used.

        Args:
            resource_id (str): The filestore resource id.

        Returns:
            str: The package id, or None (after printing a message) when
                the resource lookup raises NotFound.

        Raises:
            NotFound: If the lookup fails validation because the resource
                doesn't exist.
            ValidationError: Any other validation failure.

        Examples:
            >>> CKAN(quiet=True).get_package_id('rid')
            Resource `rid` was not found in filestore.
        """
        missing = 'Resource `%s` was not found in filestore.' % resource_id

        try:
            resource = self.resource_show(id=resource_id)
        except NotFound:
            print(missing)
            return None
        except ValidationError as err:
            if err.error_dict.get('resource_id') != ['Not found: Resource']:
                raise err

            raise NotFound(missing)

        revision = self.revision_show(id=resource['revision_id'])

        try:
            package_id = revision['packages'][0]
        except IndexError:
            # The revision lists no packages; fall back to the resource.
            package_id = resource['package_id']

        return package_id
Ejemplo n.º 6
0
    def setUp(self):
        # Minimal government hierarchy: one financial year, a Provincial and
        # a National sphere, and a single national department.
        fin_year = FinancialYear.objects.create(slug="2030-31")
        Sphere.objects.create(financial_year=fin_year, name='Provincial')
        self.national = Sphere.objects.create(
            financial_year=fin_year, name='National')
        government = Government.objects.create(
            sphere=self.national, name='South Africa')
        Department.objects.create(
            government=government,
            name='The Presidency',
            vote_number=1,
            intro="",
        )

        # Active staff superuser with a verified email address.
        superuser = User.objects.create_user(
            username=USERNAME,
            password=PASSWORD,
            is_staff=True,
            is_superuser=True,
            is_active=True,
        )
        EmailAddress.objects.create(
            user=superuser, email=EMAIL, verified=True)

        self.path = os.path.dirname(__file__)

        def stub_ckan_actions(ckan_mock):
            # Package searches come back empty; package and group lookups
            # raise NotFound.
            actions = ckan_mock.action
            actions.package_search.return_value = {'results': []}
            actions.package_show.side_effect = NotFound()
            actions.group_show.side_effect = NotFound()

        # Patch CKAN API as referenced from the datasets module
        self.ckan_patch = patch('budgetportal.datasets.ckan')
        self.CKANMockClass = self.ckan_patch.start()
        stub_ckan_actions(self.CKANMockClass)
        self.addCleanup(self.ckan_patch.stop)

        # ...and as referenced from the models module
        self.ckan_patch2 = patch('budgetportal.models.ckan')
        self.CKANMockClass2 = self.ckan_patch2.start()
        stub_ckan_actions(self.CKANMockClass2)
        self.addCleanup(self.ckan_patch2.stop)

        super(BulkUploadTestCase, self).setUp()
Ejemplo n.º 7
0
def _action_get_dataset(context, data_dict):
    '''
    shared lookup for actions that operate on exactly one dataset,
    identified by dataset type and organization name or id

    returns the (lc, geno, dataset) triple; raises NotFound when no
    dataset matches and ValidationError when more than one does
    '''
    lc, geno, matches = _action_find_dataset(context, data_dict)

    if not matches:
        raise NotFound()

    if len(matches) > 1:
        template = _("Multiple datasets exist for type %s")
        raise ValidationError(
            {'owner_org': template % data_dict['dataset_type']})

    dataset = matches[0]
    return lc, geno, dataset
Ejemplo n.º 8
0
    def update_filestore(self, resource_id, **kwargs):
        """Updates a single resource on filestore.

        The current resource is fetched, its `package_id` is refreshed via
        `get_package_id`, and the update callable chosen by
        `get_filestore_update_func` is then executed.

        Args:
            resource_id (str): The filestore resource id.
            **kwargs: Keyword arguments forwarded to
                `get_filestore_update_func`.

        Kwargs:
            url (str): New file url (for file link).
            filepath (str): New file path (for file upload).
            fileobj (obj): New file like object (for file upload).
            post (bool): Post data using requests instead of ckanapi.
            name (str): The resource name.
            description (str): The resource description.
            hash (str): The resource hash.

        Returns:
            obj: requests.Response object if `post` option is specified,
                ckan resource object otherwise. None (after printing a
                message) when the resource lookup raises NotFound.

        Raises:
            NotFound: If the lookup fails validation because the resource
                doesn't exist.
            ValidationError: Any other validation failure.

        Examples:
            >>> CKAN(quiet=True).update_filestore('rid')
            Resource `rid` was not found in filestore.
        """
        missing = 'Resource `%s` was not found in filestore.' % resource_id

        try:
            resource = self.resource_show(id=resource_id)
        except NotFound:
            print(missing)
            return None
        except ValidationError as err:
            if err.error_dict.get('resource_id') != ['Not found: Resource']:
                raise err

            raise NotFound(missing)

        resource['package_id'] = self.get_package_id(resource_id)

        if self.verbose:
            print('Updating resource %s...' % resource_id)

        plan = self.get_filestore_update_func(resource, **kwargs)
        update_func, positional, payload = plan
        return self._update_filestore(update_func, *positional, **payload)
Ejemplo n.º 9
0
    def create_table(self, resource_id, fields, **kwargs):
        """Creates a datastore table for an existing filestore resource.

        Args:
            resource_id (str): The filestore resource id.
            fields (List[dict]): fields/columns and their extra metadata.
            **kwargs: Keyword arguments that are passed to datastore_create.

        Kwargs:
            force (bool): Create resource even if read-only.
            aliases (List[str]): name(s) for read only alias(es) of the
                resource.
            primary_key (List[str]): field(s) that represent a unique key.
            indexes (List[str]): index(es) on table.

        Returns:
            dict: The newly created data object.

        Raises:
            ValidationError: If unable to validate user on ckan site.
            NotFound: If unable to find resource.

        Examples:
        >>> CKAN(quiet=True).create_table('rid', fields=[{'id': 'field', \
'type': 'text'}])
        Traceback (most recent call last):
        NotFound: Resource `rid` was not found in filestore.
        """
        kwargs.setdefault('force', self.force)
        kwargs['resource_id'] = resource_id
        kwargs['fields'] = fields
        err_msg = 'Resource `%s` was not found in filestore.' % resource_id

        if self.verbose:
            print('Creating table `%s` in datastore...' % resource_id)

        try:
            return self.datastore_create(**kwargs)
        except ValidationError as err:
            if err.error_dict.get('resource_id') == ['Not found: Resource']:
                raise NotFound(err_msg)
            else:
                raise
Ejemplo n.º 10
0
    def insert_records(self, resource_id, records, **kwargs):
        """Inserts records into a datastore table.

        Records are recoded with `pr.json_recode`, split into chunks and
        sent to the datastore one chunk at a time.

        Args:
            resource_id (str): The datastore resource id.
            records (List[dict]): The records to insert.
            **kwargs: Keyword arguments that are passed to datastore_upsert.

        Kwargs:
            method (str): Insert method. One of ['update', 'insert', 'upsert']
                (default: 'insert').
            force (bool): Create resource even if read-only (default:
                `self.force`).
            start (int): Row number to start from (zero indexed).
            stop (int): Row number to stop at (zero indexed).
            chunksize (int): Number of rows to write at a time.

        Returns:
            int: Number of records inserted. 0 if the upload was aborted
                because a chunk was too large (broken pipe).

        Raises:
            NotFound: If unable to find the resource.
            ValidationError: Any validation failure other than a missing
                resource.
            requests.exceptions.ConnectionError: Any connection failure
                other than a broken pipe.

        Examples:
            >>> CKAN(quiet=True).insert_records('rid', [{'field': 'value'}])
            Traceback (most recent call last):
            NotFound: Resource `rid` was not found in filestore.
        """
        recoded = pr.json_recode(records)
        chunksize = kwargs.pop('chunksize', 0)
        start = kwargs.pop('start', 0)
        stop = kwargs.pop('stop', None)

        kwargs.setdefault('force', self.force)
        kwargs.setdefault('method', 'insert')
        kwargs['resource_id'] = resource_id

        # Loop-invariant, so build it once up front.
        err_msg = 'Resource `%s` was not found in filestore.' % resource_id

        # Running total of records sent so far. It starts at 0 so that the
        # value returned is the true number of inserted records.
        inserted = 0

        for chunk in ft.chunk(recoded, chunksize, start=start, stop=stop):
            length = len(chunk)

            if self.verbose:
                # Report 1-based, inclusive record positions.
                print('Adding records %i - %i to resource %s...' %
                      (inserted + 1, inserted + length, resource_id))

            kwargs['records'] = chunk

            try:
                self.datastore_upsert(**kwargs)
            except requests.exceptions.ConnectionError as err:
                # `str(err)` is used because exceptions have no `.message`
                # attribute on Python 3.
                if 'Broken pipe' in str(err):
                    print(
                        'Chunksize too large. Try using a smaller chunksize.')
                    return 0

                raise
            except NotFound:
                # Keep exception message consistent with the others
                raise NotFound(err_msg)
            except ValidationError as err:
                if err.error_dict.get('resource_id') == [
                        'Not found: Resource'
                ]:
                    raise NotFound(err_msg)

                # Bare raise preserves the original traceback.
                raise

            inserted += length

        return inserted