Example #1
0
 def test_munge_filename_multiple_pass(self):
     '''Munging an already-munged filename changes nothing (idempotence).'''
     for original, expected in self.munge_list:
         munged_once = munge_filename_legacy(original)
         assert_equal(munged_once, expected)
         munged_twice = munge_filename_legacy(munged_once)
         assert_equal(munged_twice, expected)
Example #2
0
 def test_munge_filename_multiple_pass(self):
     '''Munging an already-munged filename changes nothing (idempotence).'''
     for original, expected in self.munge_list:
         munged_once = munge_filename_legacy(original)
         nose_tools.assert_equal(munged_once, expected)
         munged_twice = munge_filename_legacy(munged_once)
         nose_tools.assert_equal(munged_twice, expected)
Example #3
0
    def update_data_dict(self, data_dict, url_field, file_field, clear_field):
        '''Manipulate data from the data_dict. This needs to be called before it
        reaches any validators.

        `url_field` is the name of the field where the upload is going to be.

        `file_field` is name of the key where the FieldStorage is kept (i.e
        the field where the file data actually is).

        `clear_field` is the name of a boolean field which requests the upload
        to be deleted.
        '''

        self.url = data_dict.get(url_field, '')
        # pop() removes the transient fields so they never reach validators
        self.clear = data_dict.pop(clear_field, None)
        self.file_field = file_field
        self.upload_field_storage = data_dict.pop(file_field, None)

        # No configured storage path: uploads are disabled; nothing more to do
        # beyond the pops above.
        if not self.storage_path:
            return
        if isinstance(self.upload_field_storage, ALLOWED_UPLOAD_TYPES):
            self.filename = self.upload_field_storage.filename
            # UTC timestamp prefix keeps repeated uploads of the same file
            # from colliding; munge sanitises the result for storage.
            self.filename = str(datetime.datetime.utcnow()) + self.filename
            self.filename = munge.munge_filename_legacy(self.filename)
            self.filepath = os.path.join(self.storage_path, self.filename)
            data_dict[url_field] = self.filename
            self.upload_file = _get_underlying_file(self.upload_field_storage)
        # keep the file if there has been no change
        elif self.old_filename and not self.old_filename.startswith('http'):
            if not self.clear:
                data_dict[url_field] = self.old_filename
            if self.clear and self.url == self.old_filename:
                data_dict[url_field] = ''
Example #4
0
    def update_data_dict(self, data_dict, url_field, file_field, clear_field):
        ''' Manipulate data from the data_dict.  url_field is the name of the
        field where the upload is going to be. file_field is name of the key
        where the FieldStorage is kept (i.e the field where the file data
        actually is). clear_field is the name of a boolean field which
        requests the upload to be deleted.  This needs to be called before
        it reaches any validators'''

        self.url = data_dict.get(url_field, '')
        # pop() removes the transient fields so they never reach validators
        self.clear = data_dict.pop(clear_field, None)
        self.file_field = file_field
        self.upload_field_storage = data_dict.pop(file_field, None)

        # No configured storage path: uploads are disabled.
        if not self.storage_path:
            return

        if isinstance(self.upload_field_storage, cgi.FieldStorage):
            self.filename = self.upload_field_storage.filename
            # UTC timestamp prefix keeps repeated uploads of the same file
            # from colliding; munge sanitises the result for storage.
            self.filename = str(datetime.datetime.utcnow()) + self.filename
            self.filename = munge.munge_filename_legacy(self.filename)
            self.filepath = os.path.join(self.storage_path, self.filename)
            data_dict[url_field] = self.filename
            self.upload_file = self.upload_field_storage.file
            # '~'-suffixed temp path; presumably moved into place on upload()
            # -- verify against the rest of the class.
            self.tmp_filepath = self.filepath + '~'
        # keep the file if there has been no change
        elif self.old_filename and not self.old_filename.startswith('http'):
            if not self.clear:
                data_dict[url_field] = self.old_filename
            if self.clear and self.url == self.old_filename:
                data_dict[url_field] = ''
 def _create_uploaded_filename(uploaded_file_field):
     # type: (UploadedFileWrapper) -> str
     """Build the storage filename for a newly uploaded file.

     The current UTC timestamp is prepended so repeated uploads of the
     same file do not collide, and the result is munged for storage.
     """
     timestamp = str(datetime.datetime.utcnow())
     combined = '{}-{}'.format(timestamp, uploaded_file_field.filename)
     return munge_filename_legacy(combined)
Example #6
0
    def download(self, filename):
        '''
        Provide a download by either redirecting the user to the url stored or
        downloading the uploaded file from S3.

        Falls back to the local filesystem uploader when the key is missing
        from S3 and the fallback config option is enabled; otherwise raises
        OSError for any S3 failure.
        '''

        filename = munge.munge_filename_legacy(filename)
        key_path = os.path.join(self.storage_path, filename)

        try:
            url = self.get_signed_url_to_key(key_path)
            h.redirect_to(url)

        except ClientError as ex:
            if ex.response['Error']['Code'] in ['NoSuchKey', '404']:
                # The original warned when `key_path is None`, which can never
                # happen (os.path.join always returns a string); warn here,
                # where the key is actually known to be missing.
                log.warning("Key '%s' not found in bucket '%s'", key_path,
                            self.bucket_name)
                if config.get(
                        'ckanext.s3filestore.filesystem_download_fallback',
                        False):
                    # Fixed: the original logged the builtin `id` here.
                    log.info('Attempting filesystem fallback for resource %s',
                             filename)
                    default_upload = DefaultUpload(self.upload_to)
                    return default_upload.download(filename)

            # Uploader interface does not know about s3 errors
            raise OSError(errno.ENOENT)
Example #7
0
    def metadata(self, filename):
        '''
        Provide metadata about the download, such as might be obtained from a HTTP HEAD request.
        Returns a dict that includes 'ContentType', 'ContentLength', 'Hash', and 'LastModified',
        and may include other keys depending on the implementation.

        Falls back to the local filesystem uploader when the key is missing
        from S3 and the fallback config option is enabled; otherwise raises
        OSError for any S3 failure.
        '''
        filename = munge.munge_filename_legacy(filename)
        key_path = os.path.join(self.storage_path, filename)

        try:
            client = self.get_s3_client()

            metadata = client.head_object(Bucket=self.bucket_name,
                                          Key=key_path)
            # expose lower-cased aliases alongside the raw S3 response keys
            metadata['content_type'] = metadata['ContentType']
            metadata['size'] = metadata['ContentLength']
            metadata['hash'] = metadata['ETag']
            return self.as_clean_dict(metadata)
        except ClientError as ex:
            if ex.response['Error']['Code'] in ['NoSuchKey', '404']:
                # The original warned when `filename is None`, which can never
                # be true after munge (it would have raised earlier); warn
                # here, where the key is actually known to be missing.
                log.warning("Key '%s' not found in bucket '%s'", key_path,
                            self.bucket_name)
                if config.get(
                        'ckanext.s3filestore.filesystem_download_fallback',
                        False):
                    # Fixed: the original logged the builtin `id` here.
                    log.info('Attempting filesystem fallback for resource %s',
                             filename)

                    default_upload = DefaultUpload(self.upload_to)
                    return default_upload.metadata(filename)

            # Uploader interface does not know about s3 errors
            raise OSError(errno.ENOENT)
Example #8
0
 def delete(self, filename):
     '''Delete the file we are pointing at; a missing key is not an error.'''
     filename = munge.munge_filename_legacy(filename)
     key_path = os.path.join(self.storage_path, filename)
     try:
         self.clear_key(key_path)
     except ClientError:
         # Deleting an absent key is treated as best-effort: record it and
         # carry on.  (Removed the unused `as ex` binding and the redundant
         # trailing `pass`.)
         log.warning('Key \'%s\' not found in bucket \'%s\' for delete',
                     key_path, self.bucket_name)
Example #9
0
    def update_data_dict(self, data_dict, url_field, file_field, clear_field):
        '''Manipulate data from the data_dict. This needs to be called before it
        reaches any validators.

        `url_field` is the name of the field where the upload is going to be.

        `file_field` is name of the key where the FieldStorage is kept (i.e
        the field where the file data actually is).

        `clear_field` is the name of a boolean field which requests the upload
        to be deleted.
        '''
        # The docstring must be the first statement of the body; the original
        # had this debug call above it, which turned the docstring into a
        # no-op string expression.  Also fixed the "update_data_dic" typo.
        log.debug(
            "ckanext.s3filestore.uploader: update_data_dict: %s, url %s, file %s, clear %s",
            data_dict, url_field, file_field, clear_field)

        self.url = data_dict.get(url_field, '')
        # pop() removes the transient fields so they never reach validators
        self.clear = data_dict.pop(clear_field, None)
        self.file_field = file_field
        self.upload_field_storage = data_dict.pop(file_field, None)
        self.upload_file = None
        self.preserve_filename = data_dict.get('preserve_filename', False)

        if not self.storage_path:
            return
        if isinstance(self.upload_field_storage, ALLOWED_UPLOAD_TYPES):
            self.filename = self.upload_field_storage.filename
            if not self.preserve_filename:
                # UTC timestamp prefix keeps repeated uploads of the same
                # file from colliding
                self.filename = str(datetime.datetime.utcnow()) + self.filename
            self.filename = munge.munge_filename_legacy(self.filename)
            self.filepath = os.path.join(self.storage_path, self.filename)
            if hasattr(self.upload_field_storage, 'mimetype'):
                self.mimetype = self.upload_field_storage.mimetype
            else:
                try:
                    self.mimetype = mimetypes.guess_type(self.filename,
                                                         strict=False)[0]
                except Exception:
                    # best-effort: an unguessable mimetype is not fatal
                    pass
            data_dict[url_field] = self.filename
            self.upload_file = _get_underlying_file(self.upload_field_storage)
            log.debug(
                "ckanext.s3filestore.uploader: is allowed upload type: filename: %s, upload_file: %s, data_dict: %s",
                self.filename, self.upload_file, data_dict)
        # keep the file if there has been no change
        elif self.old_filename and not self.old_filename.startswith('http'):
            if not self.clear:
                data_dict[url_field] = self.old_filename
            if self.clear and self.url == self.old_filename:
                data_dict[url_field] = ''
        else:
            # NOTE(review): assumes self.filename was initialised elsewhere
            # (e.g. in __init__) when no upload is present -- verify.
            log.debug(
                "ckanext.s3filestore.uploader: is not allowed upload type: filename: %s, upload_file: %s, data_dict: %s",
                self.filename, self.upload_file, data_dict)
    def update_data_dict(self, data_dict, url_field, file_field, clear_field):
        '''Manipulate data from the data_dict. This needs to be called before it
        reaches any validators.

        `url_field` is the name of the field where the upload is going to be.

        `file_field` is name of the key where the FieldStorage is kept (i.e
        the field where the file data actually is).

        `clear_field` is the name of a boolean field which requests the upload
        to be deleted.
        '''
        # The docstring must be the first statement of the body; the original
        # had this debug call above it, which turned the docstring into a
        # no-op string expression.
        log.debug('update_data_dict')
        self.url = data_dict.get(url_field, '')
        # pop() removes the transient fields so they never reach validators
        self.clear = data_dict.pop(clear_field, None)
        self.file_field = file_field
        self.upload_field_storage = data_dict.pop(file_field, None)

        if not self.storage_path:
            return

        if hasattr(self.upload_field_storage, 'filename'):
            self.filename = self.upload_field_storage.filename
            # UTC timestamp prefix keeps repeated uploads of the same file
            # from colliding; munge sanitises the result for storage.
            self.filename = str(datetime.datetime.utcnow()) + self.filename
            self.filename = munge.munge_filename_legacy(self.filename)
            self.filepath = os.path.join(self.storage_path, self.filename)
            bucket_endpoint = config.get('ckanext.cloud_storage.s3.endpoint')
            # the url field stores the fully-qualified remote path
            self.remote_filepath = os.path.join(bucket_endpoint, self.filepath)
            data_dict[url_field] = self.remote_filepath
            self.upload_file = self.upload_field_storage.file
        # keep the file if there has been no change
        elif self.old_filename and not self.old_filename.startswith('http'):
            if not self.clear:
                data_dict[url_field] = self.old_filename
            if self.clear and self.url == self.old_filename:
                data_dict[url_field] = ''
Example #11
0
 def test_munge_filename(self):
     '''Each filename in the fixture list munges to its expected value.'''
     for original, expected in self.munge_list:
         nose_tools.assert_equal(
             munge_filename_legacy(original), expected)
Example #12
0
def test_munge_filename_legacy_pass(original, expected):
    """Munging a filename multiple times produces the same result."""
    once = munge_filename_legacy(original)
    assert once == expected
    twice = munge_filename_legacy(once)
    assert twice == expected
Example #13
0
 def test_munge_filename(self):
     '''Each filename in the fixture list munges to its expected value.'''
     for original, expected in self.munge_list:
         assert_equal(munge_filename_legacy(original), expected)
Example #14
0
def group_dictize(group, context,
                  include_groups=True,
                  include_tags=True,
                  include_users=True,
                  include_extras=True,
                  packages_field='datasets',
                  **kw):
    '''
    Turns a Group object and related into a dictionary. The related objects
    like tags are included unless you specify it in the params.

    :param packages_field: determines the format of the `packages` field - can
    be `datasets`, `dataset_count` or None.
    '''
    assert packages_field in ('datasets', 'dataset_count', None)
    if packages_field == 'dataset_count':
        dataset_counts = context.get('dataset_counts', None)

    result_dict = d.table_dictize(group, context)
    result_dict.update(kw)

    result_dict['display_name'] = group.title or group.name

    if include_extras:
        result_dict['extras'] = extras_dict_dictize(
            group._extras, context)

    context['with_capacity'] = True

    if packages_field:
        def get_packages_for_this_group(group_, just_the_count=False):
            # Ask SOLR for the list of packages for this org/group
            q = {
                'facet': 'false',
                'rows': 0,
            }

            if group_.is_organization:
                q['fq'] = 'owner_org:"{0}"'.format(group_.id)
            else:
                q['fq'] = 'groups:"{0}"'.format(group_.name)

            # Allow members of organizations to see private datasets.
            if group_.is_organization:
                is_group_member = (context.get('user') and
                    authz.has_user_permission_for_group_or_org(
                        group_.id, context.get('user'), 'read'))
                if is_group_member:
                    context['ignore_capacity_check'] = True

            if not just_the_count:
                # Is there a packages limit in the context?
                try:
                    packages_limit = context['limits']['packages']
                except KeyError:
                    q['rows'] = 1000  # Only the first 1000 datasets are returned
                else:
                    q['rows'] = packages_limit

            # drop 'schema' so package_search uses its own default schema
            search_context = dict((k, v) for (k, v) in context.items()
                                  if k != 'schema')
            search_results = logic.get_action('package_search')(search_context,
                                                                q)
            return search_results['count'], search_results['results']

        if packages_field == 'datasets':
            package_count, packages = get_packages_for_this_group(group)
            result_dict['packages'] = packages
        else:
            if dataset_counts is None:
                package_count, packages = get_packages_for_this_group(
                    group, just_the_count=True)
            else:
                # Use the pre-calculated package_counts passed in.
                facets = dataset_counts
                if group.is_organization:
                    package_count = facets['owner_org'].get(group.id, 0)
                else:
                    package_count = facets['groups'].get(group.name, 0)

        result_dict['package_count'] = package_count

    if include_tags:
        # group tags are not creatable via the API yet, but that was(/is) a
        # future intention (see kindly's commit 5c8df894 on 2011/12/23)
        result_dict['tags'] = tag_list_dictize(
            _get_members(context, group, 'tags'),
            context)

    if include_groups:
        # these sub-groups won't have tags or extras for speed
        result_dict['groups'] = group_list_dictize(
            _get_members(context, group, 'groups'),
            context, include_groups=True)

    if include_users:
        result_dict['users'] = user_list_dictize(
            _get_members(context, group, 'users'),
            context)

    context['with_capacity'] = False

    if context.get('for_view'):
        # give plugins a chance to modify the dict via before_view
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        #munge here should not have an effect only doing it incase
        #of potential vulnerability of dodgy api input
        # NOTE(review): the munged value is never used below --
        # url_for_static reads the original 'image_url' -- confirm this
        # is intentional.
        image_url = munge.munge_filename_legacy(image_url)
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % result_dict.get('image_url'),
            qualified=True
        )
    return result_dict
Example #15
0
def user_dictize(user: Union[model.User, tuple[model.User, str]],
                 context: Context,
                 include_password_hash: bool = False,
                 include_plugin_extras: bool = False) -> dict[str, Any]:
    """Turn a User object (or a (User, capacity) tuple) into a dictionary.

    Sensitive fields (password, reset_key, apikey, email, plugin_extras)
    are stripped, and re-added only when the context flags allow it, for
    the user themselves, or for a sysadmin requester.
    """
    model = context['model']

    if context.get('with_capacity'):
        # Fix type: "User" is not iterable
        user, capacity = user  #type: ignore
        result_dict = d.table_dictize(user, context, capacity=capacity)
    else:
        result_dict = d.table_dictize(user, context)

    assert isinstance(user, model.User)
    password_hash = result_dict.pop('password')
    del result_dict['reset_key']

    result_dict['display_name'] = user.display_name
    result_dict['email_hash'] = user.email_hash
    result_dict['number_created_packages'] = user.number_created_packages(
        include_private_and_draft=context.get(
            'count_private_and_draft_datasets', False))

    requester = context.get('user')

    # NOTE(review): 'reset_key' was already deleted above, so this pop is
    # a redundant no-op.
    result_dict.pop('reset_key', None)
    apikey = result_dict.pop('apikey', None)
    email = result_dict.pop('email', None)
    plugin_extras = result_dict.pop('plugin_extras', None)

    if context.get('keep_email', False):
        result_dict['email'] = email

    if context.get('keep_apikey', False):
        result_dict['apikey'] = apikey

    # users always see their own email and apikey
    if requester == user.name:
        result_dict['apikey'] = apikey
        result_dict['email'] = email

    if authz.is_sysadmin(requester):
        result_dict['apikey'] = apikey
        result_dict['email'] = email

        if include_password_hash:
            result_dict['password_hash'] = password_hash

        if include_plugin_extras:
            # deepcopy so callers cannot mutate the stored extras
            result_dict['plugin_extras'] = copy.deepcopy(
                plugin_extras) if plugin_extras else plugin_extras

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munge here should not have any effect, only doing it in case
        # of potential vulnerability of dodgy api input.
        image_url = munge.munge_filename_legacy(image_url)
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/user/%s' % result_dict.get('image_url'), qualified=True)

    return result_dict
Example #16
0
def group_dictize(group,
                  context,
                  include_groups=True,
                  include_tags=True,
                  include_users=True,
                  include_extras=True,
                  packages_field='datasets',
                  **kw):
    '''
    Turns a Group object and related into a dictionary. The related objects
    like tags are included unless you specify it in the params.

    :param packages_field: determines the format of the `packages` field - can
    be `datasets`, `dataset_count` or None.
    '''
    assert packages_field in ('datasets', 'dataset_count', None)
    if packages_field == 'dataset_count':
        dataset_counts = context.get('dataset_counts', None)

    result_dict = d.table_dictize(group, context)
    result_dict.update(kw)

    result_dict['display_name'] = group.title or group.name

    if include_extras:
        result_dict['extras'] = extras_dict_dictize(group._extras, context)

    context['with_capacity'] = True

    if packages_field:

        def get_packages_for_this_group(group_, just_the_count=False):
            # Ask SOLR for the list of packages for this org/group
            q = {
                'facet': 'false',
                'rows': 0,
            }

            if group_.is_organization:
                q['fq'] = 'owner_org:"{0}"'.format(group_.id)
            else:
                q['fq'] = 'groups:"{0}"'.format(group_.name)

            # Allow members of organizations to see private datasets.
            if group_.is_organization:
                is_group_member = (context.get('user') and
                                   authz.has_user_permission_for_group_or_org(
                                       group_.id, context.get('user'), 'read'))
                if is_group_member:
                    q['include_private'] = True

            if not just_the_count:
                # Is there a packages limit in the context?
                try:
                    packages_limit = context['limits']['packages']
                except KeyError:
                    q['rows'] = 1000  # Only the first 1000 datasets are returned
                else:
                    q['rows'] = packages_limit

            # drop 'schema' so package_search uses its own default schema
            search_context = dict(
                (k, v) for (k, v) in context.items() if k != 'schema')
            search_results = logic.get_action('package_search')(search_context,
                                                                q)
            return search_results['count'], search_results['results']

        if packages_field == 'datasets':
            package_count, packages = get_packages_for_this_group(group)
            result_dict['packages'] = packages
        else:
            if dataset_counts is None:
                package_count, packages = get_packages_for_this_group(
                    group, just_the_count=True)
            else:
                # Use the pre-calculated package_counts passed in.
                facets = dataset_counts
                if group.is_organization:
                    package_count = facets['owner_org'].get(group.id, 0)
                else:
                    package_count = facets['groups'].get(group.name, 0)

        result_dict['package_count'] = package_count

    if include_tags:
        # group tags are not creatable via the API yet, but that was(/is) a
        # future intention (see kindly's commit 5c8df894 on 2011/12/23)
        result_dict['tags'] = tag_list_dictize(
            _get_members(context, group, 'tags'), context)

    if include_groups:
        # these sub-groups won't have tags or extras for speed
        result_dict['groups'] = group_list_dictize(_get_members(
            context, group, 'groups'),
                                                   context,
                                                   include_groups=True)

    if include_users:
        result_dict['users'] = user_list_dictize(
            _get_members(context, group, 'users'), context)

    context['with_capacity'] = False

    if context.get('for_view'):
        # give plugins a chance to modify the dict via before_view
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        #munge here should not have an effect only doing it incase
        #of potential vulnerability of dodgy api input
        # NOTE(review): the munged value is never used below --
        # url_for_static reads the original 'image_url' -- confirm this
        # is intentional.
        image_url = munge.munge_filename_legacy(image_url)
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % result_dict.get('image_url'), qualified=True)
    return result_dict
def group_dictize(group,
                  context,
                  include_groups=True,
                  include_tags=True,
                  include_users=True,
                  include_extras=True,
                  packages_field='datasets',
                  **kw):
    '''Turn a Group object and its related objects into a dictionary.

    Related objects (tags, sub-groups, users, extras) are included unless
    switched off via the keyword flags.

    :param packages_field: determines the format of the `packages` field -
        can be `datasets`, `dataset_count` or None.
    '''
    assert packages_field in ('datasets', 'dataset_count', None)
    if packages_field == 'dataset_count':
        dataset_counts = context.get('dataset_counts', None)
    result_dict = d.table_dictize(group, context)
    result_dict.update(kw)
    result_dict['display_name'] = group.title or group.name
    if include_extras:
        result_dict[
            'extras'] = ckan.lib.dictization.model_dictize.extras_dict_dictize(
                group._extras, context)
    context['with_capacity'] = True
    if packages_field:

        def get_packages_for_this_group(group_, just_the_count=False):
            # Ask the search index for the packages in this org/group.
            q = {
                'facet': 'false',
                'rows': 0,
            }
            if group_.is_organization:
                q['fq'] = 'owner_org:"{0}"'.format(group_.id)
            else:
                q['fq'] = 'groups:"{0}"'.format(group_.name)
            # Members of an organization may also see private datasets.
            if group_.is_organization:
                is_group_member = (context.get('user') and
                                   authz.has_user_permission_for_group_or_org(
                                       group_.id, context.get('user'), 'read'))
                if is_group_member:
                    q['include_private'] = True
            if not just_the_count:
                # Honour a packages limit from the context, if set.
                try:
                    packages_limit = context['limits']['packages']
                except KeyError:
                    q['rows'] = 1000  # Only the first 1000 datasets are returned
                else:
                    q['rows'] = packages_limit
            # drop 'schema' so package_search uses its own default schema
            search_context = dict(
                (k, v) for (k, v) in context.items() if k != 'schema')
            search_results = package_search(search_context, q)
            return search_results['count'], search_results['results']

        if packages_field == 'datasets':
            package_count, packages = get_packages_for_this_group(group)
            result_dict['packages'] = packages
        else:
            if dataset_counts is None:
                package_count, packages = get_packages_for_this_group(
                    group, just_the_count=True)
            else:
                # Use the pre-calculated package counts passed in.
                facets = dataset_counts
                if group.is_organization:
                    package_count = facets['owner_org'].get(group.id, 0)
                else:
                    package_count = facets['groups'].get(group.name, 0)
        result_dict['package_count'] = package_count
    if include_tags:
        result_dict[
            'tags'] = ckan.lib.dictization.model_dictize.tag_list_dictize(
                ckan.lib.dictization.model_dictize._get_members(
                    context, group, 'tags'), context)
    if include_groups:
        result_dict[
            'groups'] = ckan.lib.dictization.model_dictize.group_list_dictize(
                ckan.lib.dictization.model_dictize._get_members(
                    context, group, 'groups'),
                context,
                include_groups=True)
    if include_users:
        result_dict[
            'users'] = ckan.lib.dictization.model_dictize.user_list_dictize(
                ckan.lib.dictization.model_dictize._get_members(
                    context, group, 'users'), context)
    context['with_capacity'] = False
    if context.get('for_view'):
        # give plugins a chance to modify the dict via before_view
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)
    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munge guards against dodgy api input; NOTE(review): the munged
        # value is never used below -- url_for_static reads the original
        # 'image_url' -- confirm this is intentional.
        image_url = munge.munge_filename_legacy(image_url)
        result_dict['image_display_url'] = helpers.url_for_static(
            'uploads/group/%s' % result_dict.get('image_url'), qualified=True)
    return result_dict