def upload(self, max_size=2):
        """Send the pending uploaded file to storage, then clear the old asset.

        max_size is the maximum accepted file size in megabytes. The limit
        is only checked when both the detected size and max_size are truthy
        (note that not all backends will support this limitation).

        Raises toolkit.ValidationError when the detected size exceeds the
        limit.
        """
        pending = self._uploaded_file
        if pending is not None:
            _log.debug("Initiating file upload for %s, storage is %s", self._filename, self._storage)
            nbytes = get_uploaded_size(pending)
            _log.debug("Detected file size: %s", nbytes)
            over_limit = nbytes and max_size and nbytes > max_size * MB
            if over_limit:
                raise toolkit.ValidationError({'upload': ['File upload too large']})

            content_type = get_uploaded_mimetype(pending)
            _log.debug("Detected file MIME type: %s", content_type)
            written = self._storage.upload(
                _get_underlying_file(pending),
                self._filename,
                self._object_type,
                mimetype=content_type)
            _log.debug("Finished uploading file %s, %d bytes written to storage", self._filename, written)
            # A completed upload also flags the previous asset for removal
            self._clear = True

        if not self._clear:
            return

        previous = self._old_filename
        # Only names that are not absolute HTTP URLs are deleted from storage
        if previous and not is_absolute_http_url(previous):
            _log.debug("Clearing old asset file: %s", previous)
            self._storage.delete(previous)
Example #2
0
    def _process_schema_fields(self, data_dict):
        u'''
        Collapse the `schema_*` input variants into a single `schema` key

        `schema_upload`, `schema_url` and `schema_json` are always removed
        from the data_dict. The first usable one, in this order of
        precedence, is stored under `schema`:

        1. `schema_upload`: an upload object of an allowed type with a
           filename; the contents of its underlying file are read.
        2. `schema_url`: a string starting with `http` (case-insensitive),
           copied as is. Any other truthy value raises a ValidationError.
        3. `schema_json`: copied as is.

        The resulting value is not validated here, so the data_dict still
        needs to pass validation. Returns the same (mutated) data_dict.
        '''

        uploaded, url, json_value = [
            data_dict.pop(field, None)
            for field in (u'schema_upload', u'schema_url', u'schema_json')
        ]

        has_upload = (isinstance(uploaded, ALLOWED_UPLOAD_TYPES)
                      and uploaded.filename)
        if has_upload:
            data_dict[u'schema'] = _get_underlying_file(uploaded).read()
            return data_dict

        if url:
            looks_like_http = (isinstance(url, string_types)
                               and url.lower()[:4] == u'http')
            if not looks_like_http:
                raise t.ValidationError({u'schema_url': 'Must be a valid URL'})
            data_dict[u'schema'] = url
            return data_dict

        if json_value:
            data_dict[u'schema'] = json_value

        return data_dict
def get_uploaded_size(uploaded):
    # type: (UploadedFileWrapper) -> Optional[int]
    """Try to figure out the size in bytes of an uploaded file

    The size is measured on the underlying stream by seeking to its end;
    the stream is then rewound to its start. If the stream cannot be
    measured that way, the `Content-Length` entry of the upload's headers
    is used instead, converted to an integer.

    This may not work for all uploaded file types, in which case None will
    be returned. None is also returned when the header is missing or is
    not a valid integer.
    """
    # Let's try to get the size from the stream first, as it is more reliable and secure
    stream = _get_underlying_file(uploaded)
    try:
        stream.seek(0, os.SEEK_END)
        size = stream.tell()
        stream.seek(0)
        return size
    except (AttributeError, IOError):  # no seek / non-seekable stream
        pass

    try:
        # FlaskFileStorage / cgi.FieldStorage
        # Header values are text; callers compare the result against a
        # numeric limit, so it has to be converted to an int here.
        return int(uploaded.headers['Content-Length'])
    except (AttributeError, KeyError, TypeError, ValueError):
        # no headers / no such header / unusable or malformed value
        pass

    return None
Example #4
0
    def __init__(self, resource):
        """Set up the upload state from a resource dict.

        `resource` is modified in place: the `upload` and `clear_upload`
        keys are popped, and for an upload of an allowed type `url`,
        `url_type` and `last_modified` are set.

        When no storage path is available only `storage_path` is set (to
        None) and no other attribute is initialised.

        Raises logic.ValidationError when the uploaded file name is rejected
        by `allowed_file`; OSError from creating the storage directory is
        re-raised unless it is errno 17.
        """
        storage_root = get_storage_path()
        guess_mode = config.get('ckan.mimetype_guess', 'file_ext')

        if not storage_root:
            self.storage_path = None
            return

        self.storage_path = os.path.join(storage_root, 'resources')
        try:
            os.makedirs(self.storage_path)
        except OSError as e:
            # errno 17 means the directory already exists, which is fine
            if e.errno != 17:
                raise
        self.filename = None
        self.mimetype = None

        url = resource.get('url')

        field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        guess_by_extension = guess_mode == 'file_ext'
        if url and guess_by_extension:
            self.mimetype = mimetypes.guess_type(url)[0]

        if isinstance(field_storage, ALLOWED_UPLOAD_TYPES):
            self.filesize = 0  # bytes

            # Local modification: sanitise with werkzeug's secure_filename
            # before CKAN's own munging (overkill, but werkzeug is trusted
            # over ckan here).
            self.filename = munge.munge_filename(
                secure_filename(field_storage.filename))
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            resource['last_modified'] = datetime.datetime.utcnow()

            # Measure the upload by seeking to its end, then rewind to the
            # beginning of the file buffer
            stream = _get_underlying_file(field_storage)
            self.upload_file = stream
            stream.seek(0, os.SEEK_END)
            self.filesize = stream.tell()
            stream.seek(0, os.SEEK_SET)

            # Local modification: reject file names that are not allowed.
            # Note: If resubmitting a failed form without clearing the file
            # the ResourceUpload.upload function would be called skipping
            # the init call.
            if not allowed_file(self.filename):
                message = 'Upload: Invalid upload file format.{}'.format(
                    self.filename)
                log.error(message)
                # remove file - by default a resource can be added without
                # any values
                resource['url'] = None
                resource['url_type'] = ''
                raise logic.ValidationError({
                    'upload':
                    ['Invalid upload file format, file has been removed.']
                })

            # the mimetype is still unset if guessing from the url failed
            # or was not attempted
            if not self.mimetype:
                if guess_by_extension:
                    self.mimetype = mimetypes.guess_type(self.filename)[0]
                elif guess_mode == 'file_contents':
                    try:
                        contents = self.upload_file.read()
                        self.mimetype = magic.from_buffer(contents, mime=True)
                        self.upload_file.seek(0, os.SEEK_SET)
                    except IOError:
                        # Not that important if call above fails
                        self.mimetype = None

        elif self.clear:
            resource['url_type'] = ''