Beispiel #1
0
    def download(self, id, filename=None):
        '''
        Provide a download by either redirecting the user to the url stored or
        downloading the uploaded file from S3.

        :param id: resource id, passed to ``self.get_path`` to build the key
        :param filename: file name to look up; only honoured when
            ``self.use_filename`` is truthy, otherwise (or when ``None``) the
            base name of ``self.url`` is used
        :returns: the result of ``h.redirect_to`` for the signed URL, or the
            result of ``DefaultResourceUpload.download`` when the key is
            missing from the bucket
        :raises OSError: with ``errno.ENOENT`` for any other S3 client error
        '''
        if not self.use_filename or filename is None:
            filename = os.path.basename(self.url)
        filename = munge.munge_filename(filename)
        key_path = self.get_path(id, filename)

        try:
            url = self.get_signed_url_to_key(key_path)
            # Hand the redirect back to the caller: when ``redirect_to``
            # returns a response object instead of raising, dropping it
            # would make this method return None.
            return h.redirect_to(url)
        except ClientError as ex:
            if ex.response['Error']['Code'] in ['NoSuchKey', '404']:
                # attempt fallback
                default_resource_upload = DefaultResourceUpload(self.resource)
                return default_resource_upload.download(id, self.filename)
            else:
                # Controller will raise 404 for us
                raise OSError(errno.ENOENT)
Beispiel #2
0
    def _handle_files(self, action, data_dict, context, files):
        """
        Prepare the storage directory for a resource file upload.

        For ``resource_create`` the resource is created first (through
        ``self._get_action``) so that it has an id; for ``resource_update``
        ``data_dict`` itself is treated as the resource.

        :param action: action name; only ``resource_create`` and
            ``resource_update`` are accepted
        :param data_dict: resource data; copied, never mutated
        :param context: action context; a copy is passed to the action
        :param files: mapping of uploads; only the first value is used
        :raises CKANAPIError: for any other action, or when the uploader has
            no storage path
        :raises IndexError: when ``files`` is empty
        :raises OSError: when the directory cannot be created for a reason
            other than already existing
        """
        import errno

        if action not in ['resource_create', 'resource_update']:
            raise CKANAPIError("LocalCKAN.call_action only supports file uploads for resources.")

        new_data_dict = dict(data_dict)
        if action == 'resource_create':
            # url needs to be set, otherwise there is a ValidationError.
            # Only fill in the placeholder when no usable url was supplied,
            # so that a url given by the caller is kept.
            if not new_data_dict.get('url'):
                new_data_dict['url'] = '/tmp-file'
            resource = self._get_action(action)(dict(context), new_data_dict)
        else:
            resource = new_data_dict

        from ckan.lib.uploader import ResourceUpload
        resource_upload = ResourceUpload({'id': resource['id']})

        # get first upload, ignore key
        # (list() keeps this working where dict.values() returns a view)
        source_file = list(files.values())[0]
        if not resource_upload.storage_path:
            raise CKANAPIError("No storage configured, unable to upload files")

        directory = resource_upload.get_directory(resource['id'])
        # NOTE(review): source_file and filepath are not consumed in this
        # block; presumably the copy step belongs after the makedirs call.
        filepath = resource_upload.get_path(resource['id'])
        try:
            os.makedirs(directory)
        except OSError as e:
            # an already existing directory is fine, anything else is not
            if e.errno != errno.EEXIST:
                raise
    def filesystem_resource_download(self, id, resource_id, filename=None):
        """
        Fallback controller action serving a resource from the filesystem.

        Mirrors `ckan.controllers.package:PackageController.resource_download`.

        The resource and its package are looked up first; a missing one
        aborts with 404, an unauthorised one with 401. Uploaded resources
        (``url_type == 'upload'``) are served through
        ``DefaultResourceUpload.download``, with an ``OSError`` from it
        turned into a 404. Resources without a ``url`` abort with 404;
        otherwise the user is redirected to the stored url.
        """
        action_context = dict(
            model=model,
            session=model.Session,
            user=c.user or c.author,
            auth_user_obj=c.userobj,
        )

        try:
            rsc = get_action('resource_show')(action_context,
                                              {'id': resource_id})
            get_action('package_show')(action_context, {'id': id})
        except NotFound:
            abort(404, _('Resource not found'))
        except NotAuthorized:
            abort(401, _('Unauthorised to read resource %s') % resource_id)

        is_uploaded = rsc.get('url_type') == 'upload'
        if not is_uploaded:
            if 'url' not in rsc:
                abort(404, _('No download is available'))
        else:
            uploader = DefaultResourceUpload(rsc)
            try:
                return uploader.download(rsc['id'], filename)
            except OSError:
                # FileNotFoundError is an OSError as well
                abort(404, _('Resource data not found'))
        redirect(rsc['url'])
Beispiel #4
0
def get_local_upload_path(resource_id):
    u'''
    Return the local path of an uploaded file, given the resource id.

    Note: neither the resource nor the file is checked for existence.
    '''
    # The uploader is built from a dummy resource; only its path logic is
    # used here.
    placeholder_resource = {u'url': u'foo'}
    return ResourceUpload(placeholder_resource).get_path(resource_id)
def filesystem_resource_download(package_type, id, resource_id, filename=None):
    """
    Fallback view serving a resource from the filesystem.

    Mirrors `ckan.views.resource:download`.

    The resource and its package are looked up first; a missing one aborts
    with 404, an unauthorised one with 401. Non-upload resources are
    redirected to their ``url`` (404 if there is none). Uploaded resources
    are served via ``DefaultResourceUpload.download`` when that method
    exists, otherwise the file is sent from the storage path. The guessed
    mimetype is only passed on when the ``preview`` query argument is set.
    """
    context = dict([
        (u'model', model),
        (u'session', model.Session),
        (u'user', g.user),
        (u'auth_user_obj', g.userobj),
    ])
    preview = request.args.get(u'preview', False)

    try:
        rsc = get_action(u'resource_show')(context, {u'id': resource_id})
        get_action(u'package_show')(context, {u'id': id})
    except NotFound:
        return abort(404, _(u'Resource not found'))
    except NotAuthorized:
        return abort(401, _('Unauthorised to read resource %s') % resource_id)

    mimetype = mimetypes.guess_type(rsc.get('url', ''))[0]

    # Anything that is not an upload is either redirected or unavailable.
    if rsc.get(u'url_type') != u'upload':
        if u'url' in rsc:
            return redirect(rsc[u'url'])
        return abort(404, _(u'No download is available'))

    if hasattr(DefaultResourceUpload, 'download'):
        uploader = DefaultResourceUpload(rsc)
        try:
            return uploader.download(rsc['id'], filename)
        except OSError:
            # FileNotFoundError is an OSError as well
            return abort(404, _('Resource data not found'))

    # Uploader has no download(): build the on-disk location from the id,
    # <storage>/resources/<id[0:3]>/<id[3:6]>/<id[6:]>
    filepath = os.path.join(
        get_storage_path(),
        'resources',
        resource_id[0:3],
        resource_id[3:6],
        resource_id[6:],
    )
    send_options = {'mimetype': mimetype} if preview else {}
    return flask.send_file(filepath, **send_options)
Beispiel #6
0
    def metadata(self, id, filename=None):
        '''
        Return the public metadata of the resource's object in S3.

        The object is inspected with a ``head_object`` call; ``content_type``,
        ``size`` and ``hash`` are added from ``ContentType``,
        ``ContentLength`` and ``ETag``, and non-public fields are removed
        before the result is passed through ``self.as_clean_dict``.

        :param id: resource id, passed to ``self.get_path`` to build the key
        :param filename: file name to look up; defaults to the base name of
            ``self.url``
        :returns: the cleaned metadata dict, or the result of
            ``DefaultResourceUpload.metadata`` when the key is missing and
            ``ckanext.s3filestore.filesystem_download_fallback`` is set
        :raises OSError: with ``errno.ENOENT`` for every other S3 client
            error
        '''
        if filename is None:
            filename = os.path.basename(self.url)
        filename = munge.munge_filename(filename)
        key_path = self.get_path(id, filename)

        try:
            client = self.get_s3_client()

            metadata = client.head_object(Bucket=self.bucket_name,
                                          Key=key_path)
            metadata['content_type'] = metadata['ContentType']

            # Drop non public metadata
            for private_field in ('ServerSideEncryption',
                                  'SSECustomerAlgorithm',
                                  'SSECustomerKeyMD5',
                                  'SSEKMSKeyId',
                                  'StorageClass',
                                  'RequestCharged',
                                  'ReplicationStatus',
                                  'ObjectLockLegalHoldStatus'):
                metadata.pop(private_field, None)

            metadata['size'] = metadata['ContentLength']
            metadata['hash'] = metadata['ETag']
            return self.as_clean_dict(metadata)
        except ClientError as ex:
            if ex.response['Error']['Code'] in ['NoSuchKey', '404']:
                # NOTE(review): a string value such as 'false' is truthy
                # here -- confirm how this option is parsed.
                if config.get(
                        'ckanext.s3filestore.filesystem_download_fallback',
                        False):
                    log.info('Attempting filesystem fallback for resource %s',
                             id)

                    default_resource_upload = DefaultResourceUpload(
                        self.resource)
                    return default_resource_upload.metadata(id)

            # Uploader interface does not know about s3 errors
            raise OSError(errno.ENOENT)
Beispiel #7
0
    def migrate(self):
        '''
        Migrate filestore over in our very HDXish way :)

        Every ``file.upload`` resource whose url contains ``storage`` and
        that is not yet marked as an upload is fetched over HTTP, written to
        the path given by ``ResourceUpload`` and then updated (together with
        its matching ``resource_revision`` row) to ``url_type = 'upload'``
        with the bare file name as url. Resources that cannot be fetched
        (status other than 200) are reported and skipped. Each resource is
        committed on its own.
        '''
        import errno

        results = Session.execute(
            "select id, revision_id, url from resource "
            "where resource_type = 'file.upload' "
            "and (url_type <> 'upload' or url_type is null)"
            "and url like '%storage%'")
        for resource_id, revision_id, url in results:
            # Give it a second, would you?
            time.sleep(0.7)
            # last path segment of the url is the file name
            filename = urlparse(url).path.split("/")[-1]
            response = requests.get(url, stream=True)
            try:
                if response.status_code != 200:
                    print("failed to fetch %s (code %s)" % (
                        url, response.status_code))
                    continue
                resource_upload = ResourceUpload({'id': resource_id})
                assert resource_upload.storage_path, "no storage configured aborting"

                directory = resource_upload.get_directory(resource_id)
                filepath = resource_upload.get_path(resource_id)
                try:
                    os.makedirs(directory)
                except OSError as e:
                    # an already existing directory is fine
                    if e.errno != errno.EEXIST:
                        raise

                with open(filepath, 'wb+') as out:
                    for chunk in response.iter_content(1024):
                        if chunk:
                            out.write(chunk)
            finally:
                # a streamed response keeps its connection until closed,
                # also on the skip path above
                response.close()

            # Bind parameters instead of string formatting: the file name
            # comes from a stored url and may contain quotes.
            Session.execute(
                "update resource set url_type = 'upload', "
                "url = :url where id = :id",
                {'url': filename, 'id': resource_id})
            Session.execute(
                "update resource_revision set url_type = 'upload', "
                "url = :url where id = :id and "
                "revision_id = :revision_id",
                {'url': filename, 'id': resource_id,
                 'revision_id': revision_id})
            Session.commit()
            print("Saved url %s" % url)
Beispiel #8
0
    def test_resource_without_upload(self, mock_uploads_enabled, mock_open,
                                     send_file):
        # An upload field carrying an empty file name must not produce a
        # file name on the uploader.
        # this test data is based on real observation using a browser
        empty_upload = FileStorage(filename=u'')
        resource = {
            u'clear_upload': u'true',
            u'format': u'PNG',
            u'url': u'https://example.com/data.csv',
            u'description': u'',
            u'upload': empty_upload,
            u'package_id': u'dataset1',
            u'id': u'8a3a874e-5ee1-4e43-bdaf-e2569cf72344',
            u'name': u'data.csv'
        }

        uploader = ResourceUpload(resource)

        eq_(uploader.filename, None)
Beispiel #9
0
    def test_resource_without_upload(self, ckan_config, monkeypatch, tmpdir):
        # Point the storage path at a temporary directory for this test.
        storage_dir = str(tmpdir)
        monkeypatch.setitem(ckan_config, u'ckan.storage_path', storage_dir)

        # An upload field carrying an empty file name must not produce a
        # file name on the uploader.
        # this test data is based on real observation using a browser
        empty_upload = FileStorage(filename=u'')
        resource = {
            u'clear_upload': u'true',
            u'format': u'PNG',
            u'url': u'https://example.com/data.csv',
            u'description': u'',
            u'upload': empty_upload,
            u'package_id': u'dataset1',
            u'id': u'8a3a874e-5ee1-4e43-bdaf-e2569cf72344',
            u'name': u'data.csv'
        }

        uploader = ResourceUpload(resource)

        assert uploader.filename is None
Beispiel #10
0
 def get_resource_uploader(self, data_dict):
     """Return a ``ResourceUpload`` built from ``data_dict``."""
     uploader = ResourceUpload(data_dict)
     return uploader