Example #1
0
    def create_questionnaire_resource(self, context, pkg_dict, survey):
        """ Create a resource for the questionnaire """
        from ckanext.unhcr.jobs import download_kobo_export

        # Placeholder file: the real XLS is fetched later by a background job
        placeholder = tempfile.NamedTemporaryFile()

        kobo_details = {
            'kobo_asset_id': self.kobo_asset_id,
            'kobo_download_status': 'pending',
            'kobo_download_attempts': 0,
            'kobo_last_updated': datetime.datetime.utcnow().isoformat()
        }
        resource_data = {
            'package_id': pkg_dict['id'],
            'upload': FlaskFileStorage(
                filename=placeholder.name,
                stream=open(placeholder.name, 'rb')),
            'name': 'Questionnaire XLS',
            'description': 'Questionnaire imported from the KoBo survey',
            'format': 'xls',
            'url_type': 'upload',
            'type': 'attachment',
            'visibility': 'public',
            'file_type': 'questionnaire',
            'kobo_type': 'questionnaire',
            'kobo_details': kobo_details,
        }

        create_resource = toolkit.get_action("resource_create")
        resource = create_resource(context, resource_data)

        # Fetch the actual questionnaire contents asynchronously
        toolkit.enqueue_job(download_kobo_export, [resource['id']],
                            title='Download KoBoToolbox questionnaire')

        return resource
Example #2
0
    def test_after_kobo_resource_create_scan_submit_hook_called(self):

        tmp = tempfile.NamedTemporaryFile()
        resource_data = {
            'package_id': self.dataset['id'],
            'url_type': 'upload',
            'upload': FlaskFileStorage(filename=tmp.name,
                                       stream=open(tmp.name, 'rb')),
            'type': 'data',
            'file_type': 'microdata',
            'identifiability': 'anonymized_public',
            'date_range_start': '2018-01-01',
            'date_range_end': '2019-01-01',
            'process_status': 'anonymized',
            'visibility': 'public',
            'version': '1',
        }
        # Grab the real action *before* get_action is patched below, so the
        # create still runs while every nested get_action call is recorded.
        create_resource = toolkit.get_action("resource_create")

        mocked_get_action = mock.Mock()
        mocked_get_action.return_value = lambda context, data_dict: True
        with mock.patch('ckan.plugins.toolkit.get_action', mocked_get_action):
            create_resource({'user': self.sysadmin['name']}, resource_data)

        mocked_get_action.assert_called()
        requested = [call[0][0] for call in mocked_get_action.call_args_list]
        assert 'scan_submit' in requested
Example #3
0
    def test_group_image_upload_then_clear(self):
        '''Test that clearing an upload removes the S3 key'''

        sysadmin = factories.Sysadmin(apikey="my-test-key")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        file_name = "somename.png"

        img_uploader = FlaskFileStorage(filename=file_name,
                                        stream=open(file_path),
                                        content_type='image/png')

        # Pin the upload timestamp so the generated S3 key is predictable
        with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
            mock_date.datetime.utcnow.return_value = \
                datetime.datetime(2001, 1, 29)
            context = {'user': sysadmin['name']}
            helpers.call_action('group_create',
                                context=context,
                                name="my-group",
                                image_upload=img_uploader,
                                image_url=file_name)

        key = '{0}/storage/uploads/group/2001-01-29-000000{1}' \
            .format(config.get('ckanext.s3filestore.aws_storage_path'), file_name)

        s3 = self.botoSession.client('s3', endpoint_url=self.endpoint_url)

        # check whether the object exists in S3
        # will throw exception if not existing
        s3.head_object(Bucket='my-bucket', Key=key)

        # clear upload
        helpers.call_action('group_update',
                            context=context,
                            id='my-group',
                            name='my-group',
                            image_url="http://asdf",
                            clear_upload=True)

        # key shouldn't exist: head_object must now fail.
        # Catch only the client's error class — the previous bare ``except``
        # also swallowed the AssertionError from assert_false, so this test
        # could never fail even when the key still existed.
        try:
            s3.head_object(Bucket='my-bucket', Key=key)
            assert_false(True, "file should not exist")
        except s3.exceptions.ClientError:
            # passed
            assert_true(True, "passed")
Example #4
0
    def update_resource(self,
                        resource_dict,
                        local_file,
                        user_name,
                        submission_count=None):
        """ Update the resource with real data

        Replaces the placeholder upload with the downloaded file and marks
        the KoBo download as complete.

        Params:
            resource_dict: resource to update; must contain ``kobo_details``.
            local_file: path of the downloaded file to upload.
            user_name: name of the user performing the update.
            submission_count: if truthy, stored so new submissions can be
                detected later.

        Raises toolkit.ValidationError when the resource is empty or lacks
        ``kobo_details``.
        """
        # Validate *before* logging: the previous order accessed
        # resource_dict['id'] in the log call first, so an empty resource
        # raised TypeError/KeyError instead of the intended ValidationError.
        if not resource_dict:
            raise toolkit.ValidationError(
                {'resource': ["empty resource to update"]})
        kobo_details = resource_dict.get('kobo_details')
        if not kobo_details:
            raise toolkit.ValidationError({
                'kobo_details': [
                    "kobo_details is missing from resource {}".format(
                        resource_dict)
                ]
            })

        logger.info('Updating resource {} from file {}'.format(
            resource_dict['id'], local_file))
        context = {'user': user_name, 'job': True}

        kobo_download_attempts = kobo_details.get('kobo_download_attempts', 0)
        kobo_details['kobo_download_status'] = 'complete'
        kobo_details['kobo_download_attempts'] = kobo_download_attempts + 1
        if submission_count:
            kobo_details['kobo_submission_count'] = submission_count
        kobo_details['kobo_last_updated'] = \
            datetime.datetime.utcnow().isoformat()

        resource = toolkit.get_action('resource_patch')(context, {
            'id': resource_dict['id'],
            'url_type': 'upload',
            'upload': FlaskFileStorage(filename=local_file,
                                       stream=open(local_file, 'rb')),
            # Drop the "download pending" notice from the description
            'description':
                resource_dict['description'].replace(DOWNLOAD_PENDING_MSG, ''),
            'kobo_details': kobo_details,
        })
        logger.info('Resource {} updated {}'.format(
            resource_dict['id'], kobo_details['kobo_last_updated']))
        return resource
    def test_group_image_upload_then_clear(self):
        '''Test that clearing an upload removes the S3 key'''

        sysadmin = factories.Sysadmin(apikey="my-test-key")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        file_name = "somename.png"

        img_uploader = FlaskFileStorage(filename=file_name,
                                        stream=open(file_path),
                                        content_type='image/png')

        # Freeze the clock so the generated S3 key is deterministic
        with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
            mock_date.datetime.utcnow.return_value = \
                datetime.datetime(2001, 1, 29)
            context = {'user': sysadmin['name']}
            helpers.call_action('group_create',
                                context=context,
                                name="my-group",
                                image_upload=img_uploader,
                                image_url=file_name)

        storage_path = config.get('ckanext.s3filestore.aws_storage_path')
        key = '{0}/storage/uploads/group/2001-01-29-000000{1}'.format(
            storage_path, file_name)

        # check whether the object exists in S3
        # will throw exception if not existing
        s3.head_object(Bucket=BUCKET_NAME, Key=key)

        # clear upload
        helpers.call_action('group_update',
                            context=context,
                            id='my-group',
                            name='my-group',
                            image_url="http://asdf",
                            clear_upload=True)

        # key shouldn't exist
        try:
            s3.head_object(Bucket=BUCKET_NAME, Key=key)
            # broken by https://github.com/ckan/ckan/commit/48afb9da4d
            # assert_false(True, "file '{}' should not exist".format(key))
        except ClientError:
            # passed
            assert_true(True, "passed")
Example #6
0
    def test_group_image_upload(self):
        '''Test a group image file upload'''
        sysadmin = factories.Sysadmin(apikey="my-test-key")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        file_name = 'somename.png'

        img_uploader = FlaskFileStorage(filename=file_name,
                                        stream=open(file_path),
                                        content_type='image/png')

        # Freeze the upload timestamp so the S3 key below is predictable
        with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
            mock_date.datetime.utcnow.return_value = \
                datetime.datetime(2001, 1, 29)
            context = {'user': sysadmin['name']}
            helpers.call_action('group_create',
                                context=context,
                                name="my-group",
                                image_upload=img_uploader,
                                image_url=file_name,
                                save='save')

        storage_path = config.get('ckanext.s3filestore.aws_storage_path')
        key = '{0}/storage/uploads/group/2001-01-29-000000{1}'.format(
            storage_path, file_name)

        s3 = self.botoSession.client('s3', endpoint_url=self.endpoint_url)

        # check whether the object exists in S3
        # will throw exception if not existing
        s3.head_object(Bucket='my-bucket', Key=key)

        # requesting image redirects to s3
        app = self._get_test_app()
        # attempt redirect to linked url
        image_file_url = '/uploads/group/{0}'.format(file_name)
        r = app.get(image_file_url, status=[302, 301])
        assert_equal(
            r.location,
            'http://localhost:5000/my-bucket/my-path/storage/uploads/group/{0}'
            .format(file_name))
Example #7
0
 def test_reject_bad_file_types(self):
     """ Test that invalid filename/format/content combinations
     are rejected.
     """
     for filename, url, specified_format in sample_file_rejections:
         # ``with`` replaces the manual try/finally and guarantees the
         # sample file is closed even when an assertion fails.
         with open("test/resources/" + filename, "rb") as sample_file:
             upload = FlaskFileStorage(filename=filename, stream=sample_file)
             resource = {
                 'url': url,
                 'format': specified_format,
                 'upload': upload
             }
             self.assertRaises(ValidationError,
                               self.validator.validate_resource_mimetype,
                               resource)
             # a rejected resource must not be assigned a MIME type
             self.assertIsNone(resource.get('mimetype'))
Example #8
0
    def test_recognise_file_types(self):
        """ Test that sample files are correctly sniffed.
        """
        for filename, specified_format, expected_type in sample_files:
            # ``with`` replaces the manual try/finally and guarantees the
            # sample file is closed even when an assertion fails.
            with open("test/resources/" + filename, "rb") as sample_file:
                upload = FlaskFileStorage(filename=filename, stream=sample_file)
                resource = {
                    'url': filename,
                    'format': specified_format,
                    'upload': upload
                }

                self.validator.validate_resource_mimetype(resource)
                error_msg = '{} has an unexpected MIME type {}'.format(
                    filename, resource['mimetype'])
                # some samples may legitimately sniff to several types
                if isinstance(expected_type, list):
                    assert_function = self.assertIn
                else:
                    assert_function = self.assertEqual
                assert_function(resource['mimetype'], expected_type, error_msg)
Example #9
0
    def create_data_resources(self, context, pkg_dict, survey):
        """ Create multiple resources for the survey data

        One placeholder resource is created per export format (json, csv,
        xls); a background job is enqueued for each one to download the real
        data from KoBoToolbox later.
        """
        from ckanext.unhcr.jobs import download_kobo_export

        date_range_start, date_range_end = survey.get_submission_times()
        if date_range_start is None:
            survey.load_asset()  # required to get the dates
            # we can use the inacurate creation and modification dates from the survey
            # those are DateTimes and we need just Dates
            date_created = parse_date(survey.asset.get('date_created'))
            date_modified = parse_date(survey.asset.get('date_modified'))

            date_range_start = date_created.strftime('%Y-%m-%d')
            date_range_end = date_modified.strftime('%Y-%m-%d')

        resources = []
        # create empty resources to be updated later
        f = tempfile.NamedTemporaryFile()

        # Loop invariants hoisted: the action lookup and the clamav flag do
        # not change per format, so set them once instead of every iteration.
        action = toolkit.get_action("resource_create")
        context.update({'skip_clamav_scan': True})

        for data_resource_format in ['json', 'csv', 'xls']:
            # JSON do not require an export
            if data_resource_format != 'json':
                # create the export for the expected format (this starts an async process at KoBo)
                export = survey.create_export(dformat=data_resource_format)
                export_id = export['uid']
            else:
                export_id = None

            description = '{} data imported from the KoBoToolbox survey. {}'.format(
                data_resource_format.upper(), DOWNLOAD_PENDING_MSG)
            resource = {
                'package_id': pkg_dict['id'],
                'name': 'Survey {} data'.format(data_resource_format),
                'description': description,
                'url_type': 'upload',
                'upload': FlaskFileStorage(filename=f.name,
                                           stream=open(f.name, 'rb')),
                'format': data_resource_format,
                'type': 'data',
                'version': '1',
                'date_range_start': date_range_start,
                'date_range_end': date_range_end,
                'visibility': 'restricted',
                'process_status': 'raw',
                'identifiability': 'personally_identifiable',
                'file_type': 'microdata',
                'kobo_type': 'data',
                'kobo_details': {
                    'kobo_export_id': export_id,
                    'kobo_asset_id': self.kobo_asset_id,
                    'kobo_download_status': 'pending',
                    'kobo_download_attempts': 0,
                    # To detect new submissions
                    'kobo_submission_count': survey.get_total_submissions(),
                    'kobo_last_updated':
                        datetime.datetime.utcnow().isoformat()
                }
            }

            resource = action(context, resource)
            resources.append(resource)

            # Start a job to download the file
            toolkit.enqueue_job(
                download_kobo_export, [resource['id']],
                title='Download KoBoToolbox survey {} data'.format(
                    data_resource_format))

        return resources
Example #10
0
def archive_resource(context, resource, log, result=None, url_timeout=30):
    """
    Archive the given resource. Moves the file from the temporary location
    given in download().

    Params:
       result - result of the download(), containing keys: length, saved_file

    If there is a failure, raises ArchiveError.

    Returns: {cache_filepath, cache_url}
    """

    # Return the key used for this resource in storage.
    #
    # Keys are in the form:
    # <uploaderpath>/<upload_to>/<2 char from resource id >/<resource id>/<filename>
    #
    # e.g.:
    # my_storage_path/archive/16/165900ba-3c60-43c5-9e9c-9f8acd0aa93f/data.csv
    relative_archive_path = os.path.join(resource['id'][:2], resource['id'])
    if not uploaderHasDownloadEnabled:
        from ckanext.archiver import default_settings as settings
        archive_dir = os.path.join(settings.ARCHIVE_DIR, relative_archive_path)
        if not os.path.exists(archive_dir):
            os.makedirs(archive_dir)
    # try to get a file name from the url
    parsed_url = urlparse.urlparse(resource.get('url'))
    try:
        file_name = parsed_url.path.split('/')[-1] or 'resource'
        file_name = file_name.strip()  # trailing spaces cause problems
        # Drop non-ASCII chars (e.g. u'\xa3' signs) and decode back to text.
        # Explicit decode() instead of six.text_type(): on Python 3,
        # text_type(bytes) returns the repr ("b'...'"), not the file name.
        file_name = file_name.encode('ascii', 'ignore').decode('ascii')
    except Exception:
        file_name = "resource"

    if uploaderHasDownloadEnabled:
        # Get an uploader, set the fields required to upload and upload up.
        save_file_folder = os.path.join('archive', relative_archive_path)

        from werkzeug.datastructures import FileStorage as FlaskFileStorage
        # we use the Upload class to push to our preferred filestorage solution
        toUpload = {
            "fileStorage":
            FlaskFileStorage(filename=file_name,
                             stream=open(result['saved_file']),
                             content_type=result['mimetype']),
            "preserve_filename":
            True
        }
        upload = uploader.get_uploader(save_file_folder)
        upload.update_data_dict(toUpload, 'url_field', 'fileStorage',
                                'clear_field')
        upload.upload(result['size'])
        # delete temp file now that its in real location
        try:
            os.remove(result['saved_file'])
        except OSError:
            pass

        cache_url = urlparse.urljoin(
            config.get('ckan.site_url', ''),
            "/dataset/{0}/resource/{1}/archive/{2}".format(
                resource['package_id'], resource['id'], file_name))
        responsePayload = {
            'cache_filepath': os.path.join(save_file_folder, file_name),
            'cache_url': cache_url
        }
        logging.debug(
            'file uploaded via Uploader to folder: %s, with filename: %s, responsePayload: %s',
            save_file_folder, file_name, responsePayload)
        return responsePayload
    else:
        # move the temp file to the resource's archival directory
        saved_file = os.path.join(archive_dir, file_name)
        shutil.move(result['saved_file'], saved_file)
        log.info('Going to do chmod: %s', saved_file)
        try:
            # 0o644 (octal) — the previous decimal 644 set mode 0o1204,
            # which is not world-readable as the comment intended.
            os.chmod(saved_file, 0o644)  # allow other users to read it
        except Exception as e:
            log.error('chmod failed %s: %s', saved_file, e)
            raise
        log.info('Archived resource as: %s', saved_file)

        # calculate the cache_url
        if not context.get('cache_url_root'):
            log.warning('Not saved cache_url because no value for '
                        'ckanext-archiver.cache_url_root in config')
            raise ArchiveError(
                _('No value for ckanext-archiver.cache_url_root in config'))
        cache_url = urlparse.urljoin(
            context['cache_url_root'],
            '%s/%s' % (relative_archive_path, file_name))
        return {'cache_filepath': saved_file, 'cache_url': cache_url}