Esempio n. 1
0
def get_create_mode_from_config():
    """Return how validation should run when a resource is created.

    The synchronous flag takes precedence over the asynchronous one.

    :returns: ``u'sync'`` when ``ckanext.validation.run_on_create_sync`` is
        enabled, otherwise ``u'async'`` when
        ``ckanext.validation.run_on_create_async`` is enabled (the default),
        otherwise ``None``
    """
    run_sync = asbool(
        config.get(u'ckanext.validation.run_on_create_sync', False))
    if run_sync:
        return u'sync'

    # The async option is only consulted when sync mode is switched off.
    run_async = asbool(
        config.get(u'ckanext.validation.run_on_create_async', True))
    return u'async' if run_async else None
Esempio n. 2
0
def build_pages_nav_main(*args):
    """Build the main navigation markup and append the public pages to it.

    Core menu entries for "about", "groups" and "organizations" are dropped
    when the matching ``ckanext.pages.*_menu`` option is disabled. The
    remaining entries are rendered by ``core_build_nav_main`` and one
    ``<li>`` per page returned by ``ckanext_pages_list`` is appended.

    :param args: menu entries; the first item of each entry is the route name
    :returns: the navigation markup
    """
    show_about = tk.asbool(tk.config.get('ckanext.pages.about_menu', True))
    show_groups = tk.asbool(tk.config.get('ckanext.pages.group_menu', True))
    show_orgs = tk.asbool(
        tk.config.get('ckanext.pages.organization_menu', True))

    # Different CKAN versions use different route names, so every known
    # spelling of a disabled menu entry is collected here.
    hidden_routes = []
    if not show_about:
        hidden_routes.extend(['about', 'home.about'])
    if not show_orgs:
        hidden_routes.extend(
            ['organizations_index', 'home.organizations_index'])
    if not show_groups:
        hidden_routes.extend(['group_index', 'home.group_index'])

    visible_entries = [
        entry for entry in args if entry[0] not in hidden_routes]
    output = core_build_nav_main(*visible_entries)

    # do not display any private pages in menu even for sysadmins
    pages_list = tk.get_action('ckanext_pages_list')(
        None, {'order': True, 'private': False})

    # Work out whether the request currently shows a page or blog post.
    if ckan_29_or_higher:
        is_current_page = tk.get_endpoint() in (
            ('pages', 'show'), ('pages', 'blog_show'))
    else:
        is_current_page = (
            hasattr(tk.c, 'action')
            and tk.c.action in ('pages_show', 'blog_show')
            and tk.c.controller == 'ckanext.pages.controller:PagesController')

    # The last path segment is compared with each page name below.
    page_name = tk.request.path.split('/')[-1] if is_current_page else ''

    for page in pages_list:
        section = 'blog' if page['page_type'] == 'blog' else 'pages'
        if six.PY2:
            # Python 2 ``quote`` needs bytes, so round-trip through UTF-8.
            slug = quote(page['name'].encode('utf-8')).decode('utf-8')
        else:
            slug = quote(page['name'])
        label = html_escape(page['title'])
        anchor = tk.h.literal(
            u'<a href="/{}/{}">{}</a>'.format(section, slug, label))

        if page['name'] == page_name:
            opening = tk.literal('<li class="active">')
        else:
            opening = tk.literal('<li>')
        output = output + (opening + anchor + tk.literal('</li>'))

    return output
Esempio n. 3
0
    def update_config(self, config):
        """Register the extension's templates, resources and public files.

        Also stores the ``ckanext.pages.organization`` and
        ``ckanext.pages.group`` options on the plugin instance; the group and
        organization template directories are only added when the matching
        option is enabled.

        :param config: the CKAN config object being updated
        """
        self.organization_pages = tk.asbool(
            config.get('ckanext.pages.organization', False))
        self.group_pages = tk.asbool(
            config.get('ckanext.pages.group', False))

        template_dirs = ['../theme/templates_main']
        if self.group_pages:
            template_dirs.append('../theme/templates_group')
        if self.organization_pages:
            template_dirs.append('../theme/templates_organization')
        for template_dir in template_dirs:
            tk.add_template_directory(config, template_dir)

        tk.add_resource('../assets', 'pages')

        public_dirs = (
            '../assets/',
            '../assets/vendor/ckeditor/',
            '../assets/vendor/ckeditor/skins/moono-lisa',
        )
        for public_dir in public_dirs:
            tk.add_public_directory(config, public_dir)
Esempio n. 4
0
def get_validation_badge(resource, in_listing=False):
    """Return the HTML for a resource's validation badge image.

    :param resource: resource dict; ``package_id`` and ``id`` are read by
        index once a validation status is present
    :param in_listing: whether the badge is rendered inside a listing, in
        which case ``ckanext.validation.show_badges_in_listings`` can
        disable it
    :returns: an ``<a>`` wrapping the badge ``<img>``, or an empty string
        when badges are disabled for listings or the resource has no
        validation status
    """
    if in_listing:
        show_in_listings = asbool(
            config.get('ckanext.validation.show_badges_in_listings', True))
        if not show_in_listings:
            return ''

    current_status = resource.get('validation_status')
    if not current_status:
        return ''

    messages = {
        'success': _('Valid data'),
        'failure': _('Invalid data'),
        'error': _('Error during validation'),
        'unknown': _('Data validation unknown'),
    }

    # Any status outside the known set is shown as "unknown".
    if current_status in ('success', 'failure', 'error'):
        status = current_status
    else:
        status = 'unknown'

    validation_url = url_for(
        'validation_read',
        id=resource['package_id'],
        resource_id=resource['id'])
    badge_url = url_for_static(
        '/images/badges/data-{}-flat.svg'.format(status))

    template = u'''
<a href="{validation_url}" class="validation-badge">
    <img src="{badge_url}" alt="{alt}" title="{title}"/>
</a>'''
    return template.format(
        validation_url=validation_url,
        badge_url=badge_url,
        alt=messages[status],
        title=resource.get('validation_timestamp', ''))
Esempio n. 5
0
    def before_map(self, map):
        """Connect the S3 filestore routes to the routing map.

        All routes are served by
        ``ckanext.s3filestore.controller:S3Controller``.

        :param map: the routes mapper to extend
        :returns: the same mapper
        """
        controller = 'ckanext.s3filestore.controller:S3Controller'
        with SubMapper(map, controller=controller) as routes:
            # Override the resource download links, but only when the core
            # uploader class has no ``download`` attribute of its own.
            if not hasattr(DefaultResourceUpload, "download"):
                for download_path in (
                        '/dataset/{id}/resource/{resource_id}/download',
                        '/dataset/{id}/resource/{resource_id}/download/{filename}'):
                    routes.connect('resource_download',
                                   download_path,
                                   action='resource_download')

            # Allow fallback to access old files
            if not toolkit.asbool(
                    toolkit.config.get('ckanext.s3filestore.use_filename',
                                       False)):
                routes.connect(
                    'resource_download',
                    '/dataset/{id}/resource/{resource_id}/orig_download/{filename}',
                    action='resource_download')

            # fallback controller action to download from the filesystem
            routes.connect(
                'filesystem_resource_download',
                '/dataset/{id}/resource/{resource_id}/fs_download/{filename}',
                action='filesystem_resource_download')

            # Intercept the uploaded file links (e.g. group images)
            routes.connect('uploaded_file',
                           '/uploads/{upload_to}/{filename}',
                           action='uploaded_file_redirect')

        return map
Esempio n. 6
0
def _get_extra_ors_state(extras):
    """Collect the boolean flags stored under ``_extra_or_prefix`` keys.

    :param extras: mapping of extra names to raw values
    :returns: dict mapping each matching key, with the prefix stripped, to
        its value converted by ``tk.asbool``
    """
    prefix_length = len(_extra_or_prefix)
    state = {}
    for name, raw_value in extras.items():
        if not name.startswith(_extra_or_prefix):
            continue
        state[name[prefix_length:]] = tk.asbool(raw_value)
    return state
Esempio n. 7
0
def render_content(content):
    """Render page content as markdown.

    HTML is allowed only when ``ckanext.pages.allow_html`` is enabled.

    :param content: the markdown source to render
    :returns: the result of ``tk.h.render_markdown``
    """
    html_allowed = tk.asbool(tk.config.get('ckanext.pages.allow_html', False))
    try:
        rendered = tk.h.render_markdown(content, allow_html=html_allowed)
    except TypeError:
        # allow_html is only available in CKAN >= 2.3
        rendered = tk.h.render_markdown(content)
    return rendered
def profanity_checking_enabled():
    """Tell whether comments should be checked for profanity.

    Profanity checking requires the YTP comments extension to be enabled and
    the ``ckan.comments.check_for_profanity`` option to be switched on.

    :rtype: bool

    """
    comments_enabled = ytp_comments_enabled()
    if not comments_enabled:
        # Hand back the falsy value itself, as an ``and`` expression would.
        return comments_enabled
    return toolkit.asbool(
        config.get('ckan.comments.check_for_profanity', False))
Esempio n. 9
0
    def _params_for_postgis_search(self, bbox, search_params):
        """Restrict a search to the datasets found by a PostGIS bbox query.

        When the search is sorted by ``spatial desc`` and
        ``ckanext.spatial.use_postgis_sorting`` is enabled, the ordered
        extents are queried and the ranking of the requested page is stored
        in ``search_params['extras']['ext_spatial']`` for ``after_search``.
        Otherwise the plain bbox query is used.

        In both cases the search is flagged with ``abort_search`` when no
        extent matches; otherwise ``q`` is rewritten to filter by the ids of
        the matching datasets.

        :param bbox: bounding box passed to the bbox query helpers
        :param search_params: search parameters dict; modified in place
        :returns: the same ``search_params`` dict
        :raises SearchError: if PostGIS sorting is requested together with a
            non-empty ``q`` or ``fq``
        """
        from ckanext.spatial.lib import bbox_query, bbox_query_ordered
        from ckan.lib.search import SearchError

        # Note: This will be deprecated at some point in favour of the
        # Solr 4 spatial sorting capabilities
        use_postgis_sorting = (
            search_params.get('sort') == 'spatial desc' and
            tk.asbool(
                config.get('ckanext.spatial.use_postgis_sorting', 'False')))

        if use_postgis_sorting:
            # Spatial ranking cannot be combined with other parameters
            # because it is too inefficient to use SOLR to filter results
            # and return the entire set to this class and have after_search
            # do the sorting and paging.
            # ``.get`` is used because ``q``/``fq`` may be absent, as the
            # non-sorting branch below already assumes for ``q``.
            if search_params.get('q') or search_params.get('fq'):
                raise SearchError(
                    'Spatial ranking cannot be mixed with other search parameters'
                )
            extents = bbox_query_ordered(bbox)
            are_no_results = not extents

            # Keep the requested paging so after_search can construct the
            # correctly sorted results for this page.
            rows = search_params['extras']['ext_rows'] = search_params['rows']
            start = search_params['extras']['ext_start'] = search_params[
                'start']
            search_params['sort'] = None  # SOLR should not sort.
            search_params['extras']['ext_spatial'] = [
                (extent.package_id, extent.spatial_ranking)
                for extent in extents[start:start + rows]]
        else:
            extents = bbox_query(bbox)
            are_no_results = extents.count() == 0

        if are_no_results:
            # We don't need to perform the search
            search_params['abort_search'] = True
        else:
            # We'll perform the existing search but also filtering by the ids
            # of datasets within the bbox
            bbox_query_ids = [extent.package_id for extent in extents]

            q = search_params.get('q', '').strip() or '""'
            # Note: `"" AND` query doesn't work in github ci
            new_q = '%s AND ' % q if q and q != '""' else ''
            new_q += '(%s)' % ' OR '.join(
                ['id:%s' % package_id for package_id in bbox_query_ids])

            search_params['q'] = new_q

        return search_params
Esempio n. 10
0
    def __init__(self, resource):
        '''Prepare the uploader for a resource; `upload()` does the upload.

        The storage path has the format:
        <ckanext.s3filestore.aws_storage_path>/resources/

        When the resource carries an upload, the resource dict is updated
        with the munged filename, the ``upload`` url type, a modification
        time and, if missing, a mimetype guessed from the filename. When the
        upload is being cleared for an existing resource, the previous url is
        remembered and the url type is reset.
        '''

        super(S3ResourceUploader, self).__init__()

        self.use_filename = toolkit.asbool(
            config.get('ckanext.s3filestore.use_filename', False))
        base_path = config.get('ckanext.s3filestore.aws_storage_path', '')
        self.storage_path = os.path.join(base_path, 'resources')
        self.filename = None
        self.old_filename = None
        self.url = resource['url']
        # Keep the resource in case a fallback to the default ResourceUpload
        # from core ckan.lib.uploader is needed.
        self.resource = resource

        field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if isinstance(field_storage, ALLOWED_UPLOAD_TYPES):
            self.filesize = 0  # bytes

            self.filename = field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            resource['last_modified'] = datetime.datetime.utcnow()

            self.mimetype = resource.get('mimetype')
            if not self.mimetype:
                # Best effort: a failed guess leaves the mimetype unset.
                try:
                    guessed_type = mimetypes.guess_type(
                        self.filename, strict=False)[0]
                    self.mimetype = resource['mimetype'] = guessed_type
                except Exception:
                    pass

            self.upload_file = _get_underlying_file(field_storage)
            # Measure the size by seeking to the end of the buffer ...
            self.upload_file.seek(0, os.SEEK_END)
            self.filesize = self.upload_file.tell()
            # ... then go back to the beginning of the file buffer
            self.upload_file.seek(0, os.SEEK_SET)
        elif self.clear and resource.get('id'):
            # New, not yet created resources can be marked for deletion if the
            # users cancels an upload and enters a URL instead.
            previous = model.Session.query(model.Resource).get(resource['id'])
            self.old_filename = previous.url
            resource['url_type'] = ''
Esempio n. 11
0
    def after_search(self, search_results, search_params):
        """Replace the results with the spatially ranked datasets.

        Only applies when ``ext_spatial`` rankings were stored in the search
        extras and ``ckanext.spatial.use_postgis_sorting`` is enabled; each
        ranked package is then loaded from the search index in ranking order.

        :param search_results: search results dict; ``results`` may be
            replaced in place
        :param search_params: the parameters the search ran with
        :returns: the same ``search_results`` dict
        """
        from ckan.lib.search import PackageSearchQuery

        # Note: This will be deprecated at some point in favour of the
        # Solr 4 spatial sorting capabilities
        if not search_params.get('extras', {}).get('ext_spatial'):
            return search_results
        if not tk.asbool(
                config.get('ckanext.spatial.use_postgis_sorting', 'False')):
            return search_results

        # Apply the spatial sort: get each package from SOLR, in the stored
        # ranking order.
        querier = PackageSearchQuery()
        ranked_packages = search_params['extras']['ext_spatial']
        search_results['results'] = [
            json.loads(querier.get_index(package_id)['data_dict'])
            for package_id, _ranking in ranked_packages]
        return search_results
Esempio n. 12
0
def get_validation_badge(resource, in_listing=False):
    """Return the HTML for a resource's text-based validation badge.

    :param resource: resource dict; ``package_id`` and ``id`` are read by
        index once a validation status is present
    :param in_listing: whether the badge is rendered inside a listing, in
        which case ``ckanext.validation.show_badges_in_listings`` can
        disable it
    :returns: an ``<a>`` element with a prefix and a status ``<span>``, or an
        empty string when badges are disabled for listings or the resource
        has no validation status
    """
    if in_listing:
        show_in_listings = asbool(
            config.get('ckanext.validation.show_badges_in_listings', True))
        if not show_in_listings:
            return ''

    current_status = resource.get('validation_status')
    if not current_status:
        return ''

    statuses = {
        'success': _('success'),
        'failure': _('failure'),
        'invalid': _('invalid'),
        'error': _('error'),
        'unknown': _('unknown'),
    }

    # A stored "failure" is displayed as "invalid"; anything outside the
    # known set is displayed as "unknown".
    if current_status == 'failure':
        status = 'invalid'
    elif current_status in ('success', 'error'):
        status = current_status
    else:
        status = 'unknown'

    # The route name differs between CKAN versions.
    if check_ckan_version(min_version='2.9.0'):
        route = 'validation.read'
    else:
        route = 'validation_read'
    validation_url = url_for(
        route,
        id=resource['package_id'],
        resource_id=resource['id'])

    template = u'''
<a href="{validation_url}" class="validation-badge" title="{title}">
    <span class="prefix">{prefix}</span><span class="status {status}">{status_title}</span>
</a>'''
    return template.format(
        validation_url=validation_url,
        prefix=_('data'),
        status=status,
        status_title=statuses[status],
        title=resource.get('validation_timestamp', ''))
Esempio n. 13
0
def ctp_list_types(context, data_dict):
    """List the dataset types that can be created.

    The base types come from scheming or from the native types, depending on
    ``_use_scheming()``; additional types are added and excluded types are
    removed.

    :param context: action context, passed to the access check
    :param data_dict: may contain ``with_labels``; when truthy the result is
        a list of ``{"name", "label"}`` dicts sorted by label
    :returns: list of type names, or list of labelled dicts
    """
    with_labels = tk.asbool(data_dict.get("with_labels"))

    tk.check_access("ctp_list_types", context, data_dict)

    base_types = (
        _get_scheming_types() if _use_scheming() else _get_native_types())
    type_names = set(base_types)
    type_names = type_names.union(_additional_types())
    type_names = type_names.difference(_exclude_types())
    result = list(type_names)
    if not with_labels:
        return result

    # Prefer the configured label and fall back to the translated type name.
    labels = _labels_from_config()
    labelled = [
        {"name": type_name, "label": labels.get(type_name) or tk._(type_name)}
        for type_name in result
    ]
    labelled.sort(key=itemgetter("label"))
    return labelled
    def configure(self, config):
        """Validate the S3 filestore settings when the plugin is configured.

        :param config: the CKAN config object
        :raises RuntimeError: if a required option is missing or empty
        """
        # Certain config options must exist for the plugin to work.
        required_options = (
            'ckanext.s3filestore.aws_access_key_id',
            'ckanext.s3filestore.aws_secret_access_key',
            'ckanext.s3filestore.aws_bucket_name',
            'ckanext.s3filestore.region_name',
            'ckanext.s3filestore.signature_version',
            'ckanext.s3filestore.host_name',
        )
        for option in required_options:
            if config.get(option, None):
                continue
            raise RuntimeError(
                "{0} is not configured. Please amend your .ini file.".format(
                    option))

        # Check that options actually work, if not exceptions will be raised
        check_on_startup = toolkit.asbool(
            config.get('ckanext.s3filestore.check_access_on_startup', True))
        if check_on_startup:
            s3_uploader = ckanext.s3filestore.uploader.BaseS3Uploader()
            s3_uploader.get_s3_bucket(
                config.get('ckanext.s3filestore.aws_bucket_name'))
Esempio n. 15
0
    def configure(self, config):
        """Check the mandatory S3 settings and, optionally, bucket access.

        :param config: the CKAN config object
        :raises RuntimeError: naming the first required option that is
            missing or empty
        """
        mandatory = (
            'ckanext.s3filestore.aws_access_key_id',
            'ckanext.s3filestore.aws_secret_access_key',
            'ckanext.s3filestore.aws_bucket_name',
            'ckanext.s3filestore.region_name',
            'ckanext.s3filestore.signature_version',
            'ckanext.s3filestore.host_name'
        )
        # The plugin cannot work without these options, so fail on the first
        # one that has no value.
        first_missing = next(
            (name for name in mandatory if not config.get(name, None)), None)
        if first_missing is not None:
            message = "{0} is not configured. Please amend your .ini file."
            raise RuntimeError(message.format(first_missing))

        # Check that options actually work, if not exceptions will be raised
        if not toolkit.asbool(
                config.get('ckanext.s3filestore.check_access_on_startup',
                           True)):
            return
        ckanext.s3filestore.uploader.BaseS3Uploader().get_s3_bucket(
            config.get('ckanext.s3filestore.aws_bucket_name'))
Esempio n. 16
0
def run_validation_job(resource):
    """Validate a resource's data and record the outcome.

    Marks the resource's ``Validation`` record as running (creating one when
    none exists), runs ``_validate_table`` on the resource file or URL,
    stores the report or error on the record, and finally patches
    ``validation_status`` and ``validation_timestamp`` onto the resource
    through the ``resource_patch`` action.

    :param resource: resource dict. ``id``, ``package_id`` and ``format``
        are always read by index; ``url`` is read when no local upload path
        is available
    """

    log.debug(u'Validating resource {}'.format(resource['id']))

    # Reuse the existing validation record for this resource, if any.
    try:
        validation = Session.query(Validation).filter(
            Validation.resource_id == resource['id']).one()
    except NoResultFound:
        validation = None

    if not validation:
        validation = Validation(resource_id=resource['id'])

    # Persist the "running" state before the validation starts.
    validation.status = u'running'
    Session.add(validation)
    Session.commit()

    # Site-wide default options come from the config as a JSON string.
    options = t.config.get(u'ckanext.validation.default_validation_options')
    if options:
        options = json.loads(options)
    else:
        options = {}

    # Per-resource options (JSON string or dict) override the defaults.
    # NOTE(review): ``basestring`` is a Python 2 builtin; presumably this
    # code targets Python 2 or the name is aliased elsewhere -- confirm.
    resource_options = resource.get(u'validation_options')
    if resource_options and isinstance(resource_options, basestring):
        resource_options = json.loads(resource_options)
    if resource_options:
        options.update(resource_options)

    dataset = t.get_action('package_show')({
        'ignore_auth': True
    }, {
        'id': resource['package_id']
    })

    # Uploaded files are validated from their local path when the default
    # uploader is in use; otherwise the resource URL is used (see below).
    source = None
    if resource.get(u'url_type') == u'upload':
        upload = uploader.get_resource_uploader(resource)
        if isinstance(upload, uploader.ResourceUpload):
            source = upload.get_path(resource[u'id'])
        else:
            # Upload is not the default implementation (ie it's a cloud storage
            # implementation)
            pass_auth_header = t.asbool(
                t.config.get(u'ckanext.validation.pass_auth_header', True))
            if dataset[u'private'] and pass_auth_header:
                # Private datasets need an Authorization header to fetch the
                # file; the configured value wins over the site user API key.
                s = requests.Session()
                s.headers.update({
                    u'Authorization':
                    t.config.get(u'ckanext.validation.pass_auth_header_value',
                                 _get_site_user_api_key())
                })

                options[u'http_session'] = s

    if not source:
        source = resource[u'url']

    # A schema given as a string is either a URL to fetch or inline JSON.
    schema = resource.get(u'schema')
    if schema and isinstance(schema, basestring):
        if schema.startswith('http'):
            r = requests.get(schema)
            schema = r.json()
        else:
            schema = json.loads(schema)

    _format = resource[u'format'].lower()

    report = _validate_table(source, _format=_format, schema=schema, **options)

    # Hide uploaded files
    for table in report.get('tables', []):
        if table['source'].startswith('/'):
            table['source'] = resource['url']
    # Strip the quoted table name from each warning message.
    for index, warning in enumerate(report.get('warnings', [])):
        report['warnings'][index] = re.sub(r'Table ".*"', 'Table', warning)

    # A report without tables is stored as an error rather than a result.
    if report['table-count'] > 0:
        validation.status = u'success' if report[u'valid'] else u'failure'
        validation.report = report
    else:
        validation.status = u'error'
        validation.error = {
            'message': '\n'.join(report['warnings']) or u'No tables found'
        }
    validation.finished = datetime.datetime.utcnow()

    Session.add(validation)
    Session.commit()

    # Store result status in resource
    t.get_action('resource_patch')(
        {
            'ignore_auth': True,
            'user': t.get_action('get_site_user')({
                'ignore_auth': True
            })['name'],
            '_validation_performed': True
        }, {
            'id': resource['id'],
            'validation_status': validation.status,
            'validation_timestamp': validation.finished.isoformat()
        })
def config_enable_user_id():
    """Return the ``googleanalytics.enable_user_id`` option as a boolean.

    The option defaults to ``False`` when it is not configured.
    """
    raw_value = tk.config.get("googleanalytics.enable_user_id", False)
    return tk.asbool(raw_value)
def ctp_use_separate_route():
    """Return the ``create_typed_package.use_separate_route`` option as a
    boolean, as converted by ``tk.asbool``.
    """
    raw_value = tk.config.get("create_typed_package.use_separate_route")
    return tk.asbool(raw_value)
Esempio n. 19
0
def _use_scheming():
    """Return the ``create_typed_package.use_scheming`` option as a boolean,
    as converted by ``tk.asbool``.
    """
    raw_value = tk.config.get("create_typed_package.use_scheming")
    return tk.asbool(raw_value)
Esempio n. 20
0
def _add_dataset_search(showcase_id, showcase_name):
    '''
    Search logic for discovering datasets to add to a showcase.

    Reads the search parameters from the current request, runs
    ``package_search`` while excluding the datasets already associated with
    the showcase, and stores the results, paging, sorting and facet
    information on the template context ``c``.

    Aborts with a 400 response when a ``_<facet>_limit`` request parameter
    is not an integer.

    :param showcase_id: id used to look up the datasets already associated
        with the showcase, which are excluded from the search
    :param showcase_name: name passed to ``_search_url`` when building the
        sort and pager links
    :returns: ``None``; all output is written to ``c``
    '''

    from ckan.lib.search import SearchError

    package_type = 'dataset'

    # unicode format (decoded from utf8)
    q = c.q = tk.request.params.get('q', u'')
    c.query_error = False
    page = h.get_page_number(tk.request.params)

    limit = int(tk.config.get('ckan.datasets_per_page', 20))

    # most search operations should reset the page counter:
    params_nopage = [(k, v) for k, v in tk.request.params.items()
                     if k != 'page']

    def drill_down_url(alternative_url=None, **by):
        return h.add_url_param(alternative_url=alternative_url,
                               controller='dataset'
                               if tk.check_ckan_version('2.9') else 'package',
                               action='search',
                               new_params=by)

    c.drill_down_url = drill_down_url

    def remove_field(key, value=None, replace=None):
        return h.remove_url_param(key,
                                  value=value,
                                  replace=replace,
                                  controller='dataset' if
                                  tk.check_ckan_version('2.9') else 'package',
                                  action='search')

    c.remove_field = remove_field

    sort_by = tk.request.params.get('sort', None)
    params_nosort = [(k, v) for k, v in params_nopage if k != 'sort']

    def _sort_by(fields):
        """
        Sort by the given list of fields.

        Each entry in the list is a 2-tuple: (fieldname, sort_order)

        eg - [('metadata_modified', 'desc'), ('name', 'asc')]

        If fields is empty, then the default ordering is used.
        """
        params = params_nosort[:]

        if fields:
            sort_string = ', '.join('%s %s' % f for f in fields)
            params.append(('sort', sort_string))
        return _search_url(params, showcase_name)

    c.sort_by = _sort_by
    if sort_by is None:
        c.sort_by_fields = []
    else:
        c.sort_by_fields = [field.split()[0] for field in sort_by.split(',')]

    def pager_url(q=None, page=None):
        params = list(params_nopage)
        params.append(('page', page))
        return _search_url(params, showcase_name)

    c.search_url_params = urlencode(_encode_params(params_nopage))

    try:
        c.fields = []
        # c.fields_grouped will contain a dict of params containing
        # a list of values eg {'tags':['tag1', 'tag2']}
        c.fields_grouped = {}
        search_extras = {}
        fq = ''
        for (param, value) in tk.request.params.items():
            if param not in ['q', 'page', 'sort'] \
                    and len(value) and not param.startswith('_'):
                if not param.startswith('ext_'):
                    c.fields.append((param, value))
                    fq += ' %s:"%s"' % (param, value)
                    if param not in c.fields_grouped:
                        c.fields_grouped[param] = [value]
                    else:
                        c.fields_grouped[param].append(value)
                else:
                    # ``ext_`` parameters are passed on as search extras
                    # instead of filter queries.
                    search_extras[param] = value

        context = {
            'model': model,
            'session': model.Session,
            'user': c.user or c.author,
            'for_view': True,
            'auth_user_obj': c.userobj
        }

        if package_type and package_type != 'dataset':
            # Only show datasets of this particular type
            fq += ' +dataset_type:{type}'.format(type=package_type)
        else:
            # Unless changed via config options, don't show non standard
            # dataset types on the default search page
            if not tk.asbool(
                    tk.config.get('ckan.search.show_all_types', 'False')):
                fq += ' +dataset_type:dataset'

        # Only search for packages that aren't already associated with the
        # Showcase
        associated_package_ids = ShowcasePackageAssociation.get_package_ids_for_showcase(
            showcase_id)
        # flatten resulting list to space separated string
        if associated_package_ids:
            associated_package_ids_str = \
                ' OR '.join([id[0] for id in associated_package_ids])
            fq += ' !id:({0})'.format(associated_package_ids_str)

        facets = OrderedDict()

        default_facet_titles = {
            'organization': _('Organizations'),
            'groups': _('Groups'),
            'tags': _('Tags'),
            'res_format': _('Formats'),
            'license_id': _('Licenses'),
        }

        # for CKAN-Versions that do not provide the facets-method from
        # helper-context, import facets from ckan.common
        if hasattr(h, 'facets'):
            current_facets = h.facets()
        else:
            from ckan.common import g
            current_facets = g.facets

        for facet in current_facets:
            if facet in default_facet_titles:
                facets[facet] = default_facet_titles[facet]
            else:
                facets[facet] = facet

        # Facet titles
        for plugin in p.PluginImplementations(p.IFacets):
            facets = plugin.dataset_facets(facets, package_type)

        c.facet_titles = facets

        data_dict = {
            'q': q,
            'fq': fq.strip(),
            'facet.field': list(facets.keys()),
            'rows': limit,
            'start': (page - 1) * limit,
            'sort': sort_by,
            'extras': search_extras
        }

        query = tk.get_action('package_search')(context, data_dict)
        c.sort_by_selected = query['sort']

        c.page = h.Page(collection=query['results'],
                        page=page,
                        url=pager_url,
                        item_count=query['count'],
                        items_per_page=limit)
        c.facets = query['facets']
        c.search_facets = query['search_facets']
        c.page.items = query['results']
    except SearchError as se:
        # A failed search still renders: flag the error and show an empty
        # result page without facets.
        log.error('Dataset search error: %r', se.args)
        c.query_error = True
        c.facets = {}
        c.search_facets = {}
        c.page = h.Page(collection=[])
    c.search_facets_limits = {}
    for facet in c.search_facets.keys():
        try:
            facet_limit = int(
                tk.request.params.get(
                    '_%s_limit' % facet,
                    int(tk.config.get('search.facets.default', 10))))
        except ValueError:
            # ``int()`` raises the builtin ValueError; the toolkit does not
            # provide a ``ValueError`` attribute to catch instead.
            abort(
                400,
                _("Parameter '{parameter_name}' is not an integer").format(
                    parameter_name='_%s_limit' % facet))
        c.search_facets_limits[facet] = facet_limit
Esempio n. 21
0
    def __init__(self, resource):
        '''Setup the resource uploader. Actual uploading performed by
        `upload()`.

        Create a storage path in the format:
        <ckanext.s3filestore.aws_storage_path>/resources/

        When the resource carries an upload, the resource dict is updated
        with the munged filename, the ``upload`` url type, a modification
        time, a format taken from the filename extension (if none is set)
        and a mimetype detected from the file content (if none is set). When
        the upload is being cleared for an existing resource, the previous
        url is remembered and the url type is reset.

        :param resource: resource dict; ``upload`` and ``clear_upload`` are
            popped from it and it may be modified as described above
        '''

        super(S3ResourceUploader, self).__init__()

        self.use_filename = toolkit.asbool(
            config.get('ckanext.s3filestore.use_filename', False))
        path = config.get('ckanext.s3filestore.aws_storage_path', '')
        self.storage_path = os.path.join(path, 'resources')
        self.filename = None
        self.old_filename = None
        self.url = resource['url']
        # Hold onto resource just in case we need to fallback to Default ResourceUpload from core ckan.lib.uploader
        self.resource = resource

        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        mime = magic.Magic(mime=True)

        if isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
            self.filesize = 0  # bytes

            self.filename = upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            resource['last_modified'] = datetime.datetime.utcnow()

            # Check the resource format from its filename extension,
            # if no extension use the default CKAN implementation
            if 'format' not in resource:
                resource_format = os.path.splitext(self.filename)[1][1:]
                if resource_format:
                    resource['format'] = resource_format

            self.upload_file = _get_underlying_file(upload_field_storage)
            # Measure the size by seeking to the end of the buffer.
            self.upload_file.seek(0, os.SEEK_END)
            self.filesize = self.upload_file.tell()
            # go back to the beginning of the file buffer
            self.upload_file.seek(0, os.SEEK_SET)

            self.mimetype = resource.get('mimetype')
            if not self.mimetype:
                try:
                    # 512 bytes should be enough for a mimetype check
                    self.mimetype = resource['mimetype'] = mime.from_buffer(
                        self.upload_file.read(512))

                    # additional check on text/plain mimetypes for
                    # more reliable result, if None continue with text/plain
                    if self.mimetype == 'text/plain':
                        self.mimetype = resource['mimetype'] = \
                            mimetypes.guess_type(self.filename, strict=False)[0] or 'text/plain'
                except Exception:
                    # Mimetype detection is best effort; the upload goes on
                    # without a detected mimetype.
                    pass
                finally:
                    # Always go back to the beginning of the file buffer,
                    # even when detection failed after the read above;
                    # otherwise the file would stay positioned past the
                    # bytes consumed by the check.
                    self.upload_file.seek(0, os.SEEK_SET)
        elif self.clear and resource.get('id'):
            # New, not yet created resources can be marked for deletion if the
            # users cancels an upload and enters a URL instead.
            old_resource = model.Session.query(model.Resource) \
                .get(resource['id'])
            self.old_filename = old_resource.url
            resource['url_type'] = ''
Esempio n. 22
0
def run_validation_job(resource):
    """Validate a resource's data and record the outcome.

    Moves the resource's validation job to the running state through
    ``ValidationStatusHelper`` (creating the job when it does not exist),
    runs ``_validate_table`` on the resource file or URL, records the report
    or error on the job, and finally patches ``validation_status`` and
    ``validation_timestamp`` onto the resource through the
    ``resource_patch`` action.

    Returns early, without validating, when the helper reports that the job
    is already running or in an invalid state.

    :param resource: a resource dict, or a resource id string which is
        resolved with ``resource_show``
    """
    vsh = ValidationStatusHelper()
    # handle either a resource dict or just an ID
    # ID is more efficient, as resource dicts can be very large
    if isinstance(resource, string_types):
        log.debug(u'run_validation_job: calling resource_show: %s', resource)
        resource = t.get_action('resource_show')({'ignore_auth': True}, {'id': resource})

    resource_id = resource.get('id')
    if resource_id:
        log.debug(u'Validating resource: %s', resource_id)
    else:
        log.debug(u'Validating resource dict: %s', resource)
    validation_record = None
    try:
        validation_record = vsh.updateValidationJobStatus(Session, resource_id, StatusTypes.running)
    except ValidationJobAlreadyRunning as e:
        log.error("Won't run enqueued job %s as job is already running or in invalid state: %s", resource['id'], e)
        return
    except ValidationJobDoesNotExist:
        # No job exists yet: create it, then mark it as running.
        validation_record = vsh.createValidationJob(Session, resource['id'])
        validation_record = vsh.updateValidationJobStatus(
            session=Session, resource_id=resource_id,
            status=StatusTypes.running, validationRecord=validation_record)

    # Site-wide default options come from the config as a JSON string.
    options = t.config.get(
        u'ckanext.validation.default_validation_options')
    if options:
        options = json.loads(options)
    else:
        options = {}

    # Per-resource options (JSON string or dict) override the defaults.
    resource_options = resource.get(u'validation_options')
    if resource_options and isinstance(resource_options, string_types):
        resource_options = json.loads(resource_options)
    if resource_options:
        options.update(resource_options)

    dataset = t.get_action('package_show')(
        {'ignore_auth': True}, {'id': resource['package_id']})

    # Uploaded files are validated from their local path when the default
    # uploader is in use; otherwise the resource URL is used (see below).
    source = None
    if resource.get(u'url_type') == u'upload':
        upload = uploader.get_resource_uploader(resource)
        if isinstance(upload, uploader.ResourceUpload):
            source = upload.get_path(resource[u'id'])
        else:
            # Upload is not the default implementation (ie it's a cloud storage
            # implementation)
            pass_auth_header = t.asbool(
                t.config.get(u'ckanext.validation.pass_auth_header', True))
            if dataset[u'private'] and pass_auth_header:
                # Private datasets need an Authorization header to fetch the
                # file; the configured value wins over the site user API key.
                s = requests.Session()
                s.headers.update({
                    u'Authorization': t.config.get(
                        u'ckanext.validation.pass_auth_header_value',
                        _get_site_user_api_key())
                })

                options[u'http_session'] = s

    if not source:
        source = resource[u'url']

    # A schema given as a string is either a URL to fetch or inline JSON.
    schema = resource.get(u'schema')
    if schema and isinstance(schema, string_types):
        if schema.startswith('http'):
            r = requests.get(schema)
            schema = r.json()
        else:
            schema = json.loads(schema)

    _format = resource[u'format'].lower()

    report = _validate_table(source, _format=_format, schema=schema, **options)

    # Hide uploaded files
    for table in report.get('tables', []):
        if table['source'].startswith('/'):
            table['source'] = resource['url']
    # Strip the quoted table name from each warning message.
    for index, warning in enumerate(report.get('warnings', [])):
        report['warnings'][index] = re.sub(r'Table ".*"', 'Table', warning)

    # A report without tables is recorded as an error rather than a result.
    if report['table-count'] > 0:
        status = StatusTypes.success if report[u'valid'] else StatusTypes.failure
        validation_record = vsh.updateValidationJobStatus(Session, resource['id'], status, report, None, validation_record)
    else:
        status = StatusTypes.error
        error_payload = {'message': '\n'.join(report['warnings']) or u'No tables found'}
        validation_record = vsh.updateValidationJobStatus(Session, resource['id'], status, None, error_payload, validation_record)

    # Store result status in resource
    t.get_action('resource_patch')(
        {'ignore_auth': True,
         'user': t.get_action('get_site_user')({'ignore_auth': True})['name'],
         '_validation_performed': True},
        {'id': resource['id'],
         'validation_status': validation_record.status,
         'validation_timestamp': validation_record.finished.isoformat()})
Esempio n. 23
0
    def configure(self, config):
        """Set up the spatial model unless ``ckan.spatial.testing`` is on.

        :param config: the CKAN config object
        """
        from ckanext.spatial.model.package_extent import setup as setup_model

        testing = tk.asbool(config.get('ckan.spatial.testing', 'False'))
        if testing:
            return

        log.debug('Setting up the spatial model')
        setup_model()
def get_closing_circumstance_list():
    """Return the closing circumstances for data requests.

    :returns: the result of ``helpers.get_closing_circumstances()`` when
        ``ckan.datarequests.enable_closing_circumstances`` is enabled,
        otherwise an empty list
    """
    enabled = toolkit.asbool(
        config.get('ckan.datarequests.enable_closing_circumstances', False))
    if not enabled:
        return []

    # The helpers module is only imported when the feature is switched on.
    from ckanext.datarequests import helpers
    return helpers.get_closing_circumstances()