def create_sort_groups():
    """Populate ``SortGroupsModel`` with one row per active CKAN group.

    Lists all groups acting as the first sysadmin (bypassing auth), then
    stores each active group's id together with its ``mk``/``en``/``sq``
    translated titles so groups can later be sorted per language.
    """
    admin_name = get_sysadmins()[0].name
    admin_context = {'user': admin_name, 'ignore_auth': True}
    show_group = toolkit.get_action('group_show')
    # Only the group metadata itself is needed; skip all expensive extras.
    show_options = {
        'include_datasets': True,
        'include_dataset_count': False,
        'include_extras': True,
        'include_users': False,
        'include_groups': False,
        'include_tags': False,
        'include_followers': False,
    }
    for name in _group_list(admin_context, {}):
        group = show_group({'user': None}, dict(show_options, id=name))
        # Deleted/draft groups are skipped entirely.
        if group.get('state') != 'active':
            continue
        titles = group.get('title_translated', {})
        SortGroupsModel(
            group_id=group.get('id', ''),
            title_mk=titles.get('mk', ''),
            title_en=titles.get('en', ''),
            title_sq=titles.get('sq', ''),
        ).save()
    return
def create_sort_organizations():
    """Populate ``SortOrganizationsModel`` with one row per active organization.

    Lists all organizations acting as the first sysadmin (bypassing auth),
    then stores each active organization's id together with its
    ``mk``/``en``/``sq`` translated titles so organizations can later be
    sorted per language.  Mirrors :func:`create_sort_groups`.
    """
    sysadmin = get_sysadmins()[0].name
    context = {'user': sysadmin, 'ignore_auth': True}
    orgs = ckan_organization_list(context, {})
    # Fix: dropped the dead `sort_org = []` list initialization — the name
    # was unconditionally rebound to a dict before any use inside the loop.
    for org_name in orgs:
        org = toolkit.get_action('organization_show')(
            {'user': None},
            {
                'id': org_name,
                'include_datasets': True,
                'include_dataset_count': False,
                'include_extras': True,
                'include_users': False,
                'include_groups': False,
                'include_tags': False,
                'include_followers': False
            })
        # Skip deleted/draft organizations.
        if org.get('state') == 'active':
            sort_org = {
                'org_id': org.get('id', ''),
                'title_mk': org.get('title_translated', {}).get('mk', ''),
                'title_en': org.get('title_translated', {}).get('en', ''),
                'title_sq': org.get('title_translated', {}).get('sq', '')
            }
            so = SortOrganizationsModel(**sort_org)
            so.save()
    return
def prepare_zip_resources(context, data_dict):
    """Creates zip archive and stores it under CKAN's storage path.

    :param resources: a list of ids of the resources
    :type resources: list

    :return: a dictionary containing the zip_id of the created archive
    :rtype: dict
    """
    # Collision-free archive name under the extension's temp storage dir.
    file_name = uuid.uuid4().hex + '.{ext}'.format(ext='zip')
    file_path = helpers.get_storage_path_for(
        'temp-us_ed_theme') + '/' + file_name
    # Becomes True once at least one resource is written into the archive.
    resourceArchived = False
    package_id = None
    try:
        resource_ids = data_dict.get('resources')
        # NOTE(review): `zip` shadows the builtin of the same name for the
        # duration of this `with` block.
        with zipfile.ZipFile(file_path, 'w') as zip:
            for resource_id in resource_ids:
                # `data_dict` is rebound here; the caller's dict is no
                # longer referenced after `resource_ids` was read above.
                data_dict = {'id': resource_id}
                resource = toolkit.get_action('resource_show')({}, data_dict)
                url = resource.get('url')
                # Uploaded files keep their original file name (last URL
                # segment); linked resources fall back to the resource name.
                if resource['url_type'] == 'upload':
                    name = url.split('/')[-1]
                else:
                    name = resource['name']
                # If the chosen name has no extension, derive one from the
                # resource format (e.g. "CSV" -> ".csv").
                if os.path.splitext(name)[-1] == '':
                    _format = resource['format']
                    if _format:
                        name += '.{ext}'.format(ext=_format.lower())
                # Remember the package of the first resource encountered.
                if package_id is None:
                    package_id = resource['package_id']
                # Authorize as a sysadmin so private resources can be fetched.
                headers = {'Authorization': get_sysadmins()[0].apikey}
                try:
                    r = requests.get(url, headers=headers)
                except Exception:
                    # Best-effort: skip resources that cannot be downloaded.
                    continue
                # Strip any "; charset=..." suffix before the whitelist check.
                content_type = r.headers['Content-Type'].split(';')[0]
                if content_type in SUPPORTED_RESOURCE_MIMETYPES:
                    resourceArchived = True
                    zip.writestr(name, r.content)
    except Exception, ex:
        log.error('An error occured while preparing zip archive. Error: %s' % ex)
        raise
    # NOTE(review): the documented return value (dict with zip_id) is not
    # produced in the code visible here — confirm the function continues
    # beyond this chunk.
def get_admin_email():
    """Resolve the admin email to which issue notifications are sent.

    The config property ``ckanext.datagovmk.site_admin_email`` takes
    precedence; when absent, the first CKAN sysadmin's email is used.

    :returns: ``dict`` with ``email`` and ``name`` keys, or ``None`` when
        neither a configured address nor any sysadmin exists.
    """
    configured = config.get('ckanext.datagovmk.site_admin_email', False)
    if configured:
        # Use the local-part of the configured address as the display name.
        return {'email': configured, 'name': configured.split('@')[0]}
    admins = get_sysadmins()
    if not admins:
        return None
    first = admins[0]
    return {'email': first.email, 'name': first.fullname or first.name}
def download(context, resource, url_timeout=30,
             max_content_length='default',
             method='GET'):
    '''Given a resource, tries to download it.

    Params:
      resource - dict of the resource

    Exceptions from tidy_url may be propagated:
       LinkInvalidError if the URL is invalid

    If there is an error performing the download, raises:
       DownloadException - connection problems etc.
       DownloadError - HTTP status code is an error or 0 length

    If download is not suitable (e.g. too large), raises:
       ChooseNotToDownload

    If the basic GET fails then it will try it with common API
    parameters (SPARQL, WMS etc) to get a better response.

    Returns a dict of results of a successful download:
      mimetype, size, hash, headers, saved_file, url_redirected_to
    '''
    # Local imports keep the archiver settings / pylons config out of module
    # import time.
    from ckanext.archiver import default_settings as settings
    from pylons import config
    if max_content_length == 'default':
        max_content_length = settings.MAX_CONTENT_LENGTH

    url = resource['url']
    url = tidy_url(url)

    # Uploaded resources store a site-relative path; make it absolute.
    if (resource.get('url_type') == 'upload' and
            not url.startswith('http')):
        url = context['site_url'].rstrip('/') + url

    hosted_externally = not url.startswith(config['ckan.site_url'])
    if resource.get('url_type') == 'upload' and hosted_externally:
        # ckanext-cloudstorage for example does that

        # enable ckanext.archiver.archive_cloud for qa to work on cloud resources
        # till https://github.com/ckan/ckanext-qa/issues/48 is resolved
        # Warning: this will result in double storage of all files below archival filesize limit
        if not config.get('ckanext.archiver.archive_cloud', False):
            raise ChooseNotToDownload('Skipping resource hosted externally to download resource: %s'
                                      % url, url)

    headers = _set_user_agent_string({})

    # Authorize as a sysadmin for same-site URLs so private resources can
    # be archived.
    if len(get_sysadmins()) > 0:
        sysadmin = get_sysadmins()[0]

        if url.startswith(config.get('ckan.site_url', '')):
            headers['Authorization'] = sysadmin.apikey

    # start the download - just get the headers
    # May raise DownloadException
    method_func = {'GET': requests.get, 'POST': requests.post}[method]
    res = requests_wrapper(log, method_func, url, timeout=url_timeout,
                           stream=True, headers=headers,
                           verify=verify_https(),
                           )
    url_redirected_to = res.url if url != res.url else None

    # If the server's ETag matches the one recorded for the previous
    # archival, the content has not changed — bail out early.
    if context.get('previous') and ('etag' in res.headers):
        if context.get('previous').etag == res.headers['etag']:
            log.info("ETAG matches, not downloading content")
            raise NotChanged("etag suggests content has not changed")

    if not res.ok:  # i.e. 404 or something
        raise DownloadError('Server reported status error: %s %s' %
                            (res.status_code, res.reason),
                            url_redirected_to)
    log.info('GET started successfully. Content headers: %r', res.headers)

    # record headers
    mimetype = _clean_content_type(res.headers.get('content-type', '').lower())

    # make sure resource content-length does not exceed our maximum
    content_length = res.headers.get('content-length')
    if content_length:
        try:
            content_length = int(content_length)
        except ValueError:
            # if there are multiple Content-Length headers, requests
            # will return all the values, comma separated
            if ',' in content_length:
                try:
                    content_length = int(content_length.split(',')[0])
                except ValueError:
                    pass

    if isinstance(content_length, int) and \
       int(content_length) >= max_content_length:
        # record fact that resource is too large to archive
        log.warning('Resource too large to download: %s > max (%s). '
                    'Resource: %s %r', content_length, max_content_length,
                    resource['id'], url)
        raise ChooseNotToDownload(
            _('Content-length %s exceeds maximum '
              'allowed value %s') %
            (content_length, max_content_length),
            url_redirected_to)

    # content_length in the headers is useful but can be unreliable, so when we
    # download, we will monitor it doesn't go over the max.

    # continue the download - stream the response body
    def get_content():
        return res.content
    log.info('Downloading the body')
    content = requests_wrapper(log, get_content)

    # APIs can return status 200, but contain an error message in the body
    if response_is_an_api_error(content):
        raise DownloadError(_('Server content contained an API error message: %s') % \
                            content[:250],
                            url_redirected_to)

    # Re-check the limit against the actual body length since the header
    # value can be missing or wrong.
    content_length = len(content)
    if content_length > max_content_length:
        raise ChooseNotToDownload(
            _("Content-length %s exceeds maximum allowed value %s") %
            (content_length, max_content_length),
            url_redirected_to)

    log.info('Saving resource')
    try:
        # NOTE(review): `hash` shadows the builtin here.
        length, hash, saved_file_path = _save_resource(resource, res, max_content_length)
    except ChooseNotToDownload, e:
        # Re-raise with the redirect target attached for the caller.
        raise ChooseNotToDownload(str(e), url_redirected_to)
    # NOTE(review): the documented result dict (mimetype, size, hash, ...)
    # is not assembled in the code visible here — confirm the function
    # continues beyond this chunk.
def send_request(self):
    '''Send mail to resource owner.

    Handles a POST of the "request data" contact form: creates the request
    record, then emails the dataset maintainer(s) — or, when none of the
    listed maintainers resolve to users, the organization admins.

    :param data: Contact form data.
    :type data: object

    :rtype: json
    '''
    context = {
        'model': model,
        'session': model.Session,
        'user': c.user,
        'auth_user_obj': c.userobj
    }
    try:
        if p.toolkit.request.method == 'POST':
            data = dict(toolkit.request.POST)
            _get_action('requestdata_request_create', data)
    except NotAuthorized:
        abort(403, _('Unauthorized to update this dataset.'))
    except ValidationError as e:
        # Surface field-level validation errors back to the form as JSON.
        error = {'success': False, 'error': {'fields': e.error_dict}}
        return json.dumps(error)
    # NOTE(review): `data` is only bound on POST — a non-POST call would
    # raise NameError on the next line; confirm routing restricts this to
    # POST.
    data_dict = {'id': data['package_id']}
    package = _get_action('package_show', data_dict)
    sender_name = data.get('sender_name', '')
    user_obj = context['auth_user_obj']
    # Organizations the requesting user can read, joined for the email body.
    data_dict = {'id': user_obj.id, 'permission': 'read'}
    organizations = _get_action('organization_list_for_user', data_dict)
    orgs = []
    for i in organizations:
        orgs.append(i['display_name'])
    org = ','.join(orgs)
    dataset_name = package['name']
    dataset_title = package['title']
    email = user_obj.email
    message = data['message_content']
    creator_user_id = package['creator_user_id']
    data_owner =\
        _get_action('user_show', {'id': creator_user_id}).get('name')
    if len(get_sysadmins()) > 0:
        # Maintainer lookups below need sysadmin rights (user_show on
        # arbitrary ids).
        sysadmin = get_sysadmins()[0].name
        context_sysadmin = {
            'model': model,
            'session': model.Session,
            'user': sysadmin,
            'auth_user_obj': c.userobj
        }
        to = package['maintainer']
        if to is None:
            message = {
                'success': False,
                'error': {
                    'fields': {
                        'email': 'Dataset maintainer email not found.'
                    }
                }
            }
            return json.dumps(message)
        # The maintainer field holds a comma-separated list of user ids/names.
        maintainers = to.split(',')
        data_dict = {'users': []}
        users_email = []
        only_org_admins = False
        data_maintainers = []
        # Get users objects from maintainers list
        for id in maintainers:
            try:
                user =\
                    toolkit.get_action('user_show')(context_sysadmin,
                                                    {'id': id})
                data_dict['users'].append(user)
                users_email.append(user['email'])
                data_maintainers.append(user['fullname'] or user['name'])
            except NotFound:
                # Ignore maintainer entries that don't match a user.
                pass
        mail_subject =\
            config.get('ckan.site_title') + ': New data request "'\
            + dataset_title + '"'
        # Fall back to the organization admins when no maintainer resolved.
        if len(users_email) == 0:
            admins = self._org_admins_for_dataset(dataset_name)
            for admin in admins:
                users_email.append(admin.get('email'))
                data_maintainers.append(admin.get('fullname'))
            only_org_admins = True
        content = _get_email_configuration(sender_name, data_owner,
                                           dataset_name, email, message, org,
                                           data_maintainers,
                                           only_org_admins=only_org_admins)
        response_message = \
            emailer.send_email(content, users_email, mail_subject)
        # notify package creator that new data request was made
        _get_action('requestdata_notification_create', data_dict)
        data_dict = {'package_id': data['package_id'], 'flag': 'request'}
        action_name = 'requestdata_increment_request_data_counters'
        _get_action(action_name, data_dict)
        return json.dumps(response_message)
    else:
        # No sysadmin available: the request record was created, but we
        # cannot resolve maintainers, so no email is sent.
        message = {
            'success': True,
            'message': 'Request sent, but email message was not sent.'
        }
        return json.dumps(message)
def send_request(self):
    '''Send mail to resource owner.

    HDX variant of the "request data" handler: creates the request record,
    then sends two templated emails via ``hdx_mailer`` — one to the dataset
    maintainers (or org admins as fallback) and one confirmation using the
    user-facing snippet.

    :param data: Contact form data.
    :type data: object

    :rtype: json
    '''
    context = {'model': model, 'session': model.Session,
               'user': c.user, 'auth_user_obj': c.userobj}
    try:
        if p.toolkit.request.method == 'POST':
            data = dict(toolkit.request.POST)
            _get_action('requestdata_request_create', data)
    except NotAuthorized:
        abort(403, _('Unauthorized to update this dataset.'))
    except ValidationError as e:
        # Surface field-level validation errors back to the form as JSON.
        error = {
            'success': False,
            'error': {
                'fields': e.error_dict
            }
        }
        return json.dumps(error)
    # NOTE(review): `data` is only bound on POST — a non-POST call would
    # raise NameError on the next line; confirm routing restricts this to
    # POST.
    data_dict = {'id': data['package_id']}
    package = _get_action('package_show', data_dict)
    sender_name = data.get('sender_name', '')
    user_obj = context['auth_user_obj']
    # Organizations the requesting user can read, joined for the email body.
    data_dict = {
        'id': user_obj.id,
        'permission': 'read'
    }
    organizations = _get_action('hdx_organization_list_for_user', data_dict)
    orgs = []
    for i in organizations:
        orgs.append(i['display_name'])
    org = ','.join(orgs)
    dataset_name = package['name']
    dataset_title = package['title']
    email = user_obj.email
    message = data['message_content']
    creator_user_id = package['creator_user_id']
    data_owner =\
        _get_action('user_show', {'id': creator_user_id}).get('name')
    if len(get_sysadmins()) > 0:
        # Maintainer lookups below need sysadmin rights (user_show on
        # arbitrary ids).
        sysadmin = get_sysadmins()[0].name
        context_sysadmin = {
            'model': model,
            'session': model.Session,
            'user': sysadmin,
            'auth_user_obj': c.userobj
        }
        to = package['maintainer']
        if to is None:
            message = {
                'success': False,
                'error': {
                    'fields': {
                        'email': 'Dataset maintainer email not found.'
                    }
                }
            }
            return json.dumps(message)
        # The maintainer field holds a comma-separated list of user ids/names.
        maintainers = to.split(',')
        data_dict = {
            'users': []
        }
        users_email = []
        only_org_admins = False
        data_maintainers = []
        data_maintainers_ids = []
        # Get users objects from maintainers list
        # `user` keeps the last resolved maintainer; it seeds the
        # 'maintainer_fullname' template value below.
        user = {}
        for id in maintainers:
            try:
                user =\
                    toolkit.get_action('user_show')(context_sysadmin,
                                                    {'id': id})
                data_dict['users'].append(user)
                users_email.append({'display_name': user.get('fullname'),
                                    'email': user.get('email')})
                data_maintainers.append(user['fullname'] or user['name'])
                data_maintainers_ids.append(user['name'] or user['id'])
            except NotFound:
                # Ignore maintainer entries that don't match a user.
                pass
        mail_subject =\
            config.get('ckan.site_title') + ': New data request "'\
            + dataset_title + '"'
        # Fall back to the organization admins when no maintainer resolved.
        if len(users_email) == 0:
            admins = self._org_admins_for_dataset(dataset_name)
            # admins=og_create.get_organization_admins(package.get('owner_org'))
            for admin in admins:
                users_email.append({'display_name': admin.get('fullname'),
                                    'email': admin.get('email')})
                data_maintainers.append(admin.get('fullname'))
                data_maintainers_ids.append(admin.get('name') or admin.get('id'))
            only_org_admins = True
        # content = _get_email_configuration(
        #     sender_name, data_owner, dataset_name, email,
        #     message, org, data_maintainers,
        #     data_maintainers_ids=data_maintainers_ids,
        #     only_org_admins=only_org_admins)
        # response_message = \
        #     emailer.send_email(content, users_email, mail_subject)
        # First email: notify the maintainers/org admins about the request.
        subject = sender_name + u' has requested access to one of your datasets'
        email_data = {
            'user_fullname': sender_name,
            'user_email': email,
            'msg': message,
            'org_name': package.get('organization').get('title'),
            'dataset_link': h.url_for('dataset_read', id=dataset_name,
                                      qualified=True),
            'dataset_title': dataset_title,
            'maintainer_fullname': user.get('display_name') or user.get('fullname') if user else 'HDX user',
            'requestdata_org_url': h.url_for('requestdata_organization_requests',
                                             id=package.get('owner_org'),
                                             qualified=True)
        }
        hdx_mailer.mail_recipient(users_email, subject, email_data,
                                  footer='*****@*****.**',
                                  snippet='email/content/request_data_to_admins.html')
        # Second email: confirmation using the user-facing template.
        # NOTE(review): this is also addressed to `users_email` (the
        # maintainers) although the snippet targets the requesting user —
        # confirm the intended recipient.
        subject = u'Request for access to metadata-only dataset'
        email_data = {
            'user_fullname': sender_name,
            'msg': message,
            'org_name': package.get('organization').get('title'),
            'dataset_link': h.url_for('dataset_read', id=dataset_name,
                                      qualified=True),
            'dataset_title': dataset_title,
        }
        hdx_mailer.mail_recipient(users_email, subject, email_data,
                                  footer=email,
                                  snippet='email/content/request_data_to_user.html')
        # notify package creator that new data request was made
        _get_action('requestdata_notification_create', data_dict)
        data_dict = {
            'package_id': data['package_id'],
            'flag': 'request'
        }
        action_name = 'requestdata_increment_request_data_counters'
        _get_action(action_name, data_dict)
        response_dict = {
            'success': True,
            'message': 'Email message was successfully sent.'
        }
        return json.dumps(response_dict)
    else:
        # No sysadmin available: the request record was created, but we
        # cannot resolve maintainers, so no email is sent.
        message = {
            'success': True,
            'message': 'Request sent, but email message was not sent.'
        }
        return json.dumps(message)
def search(self):
    """Render the dataset search page.

    Customized CKAN package search controller action: additionally records
    each non-empty query via ``kwh_data_create`` (and ``user_query_create``)
    before running the normal faceted ``package_search``.
    """
    package_type = self._guess_package_type()

    try:
        context = {'model': model, 'user': c.user,
                   'auth_user_obj': c.userobj}
        check_access('site_read', context)
    except NotAuthorized:
        abort(403, _('Not authorized to see this page'))

    # unicode format (decoded from utf8)
    q = c.q = request.params.get('q', u'')
    search_for = request.params.get('_search-for', u'datasets')

    # Store search query in KWH data
    try:
        if q:
            # The KWH record is created as a sysadmin, bypassing auth.
            sysadmin = get_sysadmins()[0].name
            sysadmin_context = {
                'user': sysadmin,
                'ignore_auth': True
            }
            kwh_data = {
                'type': 'search_query',
                'title': q
            }
            logic.get_action(u'kwh_data_create')(
                sysadmin_context, kwh_data
            )

            if search_for not in SKIP_SEARCH_FOR:
                query_ctx = {
                    'ignore_auth': True
                }
                query_ctx.update(context)

                query_data = {
                    'query_text': q,
                    'query_type': 'dataset'
                }

                logic.get_action('user_query_create')(
                    query_ctx, query_data
                )
    except Exception as e:
        # Query logging is best-effort; never break search because of it.
        log.debug('Error while storing data: %s' % str(e))

    c.query_error = False
    page = h.get_page_number(request.params)

    limit = int(config.get('ckan.datasets_per_page', 20))

    # most search operations should reset the page counter:
    params_nopage = [(k, v) for k, v in request.params.items()
                     if k != 'page']

    def drill_down_url(alternative_url=None, **by):
        # URL that adds facet constraints to the current search.
        return h.add_url_param(alternative_url=alternative_url,
                               controller='package', action='search',
                               new_params=by)

    c.drill_down_url = drill_down_url

    def remove_field(key, value=None, replace=None):
        # URL that drops (or replaces) a facet constraint.
        return h.remove_url_param(key, value=value, replace=replace,
                                  controller='package', action='search',
                                  alternative_url=package_type)

    c.remove_field = remove_field

    sort_by = request.params.get('sort', None)
    params_nosort = [(k, v) for k, v in params_nopage if k != 'sort']

    def _sort_by(fields):
        """
        Sort by the given list of fields.

        Each entry in the list is a 2-tuple: (fieldname, sort_order)

        eg - [('metadata_modified', 'desc'), ('name', 'asc')]

        If fields is empty, then the default ordering is used.
        """
        params = params_nosort[:]

        if fields:
            sort_string = ', '.join('%s %s' % f for f in fields)
            params.append(('sort', sort_string))
        return search_url(params, package_type)

    c.sort_by = _sort_by
    if not sort_by:
        c.sort_by_fields = []
    else:
        c.sort_by_fields = [field.split()[0]
                            for field in sort_by.split(',')]

    def pager_url(q=None, page=None):
        params = list(params_nopage)
        params.append(('page', page))
        return search_url(params, package_type)

    c.search_url_params = urlencode(_encode_params(params_nopage))

    try:
        c.fields = []
        # c.fields_grouped will contain a dict of params containing
        # a list of values eg {'tags':['tag1', 'tag2']}
        c.fields_grouped = {}
        search_extras = {}
        fq = ''
        # Turn remaining query params into Solr fq filters; `ext_`-prefixed
        # ones are passed through as search extras instead.
        for (param, value) in request.params.items():
            if param not in ['q', 'page', 'sort'] \
                    and len(value) and not param.startswith('_'):
                if not param.startswith('ext_'):
                    c.fields.append((param, value))
                    fq += ' %s:"%s"' % (param, value)
                    if param not in c.fields_grouped:
                        c.fields_grouped[param] = [value]
                    else:
                        c.fields_grouped[param].append(value)
                else:
                    search_extras[param] = value

        context = {'model': model, 'session': model.Session,
                   'user': c.user, 'for_view': True,
                   'auth_user_obj': c.userobj}

        # Unless changed via config options, don't show other dataset
        # types any search page. Potential alternatives are do show them
        # on the default search page (dataset) or on one other search page
        search_all_type = config.get(
            'ckan.search.show_all_types', 'dataset')
        search_all = False

        try:
            # If the "type" is set to True or False, convert to bool
            # and we know that no type was specified, so use traditional
            # behaviour of applying this only to dataset type
            search_all = asbool(search_all_type)
            search_all_type = 'dataset'
        # Otherwise we treat as a string representing a type
        except ValueError:
            search_all = True

        if not package_type:
            package_type = 'dataset'

        if not search_all or package_type != search_all_type:
            # Only show datasets of this particular type
            fq += ' +dataset_type:{type}'.format(type=package_type)

        facets = OrderedDict()

        default_facet_titles = {
            'organization': _('Functional Units'),
            'groups': _('Joint Analysis'),
            'tags': _('Tags')
        }

        for facet in h.facets():
            if facet in default_facet_titles:
                facets[facet] = default_facet_titles[facet]
            else:
                facets[facet] = facet

        # Facet titles
        for plugin in p.PluginImplementations(p.IFacets):
            facets = plugin.dataset_facets(facets, package_type)

        c.facet_titles = facets

        data_dict = {
            'q': q,
            'fq': fq.strip(),
            'facet.field': facets.keys(),
            'rows': limit,
            'start': (page - 1) * limit,
            'sort': sort_by,
            'extras': search_extras,
            'include_private': asbool(config.get(
                'ckan.search.default_include_private', True)),
        }

        query = get_action('package_search')(context, data_dict)
        c.sort_by_selected = query['sort']

        c.page = h.Page(
            collection=query['results'],
            page=page,
            url=pager_url,
            item_count=query['count'],
            items_per_page=limit
        )

        # Facets are only shown for the plain dataset search view.
        if search_for == 'datasets':
            c.search_facets = query['search_facets']
        else:
            c.search_facets = {}
        c.page.items = query['results']
    except SearchQueryError as se:
        # User's search parameters are invalid, in such a way that is not
        # achievable with the web interface, so return a proper error to
        # discourage spiders which are the main cause of this
        log.info('Dataset search query rejected: %r', se.args)
        abort(400, _('Invalid search query: {error_message}')
              .format(error_message=str(se)))
    except SearchError as se:
        # May be bad input from the user, but may also be more serious like
        # bad code causing a SOLR syntax error, or a problem connecting to
        # SOLR
        log.error('Dataset search error: %r', se.args)
        c.query_error = True
        c.search_facets = {}
        c.page = h.Page(collection=[])
    except NotAuthorized:
        abort(403, _('Not authorized to see this page'))

    # Per-facet display limits, overridable via `_<facet>_limit` params.
    c.search_facets_limits = {}
    for facet in c.search_facets.keys():
        try:
            limit = int(request.params.get('_%s_limit' % facet,
                        int(config.get('search.facets.default', 10))))
        except ValueError:
            abort(400, _('Parameter "{parameter_name}" is not '
                         'an integer').format(
                parameter_name='_%s_limit' % facet))
        c.search_facets_limits[facet] = limit

    self._setup_template_variables(context, {},
                                   package_type=package_type)

    return render(self._search_template(package_type),
                  extra_vars={'dataset_type': package_type})