def make_pylons_stack(conf, full_stack=True, static_files=True,
                      **app_conf):
    """Create a Pylons WSGI application and return it

    The Pylons app is wrapped, innermost first, in: plugin middleware,
    Routes/Session/Cache middleware, the optional response cleanup hook,
    Fanstatic, plugin error-log middleware, the error handling stack (when
    ``full_stack``), repoze.who authentication, the registry manager, the
    i18n middleware and finally a cascade of static file parsers (when
    ``static_files``).

    ``conf``
        The inherited configuration for this application. Normally from
        the [DEFAULT] section of the Paste ini file.

    ``full_stack``
        Whether this application provides a full WSGI stack (by default,
        meaning it handles its own exceptions and errors). Disable
        full_stack when this application is "managed" by another WSGI
        middleware.

    ``static_files``
        Whether this application serves its own static files; disable
        when another web server is responsible for serving them.

    ``app_conf``
        The application's local configuration. Normally specified in
        the [app:<name>] section of the Paste ini file (where <name>
        defaults to main).

    NOTE(review): the body visible here ends after building the static file
    cascade and contains no ``return`` statement -- presumably the function
    continues beyond this excerpt; confirm against the full file.
    """
    # Configure the Pylons environment
    load_environment(conf, app_conf)

    # The Pylons WSGI app
    app = pylons_app = CKANPylonsApp()

    # set pylons globals
    app_globals.reset()

    # Let every IMiddleware plugin wrap the bare application first
    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    # Routing/Session/Cache Middleware
    app = RoutesMiddleware(app, config['routes.map'])
    # we want to be able to retrieve the routes middleware to be able to update
    # the mapper.  We store it in the pylons config to allow this.
    config['routes.middleware'] = app
    app = SessionMiddleware(app, config)
    app = CacheMiddleware(app, config)

    # CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
    # app = QueueLogMiddleware(app)
    if asbool(config.get('ckan.use_pylons_response_cleanup_middleware',
                         True)):
        app = execute_on_completion(app, config,
                                    cleanup_pylons_response_string)

    # Fanstatic
    # Debug mode recomputes hashes and serves unminified, unbundled
    # resources; otherwise minified bundles with cached hashes are used.
    if asbool(config.get('debug', False)):
        fanstatic_config = {
            'versioning': True,
            'recompute_hashes': True,
            'minified': False,
            'bottom': True,
            'bundle': False,
        }
    else:
        fanstatic_config = {
            'versioning': True,
            'recompute_hashes': False,
            'minified': True,
            'bottom': True,
            'bundle': True,
        }
    app = Fanstatic(app, **fanstatic_config)

    for plugin in PluginImplementations(IMiddleware):
        try:
            app = plugin.make_error_log_middleware(app, config)
        except AttributeError:
            # NOTE(review): the two adjacent literals are concatenated
            # without a separating space ("...the methodmake_error...").
            log.critical('Middleware class {0} is missing the method'
                         'make_error_log_middleware.'.format(
                             plugin.__class__.__name__))

    if asbool(full_stack):
        # Handle Python exceptions
        app = ErrorHandler(app, conf, **config['pylons.errorware'])

        # Display error documents for 400, 403, 404 status codes (and
        # 500 when debug is disabled)
        if asbool(config['debug']):
            app = StatusCodeRedirect(app, [400, 403, 404])
        else:
            app = StatusCodeRedirect(app, [400, 403, 404, 500])

    # Initialize repoze.who
    who_parser = WhoConfig(conf['here'])
    # NOTE(review): the file object opened here is never explicitly closed.
    who_parser.parse(open(app_conf['who.config_file']))

    app = PluggableAuthenticationMiddleware(
        app,
        who_parser.identifiers,
        who_parser.authenticators,
        who_parser.challengers,
        who_parser.mdproviders,
        who_parser.request_classifier,
        who_parser.challenge_decider,
        logging.getLogger('repoze.who'),
        logging.WARN,  # ignored
        who_parser.remote_user_key)

    # Establish the Registry for this application
    app = RegistryManager(app)

    app = common_middleware.I18nMiddleware(app, config)

    if asbool(static_files):
        # Serve static files
        # No max-age is passed unless `ckan.cache_enabled` is truthy
        static_max_age = None if not asbool(config.get('ckan.cache_enabled')) \
            else int(config.get('ckan.static_max_age', 3600))

        static_app = StaticURLParser(config['pylons.paths']['static_files'],
                                     cache_max_age=static_max_age)
        static_parsers = [static_app, app]

        storage_directory = uploader.get_storage_path()
        if storage_directory:
            path = os.path.join(storage_directory, 'storage')
            try:
                os.makedirs(path)
            except OSError, e:
                # errno 17 is file already exists
                if e.errno != 17:
                    raise

            # The storage parser is placed first in the cascade list
            storage_app = StaticURLParser(path, cache_max_age=static_max_age)
            static_parsers.insert(0, storage_app)

        # Configurable extra static file paths
        extra_static_parsers = []
        for public_path in config.get('extra_public_paths', '').split(','):
            if public_path.strip():
                extra_static_parsers.append(
                    StaticURLParser(public_path.strip(),
                                    cache_max_age=static_max_age))
        # Extra public paths come before the built-in parsers in the cascade
        app = Cascade(extra_static_parsers + static_parsers)
def submit():
    '''
    Take the data in the request params and send an email using them. If the
    data is invalid or a recaptcha is setup and it fails, don't send the
    email.

    The subject is chosen from the ``report_type`` param. When the ``type``
    param is ``report_mail`` the subject is replaced by the broken-link
    subject and the dataset/resource details are appended to the body. Every
    ``IContact`` plugin is given the chance to alter the mail dict in place
    before it is sent.

    :return: a dict of details with the keys ``success``, ``data``,
             ``errors``, ``error_summary`` and ``recaptcha_error``
    '''
    # this variable holds the status of sending the email
    email_success = True

    # pull out the data from the request
    data_dict = logic.clean_dict(
        unflatten(logic.tuplize_dict(logic.parse_params(
            toolkit.request.values))))

    # validate the request params
    errors, error_summary, recaptcha_error = validate(data_dict)
    # computed once; also drives the `success` flag in the response
    is_valid = not errors and recaptcha_error is None

    # if there are not errors and no recaptcha error, attempt to send the email
    if is_valid:
        # Mail title by request type
        report_type = data_dict.get(u'report_type', '')
        if report_type == 'general':
            report_mail_title = u'Contact US - General - Government Data'
        elif report_type == 'dataset_req':
            report_mail_title = \
                u'Contact US - Dataset Request - Government Data'
        else:
            report_mail_title = u'Contact US - Report - Government Data'

        # general content
        body = u'{}\n\nSent by:\nName: {}\nEmail: {}\n'.format(
            data_dict[u'content'], data_dict[u'name'], data_dict[u'email'])

        # if report page was from resource page -adding resource data
        if data_dict.get('type', '') == 'report_mail':
            report_mail_title = 'Report Broken Link - Government Data'
            resource_url = "{}/dataset/{}/resource/{}".format(
                toolkit.config.get('ckan.site_url', '//localhost:5000'),
                data_dict[u'dataset_id'],
                data_dict[u'resource_id'])
            # each detail is listed exactly once (the author line used to be
            # appended twice)
            details = [
                (u'Dataset ID', data_dict[u'dataset_id']),
                (u'Dataset Title', data_dict[u'dataset_title']),
                (u'Dataset Author', data_dict[u'dataset_author']),
                (u'Resource ID', data_dict[u'resource_id']),
                (u'Resource Title', data_dict[u'resource_title']),
                (u'Organization', data_dict[u'organization_name']),
                (u'Link to data', resource_url),
            ]
            body += u''.join(u'\n{}: {}'.format(label, value)
                             for label, value in details)

        body += u'\n\nBest Regards'
        body += u'\nGovernment Data Site'

        mail_dict = {
            u'recipient_email': toolkit.config.get(
                u'ckanext.contact.mail_to',
                toolkit.config.get(u'email_to')),
            u'recipient_name': toolkit.config.get(
                u'ckanext.contact.recipient_name',
                toolkit.config.get(u'ckan.site_title')),
            u'subject': report_mail_title,
            u'body': body,
            u'headers': {
                u'reply-to': data_dict[u'email']
            }
        }

        # allow other plugins to modify the mail_dict
        for plugin in PluginImplementations(IContact):
            plugin.mail_alter(mail_dict, data_dict)

        try:
            custom_mailer.mail_recipient(**mail_dict)
        except (mailer.MailerException, socket.error):
            # a delivery failure is reported through `success`, not raised
            email_success = False

    return {
        u'success': is_valid and email_success,
        u'data': data_dict,
        u'errors': errors,
        u'error_summary': error_summary,
        u'recaptcha_error': recaptcha_error,
    }
def make_flask_stack(conf, **app_conf):
    """ This has to pass the flask app through all the same middleware that
    Pylons used

    Builds the ``CKANFlask`` application, configures it (config values,
    secret key, Beaker sessions, Jinja filters, request handlers, context
    processors, Babel, blueprints, named routes) and then wraps it in the
    plugin middleware, Fanstatic, plugin error-log middleware and the
    repoze.who authentication middleware.

    :param conf: the global configuration; ``debug``/``DEBUG`` and ``here``
        are read from it
    :param app_conf: the application's local configuration; ``testing``,
        ``cache_dir``/``cache.dir`` and ``who.config_file`` are read from it
    :returns: the outermost WSGI callable; the underlying Flask app is
        available as its ``_wsgi_app`` attribute
    :raises RuntimeError: if no secret key is configured
    """

    root = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

    debug = asbool(conf.get('debug', conf.get('DEBUG', False)))
    testing = asbool(app_conf.get('testing', app_conf.get('TESTING', False)))
    # `flask_app` keeps pointing at the Flask object while `app` is rebound
    # to each wrapping middleware further down
    app = flask_app = CKANFlask(__name__)
    app.debug = debug
    app.testing = testing
    app.template_folder = os.path.join(root, 'templates')
    app.app_ctx_globals_class = CKAN_AppCtxGlobals
    app.url_rule_class = CKAN_Rule

    app.jinja_options = jinja_extensions.get_jinja_env_options()
    # Update Flask config with the CKAN values. We use the common config
    # object as values might have been modified on `load_environment`
    if config:
        app.config.update(config)
    else:
        app.config.update(conf)
        app.config.update(app_conf)

    # Do all the Flask-specific stuff before adding other middlewares

    # Secret key needed for flask-debug-toolbar and sessions
    if not app.config.get('SECRET_KEY'):
        app.config['SECRET_KEY'] = config.get('beaker.session.secret')
    if not app.config.get('SECRET_KEY'):
        raise RuntimeError(u'You must provide a value for the secret key'
                           ' with the SECRET_KEY config option')

    if debug:
        from flask_debugtoolbar import DebugToolbarExtension
        app.config['DEBUG_TB_INTERCEPT_REDIRECTS'] = False
        DebugToolbarExtension(app)

    # Use Beaker as the Flask session interface
    class BeakerSessionInterface(SessionInterface):
        def open_session(self, app, request):
            # implicitly returns None when the Beaker middleware did not
            # put a session in the WSGI environ
            if 'beaker.session' in request.environ:
                return request.environ['beaker.session']

        def save_session(self, app, session, response):
            session.save()

    # Collect the `beaker.session.*` options, dropping the `beaker.` prefix
    namespace = 'beaker.session.'
    session_opts = {
        k.replace('beaker.', ''): v
        for k, v in config.items()
        if k.startswith(namespace)
    }
    if (not session_opts.get('session.data_dir') and
            session_opts.get('session.type', 'file') == 'file'):
        cache_dir = app_conf.get('cache_dir') or app_conf.get('cache.dir')
        session_opts['session.data_dir'] = '{data_dir}/sessions'.format(
            data_dir=cache_dir)

    app.wsgi_app = SessionMiddleware(app.wsgi_app, session_opts)
    app.session_interface = BeakerSessionInterface()

    # Add Jinja2 extensions and filters
    app.jinja_env.filters['empty_and_escape'] = \
        jinja_extensions.empty_and_escape

    # Common handlers for all requests
    app.before_request(ckan_before_request)
    app.after_request(ckan_after_request)

    # Template context processors
    app.context_processor(helper_functions)
    app.context_processor(c_object)

    @app.context_processor
    def ungettext_alias():
        u'''
        Provide `ungettext` as an alias of `ngettext` for backwards
        compatibility
        '''
        return dict(ungettext=ungettext)

    # Babel
    pairs = [(os.path.join(root, u'i18n'), 'ckan')] + [
        (p.i18n_directory(), p.i18n_domain())
        for p in PluginImplementations(ITranslation)
    ]

    i18n_dirs, i18n_domains = zip(*pairs)

    app.config[u'BABEL_TRANSLATION_DIRECTORIES'] = ';'.join(i18n_dirs)
    app.config[u'BABEL_DOMAIN'] = 'ckan'
    app.config[u'BABEL_MULTIPLE_DOMAINS'] = ';'.join(i18n_domains)

    babel = CKANBabel(app)

    babel.localeselector(get_locale)

    @app.route('/hello', methods=['GET'])
    def hello_world():
        return 'Hello World, this is served by Flask'

    @app.route('/hello', methods=['POST'])
    def hello_world_post():
        return 'Hello World, this was posted to Flask'

    # Auto-register all blueprints defined in the `views` folder
    _register_core_blueprints(app)
    _register_error_handler(app)

    # Set up each IBlueprint extension as a Flask Blueprint
    for plugin in PluginImplementations(IBlueprint):
        if hasattr(plugin, 'get_blueprint'):
            app.register_extension_blueprint(plugin.get_blueprint())

    # Set flask routes in named_routes
    for rule in app.url_map.iter_rules():
        # only `<blueprint>.<view>` style endpoints are recorded
        if '.' not in rule.endpoint:
            continue
        controller, action = rule.endpoint.split('.')
        # URL arguments that have no default must be supplied by the caller
        needed = list(rule.arguments - set(rule.defaults or {}))
        route = {
            rule.endpoint: {
                'action': action,
                'controller': controller,
                'highlight_actions': action,
                'needed': needed
            }
        }
        config['routes.named_routes'].update(route)

    # Start other middleware
    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    # Fanstatic
    if debug:
        fanstatic_config = {
            'versioning': True,
            'recompute_hashes': True,
            'minified': False,
            'bottom': True,
            'bundle': False,
        }
    else:
        fanstatic_config = {
            'versioning': True,
            'recompute_hashes': False,
            'minified': True,
            'bottom': True,
            'bundle': False,
        }
    root_path = config.get('ckan.root_path', None)
    if root_path:
        # strip the language placeholder before using it as the base URL
        root_path = re.sub('/{{LANG}}', '', root_path)
        fanstatic_config['base_url'] = root_path
    app = Fanstatic(app, **fanstatic_config)

    for plugin in PluginImplementations(IMiddleware):
        try:
            app = plugin.make_error_log_middleware(app, config)
        except AttributeError:
            log.critical('Middleware class {0} is missing the method '
                         'make_error_log_middleware.'.format(
                             plugin.__class__.__name__))

    # Initialize repoze.who
    who_parser = WhoConfig(conf['here'])
    # the config is fully read inside `parse`, so the file can be closed
    # as soon as it returns
    with open(app_conf['who.config_file']) as who_config_file:
        who_parser.parse(who_config_file)

    app = PluggableAuthenticationMiddleware(
        app,
        who_parser.identifiers,
        who_parser.authenticators,
        who_parser.challengers,
        who_parser.mdproviders,
        who_parser.request_classifier,
        who_parser.challenge_decider,
        logging.getLogger('repoze.who'),
        logging.WARN,  # ignored
        who_parser.remote_user_key
    )

    # Update the main CKAN config object with the Flask specific keys
    # that were set here or autogenerated
    flask_config_keys = set(flask_app.config.keys()) - set(config.keys())
    for key in flask_config_keys:
        config[key] = flask_app.config[key]

    # Add a reference to the actual Flask app so it's easier to access
    app._wsgi_app = flask_app

    return app
def index_package(self, pkg_dict, defer_commit=False):
    '''Flatten a package dict into a search document and send it to Solr.

    The dict is modified in place (and later replaced by a cleaned copy)
    before being passed to ``conn.add``. Packages with no ``state`` or
    whose state contains ``deleted`` are handed to ``self.delete_package``
    instead of being indexed.

    :param pkg_dict: the package dict to index; nothing happens when it is
        ``None``
    :param defer_commit: when true the document is added with
        ``commit=False``
    :raises SearchIndexError: on ``pysolr.SolrError`` or ``socket.error``
    '''
    if pkg_dict is None:
        return

    # tracking summary values will be stale, never store them
    tracking_summary = pkg_dict.pop('tracking_summary', None)
    for r in pkg_dict.get('resources', []):
        r.pop('tracking_summary', None)

    # Serialised before any of the index-specific keys below are added
    data_dict_json = json.dumps(pkg_dict)

    if config.get('ckan.cache_validated_datasets', True):
        package_plugin = lib_plugins.lookup_package_plugin(
            pkg_dict.get('type'))

        schema = package_plugin.show_package_schema()
        # `errors` is not inspected
        validated_pkg_dict, errors = lib_plugins.plugin_validate(
            package_plugin, {'model': model, 'session': model.Session},
            pkg_dict, schema, 'package_show')
        pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict,
            cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

    pkg_dict['data_dict'] = data_dict_json

    # add to string field for sorting
    title = pkg_dict.get('title')
    if title:
        pkg_dict['title_string'] = title

    # delete the package if there is no state, or the state is `deleted`
    if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
        return self.delete_package(pkg_dict)

    # NOTE(review): list + dict.keys() relies on keys() returning a list
    # (Python 2 behaviour).
    index_fields = RESERVED_FIELDS + pkg_dict.keys()

    # include the extras in the main namespace
    extras = pkg_dict.get('extras', [])
    for extra in extras:
        key, value = extra['key'], extra['value']
        if isinstance(value, (tuple, list)):
            value = " ".join(map(text_type, value))
        # keep only the characters listed in KEY_CHARS
        key = ''.join([c for c in key if c in KEY_CHARS])
        pkg_dict['extras_' + key] = value
        # never overwrite a reserved field or an existing package key
        if key not in index_fields:
            pkg_dict[key] = value
    pkg_dict.pop('extras', None)

    # add tags, removing vocab tags from 'tags' list and adding them as
    # vocab_<tag name> so that they can be used in facets
    non_vocab_tag_names = []
    tags = pkg_dict.pop('tags', [])
    context = {'model': model}

    for tag in tags:
        if tag.get('vocabulary_id'):
            data = {'id': tag['vocabulary_id']}
            vocab = logic.get_action('vocabulary_show')(context, data)
            key = u'vocab_%s' % vocab['name']
            if key in pkg_dict:
                pkg_dict[key].append(tag['name'])
            else:
                pkg_dict[key] = [tag['name']]
        else:
            non_vocab_tag_names.append(tag['name'])

    pkg_dict['tags'] = non_vocab_tag_names

    # add groups
    groups = pkg_dict.pop('groups', [])

    # we use the capacity to make things private in the search index
    if pkg_dict['private']:
        pkg_dict['capacity'] = 'private'
    else:
        pkg_dict['capacity'] = 'public'

    pkg_dict['groups'] = [group['name'] for group in groups]

    # if there is an owner_org we want to add this to groups for index
    # purposes
    if pkg_dict.get('organization'):
        pkg_dict['organization'] = pkg_dict['organization']['name']
    else:
        pkg_dict['organization'] = None

    # tracking
    if not tracking_summary:
        tracking_summary = model.TrackingSummary.get_for_package(
            pkg_dict['id'])
    pkg_dict['views_total'] = tracking_summary['total']
    pkg_dict['views_recent'] = tracking_summary['recent']

    # (resource key, index field) pairs
    resource_fields = [('name', 'res_name'),
                       ('description', 'res_description'),
                       ('format', 'res_format'),
                       ('url', 'res_url'),
                       ('resource_type', 'res_type')]
    resource_extras = [(e, 'res_extras_' + e) for e
                       in model.Resource.get_extra_columns()]
    # flatten the structure for indexing:
    # each index field becomes a list with one entry per resource
    for resource in pkg_dict.get('resources', []):
        for (okey, nkey) in resource_fields + resource_extras:
            pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]
    pkg_dict.pop('resources', None)

    # Relationship type -> names of the related packages.
    # NOTE(review): the local name `type` shadows the builtin.
    rel_dict = collections.defaultdict(list)
    subjects = pkg_dict.pop("relationships_as_subject", [])
    objects = pkg_dict.pop("relationships_as_object", [])
    for rel in objects:
        type = model.PackageRelationship.forward_to_reverse_type(rel['type'])
        rel_dict[type].append(model.Package.get(rel['subject_package_id']).name)
    for rel in subjects:
        type = rel['type']
        rel_dict[type].append(model.Package.get(rel['object_package_id']).name)
    for key, value in rel_dict.iteritems():
        if key not in pkg_dict:
            pkg_dict[key] = value

    pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

    # Save dataset type
    pkg_dict['dataset_type'] = pkg_dict['type']

    # clean the dict fixing keys and dates
    # FIXME where are we getting these dirty keys from?  can we not just
    # fix them in the correct place or is this something that always will
    # be needed?  For my data not changing the keys seems to not cause a
    # problem.
    new_dict = {}
    bogus_date = datetime.datetime(1, 1, 1)
    for key, value in pkg_dict.items():
        # non-ASCII characters are dropped from the key
        key = key.encode('ascii', 'ignore')
        if key.endswith('_date'):
            try:
                date = parse(value, default=bogus_date)
                if date != bogus_date:
                    value = date.isoformat() + 'Z'
                else:
                    # The date field was empty, so dateutil filled it with
                    # the default bogus date
                    value = None
            except ValueError:
                # unparseable `*_date` values are left out of the document
                continue
        new_dict[key] = value
    pkg_dict = new_dict

    for k in ('title', 'notes', 'title_string'):
        if k in pkg_dict and pkg_dict[k]:
            pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

    # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
    # with UTC time (i.e trailing Z)
    # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
    pkg_dict['metadata_created'] += 'Z'
    pkg_dict['metadata_modified'] += 'Z'

    # mark this CKAN instance as data source:
    pkg_dict['site_id'] = config.get('ckan.site_id')

    # Strip a selection of the fields.
    # These fields are possible candidates for sorting search results on,
    # so we strip leading spaces because solr will sort " " before "a" or "A".
    for field_name in ['title']:
        try:
            value = pkg_dict.get(field_name)
            if value:
                pkg_dict[field_name] = value.lstrip()
        except KeyError:
            pass

    # add a unique index_id to avoid conflicts
    import hashlib
    pkg_dict['index_id'] = hashlib.md5('%s%s' % (pkg_dict['id'],config.get('ckan.site_id'))).hexdigest()

    # each plugin receives the previous plugin's result
    for item in PluginImplementations(IPackageController):
        pkg_dict = item.before_index(pkg_dict)

    assert pkg_dict, 'Plugin must return non empty package dict on index'

    # permission labels determine visibility in search, can't be set
    # in original dataset or before_index plugins
    labels = lib_plugins.get_permission_labels()
    dataset = model.Package.get(pkg_dict['id'])
    pkg_dict['permission_labels'] = labels.get_dataset_labels(
        dataset) if dataset else []  # TestPackageSearchIndex-workaround

    # send to solr:
    try:
        conn = make_connection()
        commit = not defer_commit
        # the `ckan.search.solr_commit` option can switch commits off entirely
        if not asbool(config.get('ckan.search.solr_commit', 'true')):
            commit = False
        conn.add(docs=[pkg_dict], commit=commit)
    except pysolr.SolrError as e:
        msg = 'Solr returned an error: {0}'.format(
            e[:1000]  # limit huge responses
        )
        raise SearchIndexError(msg)
    except socket.error as e:
        # NOTE(review): `conn` would be unbound here if make_connection()
        # itself raised socket.error -- confirm it cannot.
        err = 'Could not connect to Solr using {0}: {1}'.format(conn.url, str(e))
        log.error(err)
        raise SearchIndexError(err)

    commit_debug_msg = 'Not committed yet' if defer_commit else 'Committed'
    log.debug('Updated index for %s [%s]' % (pkg_dict.get('name'), commit_debug_msg))
return False obj.retry_times += 1 obj.save() if obj.retry_times >= 5: obj.state = "ERROR" obj.save() log.error('Too many consecutive retries for object {0}'.format(obj.id)) channel.basic_ack(method.delivery_tag) return False # Send the harvest object to the plugins that implement # the Harvester interface, only if the source type # matches for harvester in PluginImplementations(IHarvester): if harvester.info()['name'] == obj.source.type: fetch_and_import_stages(harvester, obj) model.Session.remove() channel.basic_ack(method.delivery_tag) def fetch_and_import_stages(harvester, obj): obj.fetch_started = datetime.datetime.utcnow() obj.state = "FETCH" obj.save() success_fetch = harvester.fetch_stage(obj) obj.fetch_finished = datetime.datetime.utcnow() obj.save() if success_fetch is True:
def get_harvester(harvest_source_type):
    '''Return the first IHarvester plugin whose ``info()['name']`` equals
    ``harvest_source_type``, or ``None`` when no plugin matches.
    '''
    candidates = (
        plugin for plugin in PluginImplementations(IHarvester)
        if plugin.info()['name'] == harvest_source_type
    )
    return next(candidates, None)
def make_flask_stack(conf, **app_conf):
    """ This has to pass the flask app through all the same middleware that
    Pylons used

    Creates and configures the ``CKANFlask`` application, then wraps it in
    the plugin middleware, Fanstatic and the plugin error-log middleware.
    The wrapped callable is returned with the Flask app attached as
    ``_wsgi_app``.
    """
    here = os.path.abspath(__file__)
    root = os.path.dirname(os.path.dirname(os.path.dirname(here)))

    debug = asbool(app_conf.get('debug', app_conf.get('DEBUG', False)))
    testing = asbool(app_conf.get('testing', app_conf.get('TESTING', False)))

    # `flask_app` always refers to the Flask object; `app` is rebound to
    # each middleware layer later on
    flask_app = CKANFlask(__name__)
    app = flask_app
    app.debug = debug
    app.testing = testing
    app.template_folder = os.path.join(root, 'templates')
    app.app_ctx_globals_class = CKAN_AppCtxGlobals
    app.url_rule_class = CKAN_Rule

    # Prefer the common config object (it may have been modified on
    # `load_environment`); fall back to the raw ini values otherwise
    if not config:
        app.config.update(conf)
        app.config.update(app_conf)
    else:
        app.config.update(config)

    # Everything Flask-specific happens before other middlewares are added

    # flask-debug-toolbar and sessions both need a secret key
    if not app.config.get('SECRET_KEY'):
        app.config['SECRET_KEY'] = config.get('beaker.session.secret')
        if not app.config.get('SECRET_KEY'):
            raise RuntimeError(u'You must provide a value for the secret key'
                               ' with the SECRET_KEY config option')

    if debug:
        from flask_debugtoolbar import DebugToolbarExtension
        app.config['DEBUG_TB_INTERCEPT_REDIRECTS'] = False
        DebugToolbarExtension(app)

    # Beaker provides the Flask session
    class BeakerSessionInterface(SessionInterface):
        def open_session(self, app, request):
            if 'beaker.session' in request.environ:
                return request.environ['beaker.session']

        def save_session(self, app, session, response):
            session.save()

    # `beaker.session.*` options with the leading `beaker.` removed
    session_opts = {
        option.replace('beaker.', ''): setting
        for option, setting in config.iteritems()
        if option.startswith('beaker.session.')
    }
    file_based = session_opts.get('session.type', 'file') == 'file'
    if file_based and not session_opts.get('session.data_dir'):
        cache_dir = app_conf.get('cache_dir') or app_conf.get('cache.dir')
        session_opts['session.data_dir'] = '{data_dir}/sessions'.format(
            data_dir=cache_dir)

    app.wsgi_app = SessionMiddleware(app.wsgi_app, session_opts)
    app.session_interface = BeakerSessionInterface()

    # Jinja2 extensions, registered in this order, followed by the filters
    jinja_env = app.jinja_env
    for jinja_extension in (
            'jinja2.ext.do',
            'jinja2.ext.with_',
            jinja_extensions.SnippetExtension,
            jinja_extensions.CkanExtend,
            jinja_extensions.CkanInternationalizationExtension,
            jinja_extensions.LinkForExtension,
            jinja_extensions.ResourceExtension,
            jinja_extensions.UrlForStaticExtension,
            jinja_extensions.UrlForExtension):
        jinja_env.add_extension(jinja_extension)
    app.jinja_env.filters['empty_and_escape'] = \
        jinja_extensions.empty_and_escape

    # Handlers that run for every request
    app.before_request(ckan_before_request)
    app.after_request(ckan_after_request)

    # Template context processors
    app.context_processor(helper_functions)
    app.context_processor(c_object)

    @app.route('/hello', methods=['GET'])
    def hello_world():
        return 'Hello World, this is served by Flask'

    @app.route('/hello', methods=['POST'])
    def hello_world_post():
        return 'Hello World, this was posted to Flask'

    # Core blueprints from the `views` folder are registered automatically
    _register_core_blueprints(app)

    # Each IBlueprint extension contributes a Flask Blueprint
    for blueprint_plugin in PluginImplementations(IBlueprint):
        if hasattr(blueprint_plugin, 'get_blueprint'):
            app.register_extension_blueprint(
                blueprint_plugin.get_blueprint())

    # From here on `app` is wrapped by the other middleware
    for middleware_plugin in PluginImplementations(IMiddleware):
        app = middleware_plugin.make_middleware(app, config)

    # Fanstatic: debug recomputes hashes and serves unminified, unbundled
    # resources; otherwise minified bundles are served
    fanstatic_config = {'versioning': True, 'bottom': True}
    if debug:
        fanstatic_config.update(
            recompute_hashes=True, minified=False, bundle=False)
    else:
        fanstatic_config.update(
            recompute_hashes=False, minified=True, bundle=True)
    app = Fanstatic(app, **fanstatic_config)

    for middleware_plugin in PluginImplementations(IMiddleware):
        try:
            app = middleware_plugin.make_error_log_middleware(app, config)
        except AttributeError:
            log.critical('Middleware class {0} is missing the method'
                         'make_error_log_middleware.'.format(
                             middleware_plugin.__class__.__name__))

    # Copy the Flask specific keys that were set here or autogenerated
    # into the main CKAN config object
    missing_keys = set(flask_app.config.keys()) - set(config.keys())
    for missing_key in missing_keys:
        config[missing_key] = flask_app.config[missing_key]

    # Keep a reference to the actual Flask app so it's easier to access
    app._wsgi_app = flask_app

    return app
def load_environment(global_conf, app_conf):
    """Configure the Pylons environment via the ``pylons.config``
    object

    Patches ``PylonsApp.find_controller``, initialises the config, loads
    the CKAN plugins and lets them update the config, sets up the host/site
    id, routes, app globals, helpers and the Genshi template loader,
    initialises the model with the SQLAlchemy engine and finally lets the
    ``IConfigurable`` plugins configure themselves.
    """

    ######  Pylons monkey-patch
    # this must be run at a time when the env is semi-setup, thus inlined here.
    # Required by the deliverance plugin and iATI
    from pylons.wsgiapp import PylonsApp
    import pkg_resources
    find_controller_generic = PylonsApp.find_controller

    # This is from pylons 1.0 source, will monkey-patch into 0.9.7
    def find_controller(self, controller):
        known = self.controller_classes
        if controller in known:
            return known[controller]
        # Anything that is not a dotted name goes to the stock lookup
        if not ('.' in controller or ':' in controller):
            return find_controller_generic(self, controller)
        entry_point = pkg_resources.EntryPoint.parse('x=%s' % controller)
        controller_cls = entry_point.load(False)
        self.controller_classes[controller] = controller_cls
        return controller_cls

    PylonsApp.find_controller = find_controller
    ###### END evil monkey-patch

    # Pylons paths
    root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    paths = {
        'root': root,
        'controllers': os.path.join(root, 'controllers'),
        'static_files': os.path.join(root, 'public'),
        'templates': [os.path.join(root, 'templates')],
    }

    # Initialize config with the basic options
    config.init_app(global_conf, app_conf, package='ckan', paths=paths)

    # load all CKAN plugins
    plugins.load_all(config)

    from ckan.plugins import PluginImplementations
    from ckan.plugins.interfaces import IConfigurer

    for configurer in PluginImplementations(IConfigurer):
        # must do update in place as this does not work:
        # config = plugin.update_config(config)
        configurer.update_config(config)

    # This is set up before globals are initialized
    site_url = config.get('ckan.site_url', '')
    ckan_host = urlparse(site_url).netloc
    config['ckan.host'] = ckan_host
    if config.get('ckan.site_id') is None:
        # the site id defaults to the host name without its port
        if ':' in ckan_host:
            ckan_host, port = ckan_host.split(':')
        config['ckan.site_id'] = ckan_host

    config['routes.map'] = make_map()
    config['pylons.app_globals'] = app_globals.Globals()
    config['pylons.h'] = ckan.lib.helpers

    ## redo template setup to use genshi.search_path (so remove std template setup)
    template_paths = [paths['templates'][0]]
    extra_template_paths = config.get('extra_template_paths', '')
    if extra_template_paths:
        # must be first for them to override defaults
        template_paths = extra_template_paths.split(',') + template_paths

    # Translator (i18n)
    translator = Translator(pylons.translator)

    def template_loaded(template):
        translator.setup(template)

    # Markdown ignores the logger config, so to get rid of excessive
    # markdown debug messages in the log, set it to the level of the
    # root logger.
    root_level = logging.getLogger().level
    logging.getLogger("MARKDOWN").setLevel(root_level)

    # Create the Genshi TemplateLoader
    # config['pylons.app_globals'].genshi_loader = TemplateLoader(
    #    paths['templates'], auto_reload=True)
    # tmpl_options["genshi.loader_callback"] = template_loaded
    genshi_loader = TemplateLoader(
        template_paths, auto_reload=True, callback=template_loaded)
    config['pylons.app_globals'].genshi_loader = genshi_loader

    # CONFIGURATION OPTIONS HERE (note: all config options will override
    # any Pylons config options)

    # Setup the SQLAlchemy database engine
    engine = engine_from_config(config, 'sqlalchemy.')

    # the model is only initialised when it has no engine yet
    if not model.meta.engine:
        model.init_model(engine)

    from ckan.plugins import PluginImplementations
    from ckan.plugins.interfaces import IConfigurable

    for configurable in PluginImplementations(IConfigurable):
        configurable.configure(config)
def get_action(action):
    '''Return the ckan.logic.action function named by the given string.

    For example:

        get_action('package_create')

    will normally return the ckan.logic.action.create.py:package_create()
    function.

    Rather than importing a ckan.logic.action function and calling it directly,
    you should always fetch the function via get_action():

        # Call the package_create action function:
        get_action('package_create')(context, data_dict)

    This is because CKAN plugins can override action functions using the
    IActions plugin interface, causing get_action() to return a plugin-provided
    function instead of the default one.

    As the context parameter passed to an action function is commonly:

        context = {'model': ckan.model, 'session': ckan.model.Session,
                   'user': pylons.c.user or pylons.c.author}

    an action function returned by get_action() will automatically add these
    parameters to the context if they are not defined.  This is especially
    useful for extensions as they should not really be importing parts of ckan
    eg ckan.model and as such do not have access to model or model.Session.

    If a context of None is passed to the action function then the context dict
    will be created.

    The resolved, wrapped functions are cached in the module-level
    ``_actions`` dict. The cache is only filled once resolution has fully
    succeeded, so a failure (e.g. two plugins providing the same action)
    never leaves unwrapped functions behind for later calls.

    :param action: name of the action function to return
    :type action: string

    :returns: the named action function
    :rtype: callable

    :raises KeyError: if no action with the given name exists (on every
        call, including the one that builds the cache)
    :raises Exception: if two plugins implement the same action

    '''
    if _actions:
        if action not in _actions:
            raise KeyError("Action '%s' not found" % action)
        return _actions.get(action)

    # Otherwise look in all the plugins to resolve all possible
    # First get the default ones in the ckan/logic/action directory
    # Rather than writing them out in full will use __import__
    # to load anything from ckan.logic.action that looks like it might
    # be an action
    resolved_actions = {}
    for action_module_name in ['get', 'create', 'update', 'delete']:
        module_path = 'ckan.logic.action.' + action_module_name
        # __import__ returns the top-level package, so walk down to the
        # actual action module
        module = __import__(module_path)
        for part in module_path.split('.')[1:]:
            module = getattr(module, part)
        for k, v in module.__dict__.items():
            if k.startswith('_'):
                continue
            # Only load functions from the action module.
            if isinstance(v, types.FunctionType):
                resolved_actions[k] = v

                # Whitelist all actions defined in logic/action/get.py as
                # being side-effect free.
                v.side_effect_free = getattr(v, 'side_effect_free', True)\
                    and action_module_name == 'get'

    # Then overwrite them with any specific ones in the plugins:
    resolved_action_plugins = {}
    fetched_actions = {}
    for plugin in PluginImplementations(IActions):
        for name, action_function in plugin.get_actions().items():
            if name in resolved_action_plugins:
                raise Exception(
                    'The action %r is already implemented in %r' % (
                        name,
                        resolved_action_plugins[name]
                    )
                )
            log.debug('Auth function %r was inserted', plugin.name)
            resolved_action_plugins[name] = plugin.name
            fetched_actions[name] = action_function
    # Use the updated ones in preference to the originals.
    resolved_actions.update(fetched_actions)

    def make_wrapped(_action, action_name):
        # a factory gives every wrapper its own `_action` binding
        def wrapped(context=None, data_dict=None, **kw):
            if kw:
                log.critical('%s was pass extra keywords %r'
                             % (_action.__name__, kw))
            if context is None:
                context = {}
            context.setdefault('model', model)
            context.setdefault('session', model.Session)
            try:
                context.setdefault('user', base.c.user or base.c.author)
            except TypeError:
                # c not registered
                pass
            return _action(context, data_dict, **kw)
        return wrapped

    # wrap the functions
    wrapped_actions = {}
    for action_name, _action in resolved_actions.items():
        fn = make_wrapped(_action, action_name)
        # we need to mirror the docstring
        fn.__doc__ = _action.__doc__
        # we need to retain the side effect free behaviour
        if getattr(_action, 'side_effect_free', False):
            fn.side_effect_free = True
        wrapped_actions[action_name] = fn

    # publish the cache only now that everything resolved successfully
    _actions.update(wrapped_actions)

    if action not in _actions:
        raise KeyError("Action '%s' not found" % action)
    return _actions.get(action)
def __init__(self):
    '''Run the BaseController initialiser, then store the IGroupController
    plugin implementations on ``self.extensions``.
    '''
    BaseController.__init__(self)
    group_plugins = PluginImplementations(IGroupController)
    self.extensions = group_plugins
def datastore_search(context, data_dict, original_data_dict):
    '''
    Run a versioned search against the datastore, using a query schema similar to the standard
    CKAN datastore query schema.

    :param context: the context dict from the action call
    :param data_dict: the data_dict from the action call
    :param original_data_dict: the data_dict before it was validated
    :return: when ``run_query`` is falsy, a dict with the target ``indexes`` and the serialised
             ``search``; otherwise a dict with the total, records, facets, field details and
             paging marker of the executed search
    '''
    original_data_dict, data_dict, version, search = create_search(
        context, data_dict, original_data_dict)
    resource_id = data_dict[u'resource_id']
    index_name = prefix_resource(resource_id)

    # no explicit version means "as of right now"
    if version is None:
        version = to_timestamp(datetime.now())

    # restrict the query to the requested version
    search = search.filter(create_version_query(version))

    # run_query defaults to true; when it is switched off we hand back the query that would have
    # been sent to elasticsearch rather than running it, which is useful for running the query
    # outside of ckan, for example on a tile server
    run_query = data_dict.get(u'run_query', True)
    if not run_query:
        return {
            u'indexes': [index_name],
            u'search': search.to_dict(),
        }

    result = run_search(search, [index_name])

    # give other extensions implementing our interface the chance to modify the result
    for result_plugin in PluginImplementations(IVersionedDatastore):
        result = result_plugin.datastore_modify_result(
            context, original_data_dict, data_dict, result)

    # expose the actual result object through the context in case the caller is an extension
    # which used one of the interface hooks to alter the search object and include, for example,
    # an aggregation
    context[u'versioned_datastore_query_result'] = result

    # fetch the field definitions and let other extensions adjust them
    mapping, fields = get_fields(resource_id, version)
    for fields_plugin in PluginImplementations(IVersionedDatastore):
        fields = fields_plugin.datastore_modify_fields(resource_id, mapping, fields)

    # only the truthy, whitelisted parts of the request are logged
    query_for_logging = {
        key: data_dict[key] for key in _query_log_keys if data_dict.get(key, None)
    }
    log_query(query_for_logging, u'basicsearch')

    # assemble the response pieces in the same order as they appear in the returned dict
    total = result.hits.total
    records = [hit.data.to_dict() for hit in result]
    facets = format_facets(result.aggs.to_dict())
    raw_fields = mapping[u'mappings'][DOC_TYPE][u'properties'][u'data'][u'properties']
    after = get_last_after(result.hits)

    return {
        u'total': total,
        u'records': records,
        u'facets': facets,
        u'fields': fields,
        u'raw_fields': raw_fields,
        u'after': after,
        u'_backend': u'versioned-datastore',
    }
def datastore_search_raw(resource_id, context, data_dict, original_data_dict, search=None,
                         version=None, raw_result=False, include_version=True):
    '''
    Run a raw elasticsearch query against the datastore index of a resource.

    :param resource_id: the id of the resource to search
    :param context: the context dict from the action call
    :param data_dict: the data_dict from the action call
    :param original_data_dict: the data_dict before it was validated
    :param search: the elasticsearch query to run (an empty query is used when None)
    :param version: the version of the data to query against (the current timestamp is used
                    when None)
    :param raw_result: whether to return the result as a raw elasticsearch result, or format it
                       in the same way as a normal datastore_search call would
    :param include_version: whether to include the version in the search or not; only honoured
                            when raw_result is true
    :return: a dict containing the results of the search
    :raises toolkit.ValidationError: if a RequestError is raised while searching or formatting
    '''
    if search is None:
        search = {}
    if version is None:
        version = to_timestamp(datetime.now())

    index_name = prefix_resource(resource_id)
    search = Search.from_dict(search)

    try:
        # a raw result without the version filter was asked for, so drop the version entirely
        skip_version_filter = raw_result and not include_version
        if skip_version_filter:
            version = None

        # version is now the requested version, the current timestamp, or None when no version
        # filter should be included in the search
        result = run_search(search, index_name, version)

        if raw_result:
            return result.to_dict()

        # give other extensions implementing our interface the chance to modify the result
        for result_plugin in PluginImplementations(IVersionedDatastore):
            result = result_plugin.datastore_modify_result(
                context, original_data_dict, data_dict, result)

        # expose the actual result object through the context in case the caller is an
        # extension which used one of the interface hooks to alter the search object and
        # include, for example, an aggregation
        context[u'versioned_datastore_query_result'] = result

        # fetch the field definitions and let other extensions adjust them
        mapping, fields = get_fields(resource_id, version)
        for fields_plugin in PluginImplementations(IVersionedDatastore):
            fields = fields_plugin.datastore_modify_fields(resource_id, mapping, fields)

        # assemble the response pieces in the same order as they appear in the returned dict
        total = result.hits.total
        records = [hit.data.to_dict() for hit in result]
        facets = format_facets(result.aggs.to_dict())
        raw_fields = mapping[u'mappings'][DOC_TYPE][u'properties'][u'data'][u'properties']
        after = get_last_after(result.hits)

        return {
            u'total': total,
            u'records': records,
            u'facets': facets,
            u'fields': fields,
            u'raw_fields': raw_fields,
            u'after': after,
            u'_backend': u'versioned-datastore',
        }
    except RequestError as e:
        raise toolkit.ValidationError(str(e))
def make_flask_stack(conf: Union[Config, CKANConfig]) -> CKANApp:
    """Build the CKAN Flask application and wrap it in the WSGI middleware
    stack.

    This has to pass the flask app through all the same middleware that
    Pylons used.

    :param conf: the configuration the application is created with. It is
        read for ``debug``/``testing``, the cache directory, ``here`` and
        ``who.config_file``, and is used as the Flask config when the common
        ``config`` object is empty.
    :returns: the outermost WSGI middleware; the underlying Flask app is
        attached to it as ``_wsgi_app``
    :raises RuntimeError: if no secret key is configured
    """

    root = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

    debug = asbool(conf.get('debug', conf.get('DEBUG', False)))
    testing = asbool(conf.get('testing', conf.get('TESTING', False)))
    app = flask_app = CKANFlask(__name__, static_url_path='')

    # Register storage for accessing group images, site logo, etc.
    storage_folder = []
    storage = uploader.get_storage_path()
    if storage:
        storage_folder = [os.path.join(storage, 'storage')]

    # Static files folders (core and extensions)
    public_folder = config.get_value(u'ckan.base_public_folder')
    app.static_folder = config.get_value('extra_public_paths').split(',') + [
        os.path.join(root, public_folder)
    ] + storage_folder

    app.jinja_options = jinja_extensions.get_jinja_env_options()
    app.jinja_env.policies['ext.i18n.trimmed'] = True

    app.debug = debug
    app.testing = testing

    app.template_folder = os.path.join(root, 'templates')
    app.app_ctx_globals_class = CKAN_AppCtxGlobals
    app.url_rule_class = CKAN_Rule

    # Update Flask config with the CKAN values. We use the common config
    # object as values might have been modified on `load_environment`
    if config:
        app.config.update(config)
    else:
        app.config.update(conf)

    # Do all the Flask-specific stuff before adding other middlewares

    # Secret key needed for flask-debug-toolbar and sessions
    if not app.config.get('SECRET_KEY'):
        app.config['SECRET_KEY'] = config.get_value('beaker.session.secret')
    if not app.config.get('SECRET_KEY'):
        raise RuntimeError(u'You must provide a value for the secret key'
                           ' with the SECRET_KEY config option')

    root_path = config.get_value('ckan.root_path')
    if debug:
        from flask_debugtoolbar import DebugToolbarExtension
        app.config['DEBUG_TB_INTERCEPT_REDIRECTS'] = False
        debug_ext = DebugToolbarExtension()

        # register path that includes `ckan.site_root` before
        # initializing debug app. In such a way, our route receives
        # higher precedence.

        # TODO: After removal of Pylons code, switch to
        # `APPLICATION_ROOT` config value for flask application. Right
        # now it's a bad option because we are handling both pylons
        # and flask urls inside helpers and splitting this logic will
        # bring us tons of headache.
        if root_path:
            app.add_url_rule(
                root_path.replace('{{LANG}}', '').rstrip('/') +
                '/_debug_toolbar/static/<path:filename>',
                '_debug_toolbar.static', debug_ext.send_static_file)
        debug_ext.init_app(app)

        from werkzeug.debug import DebuggedApplication
        app.wsgi_app = DebuggedApplication(app.wsgi_app, True)

    # Use Beaker as the Flask session interface
    class BeakerSessionInterface(SessionInterface):
        def open_session(self, app: Any, request: Any):
            # Returns None (implicitly) when no Beaker session is present
            # in the WSGI environ.
            if 'beaker.session' in request.environ:
                return request.environ['beaker.session']

        def save_session(self, app: Any, session: Any, response: Any):
            session.save()

    # Collect the `beaker.session.*` options, dropping the `beaker.` prefix.
    namespace = 'beaker.session.'
    session_opts = {
        k.replace('beaker.', ''): v
        for k, v in config.items()
        if k.startswith(namespace)
    }
    if (not session_opts.get('session.data_dir') and
            session_opts.get('session.type', 'file') == 'file'):
        cache_dir = conf.get('cache_dir') or conf.get('cache.dir')
        session_opts['session.data_dir'] = '{data_dir}/sessions'.format(
            data_dir=cache_dir)

    app.wsgi_app = RootPathMiddleware(app.wsgi_app)
    app.wsgi_app = SessionMiddleware(app.wsgi_app, session_opts)
    app.session_interface = BeakerSessionInterface()

    # Add Jinja2 extensions and filters
    app.jinja_env.filters['empty_and_escape'] = \
        jinja_extensions.empty_and_escape

    # Common handlers for all requests
    #
    # flask types do not mention that it's possible to return a response from
    # the `before_request` callback
    app.before_request(ckan_before_request)
    app.after_request(ckan_after_request)

    # Template context processors
    app.context_processor(helper_functions)
    app.context_processor(c_object)

    app.context_processor(_ungettext_alias)

    # Babel
    _ckan_i18n_dir = i18n.get_ckan_i18n_dir()

    # Core translations first, then one (directory, domain) pair per
    # ITranslation plugin, taken in reverse plugin order.
    pairs = [
        cast("tuple[str, str]", (_ckan_i18n_dir, u'ckan'))
    ] + [
        (p.i18n_directory(), p.i18n_domain())
        for p in reversed(list(PluginImplementations(ITranslation)))
    ]

    i18n_dirs, i18n_domains = zip(*pairs)

    app.config[u'BABEL_TRANSLATION_DIRECTORIES'] = ';'.join(i18n_dirs)
    app.config[u'BABEL_DOMAIN'] = 'ckan'
    app.config[u'BABEL_MULTIPLE_DOMAINS'] = ';'.join(i18n_domains)
    app.config[u'BABEL_DEFAULT_TIMEZONE'] = str(helpers.get_display_timezone())

    babel = CKANBabel(app)

    babel.localeselector(get_locale)

    # WebAssets
    _setup_webassets(app)

    # Auto-register all blueprints defined in the `views` folder
    _register_core_blueprints(app)
    _register_error_handler(app)

    # Set up each IBlueprint extension as a Flask Blueprint
    for plugin in PluginImplementations(IBlueprint):
        plugin_blueprints = plugin.get_blueprint()
        if not isinstance(plugin_blueprints, list):
            plugin_blueprints = [plugin_blueprints]
        for blueprint in plugin_blueprints:
            app.register_extension_blueprint(blueprint)

    lib_plugins.register_package_blueprints(app)
    lib_plugins.register_group_blueprints(app)

    # Start other middleware
    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    for plugin in PluginImplementations(IMiddleware):
        try:
            app = plugin.make_error_log_middleware(app, config)
        except AttributeError:
            log.critical('Middleware class {0} is missing the method '
                         'make_error_log_middleware.'.format(
                             plugin.__class__.__name__))

    # Initialize repoze.who
    who_parser = WhoConfig(conf['here'])
    # Close the config file as soon as it has been parsed instead of leaking
    # the handle until garbage collection.
    with open(cast(str, conf['who.config_file'])) as who_config_file:
        who_parser.parse(who_config_file)

    app = PluggableAuthenticationMiddleware(
        RepozeAdapterMiddleware(app),
        who_parser.identifiers,
        who_parser.authenticators,
        who_parser.challengers,
        who_parser.mdproviders,
        who_parser.request_classifier,
        who_parser.challenge_decider,
        logging.getLogger('repoze.who'),
        logging.WARN,  # ignored
        who_parser.remote_user_key
    )

    # Update the main CKAN config object with the Flask specific keys
    # that were set here or autogenerated
    flask_config_keys = set(flask_app.config.keys()) - set(config.keys())
    for key in flask_config_keys:
        config[key] = flask_app.config[key]

    # Prevent the host from request to be added to the new header location.
    app = HostHeaderMiddleware(app)

    app = I18nMiddleware(app)

    if config.get_value('ckan.tracking_enabled'):
        app = TrackingMiddleware(app, config)

    # Add a reference to the actual Flask app so it's easier to access
    # type_ignore_reason: custom attribute
    app._wsgi_app = flask_app  # type: ignore

    return app
def extract(ini_path, res_dict):
    """
    Download resource, extract and store metadata.

    The extracted metadata is stored in the database.

    Note that this task does not check whether the resource exists in the
    database, whether the resource's format is indexed or whether there
    is an existing task working on the resource's metadata. This is the
    responsibility of the caller.

    The task does check which metadata fields are configured to be
    indexed and only stores those in the database.

    Any previously stored metadata for the resource is cleared.

    :param ini_path: path of the configuration file passed to
        ``load_config``
    :param res_dict: resource dict; the keys ``id``, ``package_id``,
        ``url`` and ``format`` are read
    :returns: ``None``. Returns early, without touching any metadata, when
        ``package_show`` raises ``NotAuthorized`` for the package.
    """
    load_config(ini_path)

    # Get package data before doing any hard work so that we can fail
    # early if the package is private.
    try:
        toolkit.get_action('package_show')({'validate': False},
                                           {'id': res_dict['package_id']})
    except toolkit.NotAuthorized:
        log.debug(('Not extracting resource {} since it belongs to the ' +
                  'private dataset {}.').format(res_dict['id'],
                  res_dict['package_id']))
        return

    try:
        metadata = ResourceMetadata.one(resource_id=res_dict['id'])
    except NoResultFound:
        metadata = ResourceMetadata.create(resource_id=res_dict['id'])
    try:
        metadata.last_url = res_dict['url']
        metadata.last_format = res_dict['format']
        metadata.last_extracted = datetime.datetime.now()
        metadata.meta.clear()
        extracted = download_and_extract(res_dict['url'])
        for plugin in PluginImplementations(IExtractorPostprocessor):
            plugin.extractor_after_extract(res_dict, extracted)
        # ``items()`` instead of the Python-2-only ``iteritems()`` so the
        # loop works on both major Python versions.
        for key, value in extracted.items():
            if not is_field_indexed(key):
                continue

            # Some documents contain multiple values for the same field. This
            # is not supported in our database model, hence we collapse these
            # into a single value.
            if isinstance(value, list):
                log.debug('Collapsing multiple values for metadata field ' +
                          '"{}" in resource {} into a single value.'.format(key,
                          res_dict['id']))
                value = ', '.join(value)
            metadata.meta[key] = value
    except RequestException as e:
        # Log the exception itself: ``e.message`` does not exist on Python 3
        # and would raise AttributeError from inside this handler.
        log.warning('Failed to download resource data from "%s": %s',
                    res_dict['url'], e)
    finally:
        # Whatever happened, release the task marker and persist what we
        # have.
        metadata.task_id = None
        metadata.save()

    for plugin in PluginImplementations(IExtractorPostprocessor):
        plugin.extractor_after_save(res_dict, metadata.as_dict())

    # We need to update the search index for the package here. Note that
    # we cannot rely on the automatic update that happens when a resource
    # is changed, since our extraction task runs asynchronously and may
    # be finished only when the automatic index update has already run.
    search.rebuild(package_id=res_dict['package_id'])

    for plugin in PluginImplementations(IExtractorPostprocessor):
        plugin.extractor_after_index(res_dict, metadata.as_dict())
def harvest_objects_import(context, data_dict):
    '''
    Reimports the current harvest objects

    It performs the import stage with the last fetched objects, optionally
    belonging to a certain source.
    Please note that no objects will be fetched from the remote server.
    It will only affect the last fetched objects already present in the
    database.

    :param context: action context; ``session`` is required, ``segments``
        and ``join_datasets`` (default ``True``) are optional
    :param data_dict: may contain ``source_id``, ``harvest_object_id`` or
        ``package_id`` (a package id or name); the first one present, in
        that order, selects the objects to import
    :returns: the number of harvest objects processed
    :rtype: int
    :raises NotFound: if ``source_id`` does not match a harvest source
    :raises Exception: if the harvest source is not active
    '''
    log.info('Harvest objects import: %r', data_dict)
    check_access('harvest_objects_import', context, data_dict)

    session = context['session']
    source_id = data_dict.get('source_id', None)
    harvest_object_id = data_dict.get('harvest_object_id', None)
    package_id_or_name = data_dict.get('package_id', None)

    segments = context.get('segments', None)

    join_datasets = context.get('join_datasets', True)

    if source_id:
        source = HarvestSource.get(source_id)
        if not source:
            log.error('Harvest source %s does not exist', source_id)
            raise NotFound('Harvest source %s does not exist' % source_id)

        if not source.active:
            log.warning('Harvest source %s is not active.', source_id)
            raise Exception('This harvest source is not active')

        last_objects_ids = session.query(HarvestObject.id) \
            .join(HarvestSource) \
            .filter(HarvestObject.source==source) \
            .filter(HarvestObject.current==True)

    elif harvest_object_id:
        last_objects_ids = session.query(HarvestObject.id) \
            .filter(HarvestObject.id==harvest_object_id)
    elif package_id_or_name:
        last_objects_ids = session.query(HarvestObject.id) \
            .join(Package) \
            .filter(HarvestObject.current==True) \
            .filter(Package.state==u'active') \
            .filter(or_(Package.id==package_id_or_name,
                        Package.name==package_id_or_name))
        # Package is already joined above; do not join it a second time.
        join_datasets = False
    else:
        last_objects_ids = session.query(HarvestObject.id) \
            .filter(HarvestObject.current==True)

    if join_datasets:
        last_objects_ids = last_objects_ids.join(Package) \
            .filter(Package.state==u'active')

    last_objects_ids = last_objects_ids.all()

    last_objects_count = 0

    for obj_id in last_objects_ids:
        if segments:
            # hashlib only accepts bytes on Python 3, so encode text ids
            # before hashing; bytes ids are hashed unchanged.
            raw_id = obj_id[0]
            if not isinstance(raw_id, bytes):
                raw_id = raw_id.encode('utf-8')
            # Skip objects whose id hash does not start with one of the
            # requested segment characters.
            if hashlib.md5(raw_id).hexdigest()[0] not in segments:
                continue

        obj = session.query(HarvestObject).get(obj_id)

        # Hand the object to the first harvester whose name matches the
        # type of the object's source.
        for harvester in PluginImplementations(IHarvester):
            if harvester.info()['name'] == obj.source.type:
                if hasattr(harvester, 'force_import'):
                    harvester.force_import = True
                harvester.import_stage(obj)
                break
        last_objects_count += 1
    log.info('Harvest objects imported: %s', last_objects_count)
    return last_objects_count
"""Routes configuration

The more specific and detailed routes should be defined first so they
may take precedence over the more generic routes. For more information
refer to the routes manual at http://routes.groovie.org/docs/

"""
from pylons import config
from routes import Mapper
from ckan.plugins import PluginImplementations, IRoutes

# Plugins implementing the IRoutes interface, looked up once at import time.
routing_plugins = PluginImplementations(IRoutes)


def make_map():
    """Create, configure and return the routes Mapper"""
    # import controllers here rather than at root level because
    # pylons config is initialised by this point.

    # Helpers to reduce code clutter: each one is a ``method`` condition
    # dict restricting a route to the listed HTTP methods.
    GET = dict(method=['GET'])
    PUT = dict(method=['PUT'])
    POST = dict(method=['POST'])
    DELETE = dict(method=['DELETE'])
    GET_POST = dict(method=['GET', 'POST'])
    PUT_POST = dict(method=['PUT', 'POST'])
    PUT_POST_DELETE = dict(method=['PUT', 'POST', 'DELETE'])
    OPTIONS = dict(method=['OPTIONS'])

    # Imported locally rather than at module level (see the note at the top
    # of this function).
    # NOTE(review): the visible body stops here - no Mapper is built or
    # returned in this chunk, so the rest of the function is presumably
    # outside this view; confirm against the full file.
    from ckan.lib.plugins import register_package_plugins
    from ckan.lib.plugins import register_group_plugins
def harvest_objects_import(context, data_dict):
    '''
    Re-run the import stage for existing harvest objects, selected by one of
    guid, source_id, harvest_object_id or package_id (checked in that order).

    Only the last fetched objects already present in the database are
    affected; nothing is fetched from the remote server. Without any
    selector, every current harvest object is imported.

    :param source_id: the id of the harvest source to import
    :type source_id: string
    :param guid: the guid of the harvest object to import
    :type guid: string
    :param harvest_object_id: the id of the harvest object to import
    :type harvest_object_id: string
    :param package_id: the id or name of the package to import
    :type package_id: string
    '''
    log.info('Harvest objects import: %r', data_dict)
    check_access('harvest_objects_import', context, data_dict)

    session = context['session']

    source_id = data_dict.get('source_id')
    guid = data_dict.get('guid')
    harvest_object_id = data_dict.get('harvest_object_id')
    package_id_or_name = data_dict.get('package_id')

    segments = context.get('segments')
    join_datasets = context.get('join_datasets', True)

    if guid:
        id_query = (
            session.query(HarvestObject.id)
            .filter(HarvestObject.guid == guid)
            .filter(HarvestObject.current == True)  # noqa: E712
        )
    elif source_id:
        source = HarvestSource.get(source_id)
        if not source:
            log.error('Harvest source %s does not exist', source_id)
            raise NotFound('Harvest source %s does not exist' % source_id)
        if not source.active:
            log.warn('Harvest source %s is not active.', source_id)
            raise Exception('This harvest source is not active')

        id_query = (
            session.query(HarvestObject.id)
            .join(HarvestSource)
            .filter(HarvestObject.source == source)
            .filter(HarvestObject.current == True)  # noqa: E712
        )
    elif harvest_object_id:
        id_query = (
            session.query(HarvestObject.id)
            .filter(HarvestObject.id == harvest_object_id)
        )
    elif package_id_or_name:
        matches_package = or_(Package.id == package_id_or_name,
                              Package.name == package_id_or_name)
        id_query = (
            session.query(HarvestObject.id)
            .join(Package)
            .filter(HarvestObject.current == True)  # noqa: E712
            .filter(Package.state == u'active')
            .filter(matches_package)
        )
        # Package has been joined already, so skip the generic join below
        join_datasets = False
    else:
        id_query = (
            session.query(HarvestObject.id)
            .filter(HarvestObject.current == True)  # noqa: E712
        )

    if join_datasets:
        id_query = id_query.join(Package).filter(Package.state == u'active')

    id_rows = id_query.all()

    imported = 0
    for obj_id in id_rows:
        if segments:
            # only handle objects whose id hash starts with a requested
            # segment character
            digest = hashlib.md5(six.ensure_binary(obj_id[0])).hexdigest()
            if str(digest)[0] not in segments:
                continue

        obj = session.query(HarvestObject).get(obj_id)

        # the first harvester whose name matches the source type does the
        # import
        for harvester in PluginImplementations(IHarvester):
            if harvester.info()['name'] != obj.source.type:
                continue
            if hasattr(harvester, 'force_import'):
                harvester.force_import = True
            harvester.import_stage(obj)
            break

        imported += 1

    log.info('Harvest objects imported: %s', imported)
    return imported