Exemple #1
0
def make_pylons_stack(conf, full_stack=True, static_files=True, **app_conf):
    """Create a Pylons WSGI application and return it

    The application is built by wrapping ``CKANPylonsApp`` in successive
    WSGI middleware layers: plugin middleware, routes/session/cache,
    Fanstatic, error handling, repoze.who authentication, the registry
    manager, i18n and (optionally) static file serving.

    ``conf``
        The inherited configuration for this application. Normally from
        the [DEFAULT] section of the Paste ini file.

    ``full_stack``
        Whether this application provides a full WSGI stack (by default,
        meaning it handles its own exceptions and errors). Disable
        full_stack when this application is "managed" by another WSGI
        middleware.

    ``static_files``
        Whether this application serves its own static files; disable
        when another web server is responsible for serving them.

    ``app_conf``
        The application's local configuration. Normally specified in
        the [app:<name>] section of the Paste ini file (where <name>
        defaults to main).

    """
    # Local import: only needed to name the "already exists" errno below.
    import errno

    # Configure the Pylons environment
    load_environment(conf, app_conf)

    # The Pylons WSGI app
    app = pylons_app = CKANPylonsApp()

    # set pylons globals
    app_globals.reset()

    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    # Routing/Session/Cache Middleware
    app = RoutesMiddleware(app, config['routes.map'])
    # we want to be able to retrieve the routes middleware to be able to update
    # the mapper.  We store it in the pylons config to allow this.
    config['routes.middleware'] = app
    app = SessionMiddleware(app, config)
    app = CacheMiddleware(app, config)

    # CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
    # app = QueueLogMiddleware(app)
    if asbool(config.get('ckan.use_pylons_response_cleanup_middleware', True)):
        app = execute_on_completion(app, config,
                                    cleanup_pylons_response_string)

    # Fanstatic: in debug mode hashes are recomputed and resources are served
    # unminified and unbundled; otherwise the opposite.
    debug = asbool(config.get('debug', False))
    fanstatic_config = {
        'versioning': True,
        'recompute_hashes': debug,
        'minified': not debug,
        'bottom': True,
        'bundle': not debug,
    }
    app = Fanstatic(app, **fanstatic_config)

    for plugin in PluginImplementations(IMiddleware):
        try:
            app = plugin.make_error_log_middleware(app, config)
        except AttributeError:
            log.critical('Middleware class {0} is missing the method '
                         'make_error_log_middleware.'.format(
                             plugin.__class__.__name__))

    if asbool(full_stack):
        # Handle Python exceptions
        app = ErrorHandler(app, conf, **config['pylons.errorware'])

        # Display error documents for 400, 403, 404 status codes (and
        # 500 when debug is disabled)
        error_codes = [400, 403, 404]
        if not debug:
            error_codes.append(500)
        app = StatusCodeRedirect(app, error_codes)

    # Initialize repoze.who; the config file is closed as soon as it has
    # been parsed instead of being left open for the process lifetime.
    who_parser = WhoConfig(conf['here'])
    with open(app_conf['who.config_file']) as who_config_file:
        who_parser.parse(who_config_file)

    app = PluggableAuthenticationMiddleware(
        app,
        who_parser.identifiers,
        who_parser.authenticators,
        who_parser.challengers,
        who_parser.mdproviders,
        who_parser.request_classifier,
        who_parser.challenge_decider,
        logging.getLogger('repoze.who'),
        logging.WARN,  # ignored
        who_parser.remote_user_key)

    # Establish the Registry for this application
    app = RegistryManager(app)

    app = common_middleware.I18nMiddleware(app, config)

    if asbool(static_files):
        # Serve static files; no max-age is sent when caching is disabled
        if asbool(config.get('ckan.cache_enabled')):
            static_max_age = int(config.get('ckan.static_max_age', 3600))
        else:
            static_max_age = None

        static_app = StaticURLParser(config['pylons.paths']['static_files'],
                                     cache_max_age=static_max_age)
        static_parsers = [static_app, app]

        storage_directory = uploader.get_storage_path()
        if storage_directory:
            path = os.path.join(storage_directory, 'storage')
            try:
                os.makedirs(path)
            except OSError as e:
                # an already existing directory is fine, anything else is not
                if e.errno != errno.EEXIST:
                    raise

            # uploaded files take precedence over the bundled static files
            storage_app = StaticURLParser(path, cache_max_age=static_max_age)
            static_parsers.insert(0, storage_app)

        # Configurable extra static file paths
        extra_static_parsers = []
        for public_path in config.get('extra_public_paths', '').split(','):
            if public_path.strip():
                extra_static_parsers.append(
                    StaticURLParser(public_path.strip(),
                                    cache_max_age=static_max_age))
        app = Cascade(extra_static_parsers + static_parsers)
    # NOTE(review): no ``return app`` is visible in this excerpt although the
    # docstring promises one -- confirm against the complete function.
Exemple #2
0
def submit():
    '''
    Take the data in the request params and send an email using them. If the data is invalid or
    a recaptcha is setup and it fails, don't send the email.

    The mail subject depends on the ``report_type`` request value; when the
    ``type`` request value is ``report_mail`` the dataset/resource details
    are appended to the body. IContact plugins may alter the mail before it
    is sent.

    :return: a dict of details with the keys ``success``, ``data``,
             ``errors``, ``error_summary`` and ``recaptcha_error``
    '''
    # this variable holds the status of sending the email
    email_success = True

    # pull out the data from the request
    data_dict = logic.clean_dict(
        unflatten(logic.tuplize_dict(logic.parse_params(
            toolkit.request.values))))

    # validate the request params
    errors, error_summary, recaptcha_error = validate(data_dict)
    is_valid = len(errors) == 0 and recaptcha_error is None

    # if there are not errors and no recaptcha error, attempt to send the email
    if is_valid:

        # Mail title by request type
        report_type = data_dict.get(u'report_type', '')
        if report_type == 'general':
            report_mail_title = u'Contact US - General - Government Data'
        elif report_type == 'dataset_req':
            report_mail_title = u'Contact US - Dataset Request - Government Data'
        else:
            report_mail_title = u'Contact US - Report - Government Data'

        # general content
        body = u'{}\n\nSent by:\nName: {}\nEmail: {}\n'.format(data_dict[u'content'],
                                                               data_dict[u'name'],
                                                               data_dict[u'email'])

        # if report page was from resource page -adding resource data
        if data_dict.get('type', '') == 'report_mail':
            report_mail_title = 'Report Broken Link - Government Data'
            # (label, request key) pairs, appended in this order; each label
            # appears once (the author line used to be emitted twice)
            detail_fields = (
                ('Dataset ID', u'dataset_id'),
                ('Dataset Title', u'dataset_title'),
                ('Dataset Author', u'dataset_author'),
                ('Resource ID', u'resource_id'),
                ('Resource Title', u'resource_title'),
                ('Organization', u'organization_name'),
            )
            for label, key in detail_fields:
                body += '\n' + label + ': ' + data_dict[key]
            resource_link = "{}/dataset/{}/resource/{}".format(
                toolkit.config.get('ckan.site_url', '//localhost:5000'),
                data_dict[u'dataset_id'],
                data_dict[u'resource_id'])
            body += '\n' + 'Link to data' + ': ' + resource_link
            body += '\n\n' + u'Best Regards'
            body += '\n' + u'Government Data Site'

        mail_dict = {
            u'recipient_email': toolkit.config.get(u'ckanext.contact.mail_to',
                                                   toolkit.config.get(u'email_to')),
            u'recipient_name': toolkit.config.get(u'ckanext.contact.recipient_name',
                                                  toolkit.config.get(u'ckan.site_title')),
            u'subject': report_mail_title,
            u'body': body,
            u'headers': {
                u'reply-to': data_dict[u'email']
                }
            }

        # allow other plugins to modify the mail_dict
        for plugin in PluginImplementations(IContact):
            plugin.mail_alter(mail_dict, data_dict)

        try:
            custom_mailer.mail_recipient(**mail_dict)
        except (mailer.MailerException, socket.error):
            # a delivery failure is reported through the ``success`` flag
            email_success = False

    return {
        u'success': is_valid and email_success,
        u'data': data_dict,
        u'errors': errors,
        u'error_summary': error_summary,
        u'recaptcha_error': recaptcha_error,
        }
Exemple #3
0
def make_flask_stack(conf, **app_conf):
    """ This has to pass the flask app through all the same middleware that
    Pylons used

    ``conf``
        Global configuration; read for ``debug``/``DEBUG`` and ``here``
        (passed to ``WhoConfig``), and used to seed the Flask config
        when the shared ``config`` object is empty.

    ``app_conf``
        Application configuration; read for ``testing``/``TESTING``, the
        cache directory and ``who.config_file``.

    Returns the outermost WSGI middleware; the underlying Flask app is
    reachable through its ``_wsgi_app`` attribute.

    Raises ``RuntimeError`` when no secret key can be determined.
    """

    root = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

    debug = asbool(conf.get('debug', conf.get('DEBUG', False)))
    testing = asbool(app_conf.get('testing', app_conf.get('TESTING', False)))

    app = flask_app = CKANFlask(__name__)
    app.debug = debug
    app.testing = testing
    app.template_folder = os.path.join(root, 'templates')
    app.app_ctx_globals_class = CKAN_AppCtxGlobals
    app.url_rule_class = CKAN_Rule

    app.jinja_options = jinja_extensions.get_jinja_env_options()
    # Update Flask config with the CKAN values. We use the common config
    # object as values might have been modified on `load_environment`
    if config:
        app.config.update(config)
    else:
        app.config.update(conf)
        app.config.update(app_conf)

    # Do all the Flask-specific stuff before adding other middlewares

    # Secret key needed for flask-debug-toolbar and sessions
    if not app.config.get('SECRET_KEY'):
        app.config['SECRET_KEY'] = config.get('beaker.session.secret')
    if not app.config.get('SECRET_KEY'):
        raise RuntimeError(u'You must provide a value for the secret key'
                           ' with the SECRET_KEY config option')

    if debug:
        from flask_debugtoolbar import DebugToolbarExtension
        app.config['DEBUG_TB_INTERCEPT_REDIRECTS'] = False
        DebugToolbarExtension(app)

    # Use Beaker as the Flask session interface
    class BeakerSessionInterface(SessionInterface):
        def open_session(self, app, request):
            # hand back the session that SessionMiddleware stored in the
            # WSGI environ (implicitly None when it is missing)
            if 'beaker.session' in request.environ:
                return request.environ['beaker.session']

        def save_session(self, app, session, response):
            session.save()

    # Collect the ``beaker.session.*`` options, dropping the ``beaker.`` part
    namespace = 'beaker.session.'
    session_opts = {k.replace('beaker.', ''): v
                    for k, v in config.items()
                    if k.startswith(namespace)}
    if (not session_opts.get('session.data_dir')
            and session_opts.get('session.type', 'file') == 'file'):
        # file based sessions need a directory; default to the cache dir
        cache_dir = app_conf.get('cache_dir') or app_conf.get('cache.dir')
        session_opts['session.data_dir'] = '{data_dir}/sessions'.format(
            data_dir=cache_dir)

    app.wsgi_app = SessionMiddleware(app.wsgi_app, session_opts)
    app.session_interface = BeakerSessionInterface()

    # Add Jinja2 extensions and filters
    app.jinja_env.filters['empty_and_escape'] = \
        jinja_extensions.empty_and_escape

    # Common handlers for all requests
    app.before_request(ckan_before_request)
    app.after_request(ckan_after_request)

    # Template context processors
    app.context_processor(helper_functions)
    app.context_processor(c_object)

    @app.context_processor
    def ungettext_alias():
        u'''
        Provide `ungettext` as an alias of `ngettext` for backwards
        compatibility
        '''
        return dict(ungettext=ungettext)

    # Babel: core translations first, then those of ITranslation plugins
    pairs = [(os.path.join(root, u'i18n'), 'ckan')
             ] + [(p.i18n_directory(), p.i18n_domain())
                  for p in PluginImplementations(ITranslation)]

    i18n_dirs, i18n_domains = zip(*pairs)

    app.config[u'BABEL_TRANSLATION_DIRECTORIES'] = ';'.join(i18n_dirs)
    app.config[u'BABEL_DOMAIN'] = 'ckan'
    app.config[u'BABEL_MULTIPLE_DOMAINS'] = ';'.join(i18n_domains)

    babel = CKANBabel(app)

    babel.localeselector(get_locale)

    @app.route('/hello', methods=['GET'])
    def hello_world():
        return 'Hello World, this is served by Flask'

    @app.route('/hello', methods=['POST'])
    def hello_world_post():
        return 'Hello World, this was posted to Flask'

    # Auto-register all blueprints defined in the `views` folder
    _register_core_blueprints(app)
    _register_error_handler(app)

    # Set up each IBlueprint extension as a Flask Blueprint
    for plugin in PluginImplementations(IBlueprint):
        if hasattr(plugin, 'get_blueprint'):
            app.register_extension_blueprint(plugin.get_blueprint())

    # Set flask routes in named_routes
    for rule in app.url_map.iter_rules():
        # only ``<controller>.<action>`` style endpoints are recorded
        if '.' not in rule.endpoint:
            continue
        controller, action = rule.endpoint.split('.')
        # arguments that have no default must be supplied by the caller
        needed = list(rule.arguments - set(rule.defaults or {}))
        route = {
            rule.endpoint: {
                'action': action,
                'controller': controller,
                'highlight_actions': action,
                'needed': needed
            }
        }
        config['routes.named_routes'].update(route)

    # Start other middleware
    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    # Fanstatic: debug mode recomputes hashes and serves unminified
    # resources; bundling is disabled in both modes.
    fanstatic_config = {
        'versioning': True,
        'recompute_hashes': debug,
        'minified': not debug,
        'bottom': True,
        'bundle': False,
    }
    root_path = config.get('ckan.root_path', None)
    if root_path:
        # the language placeholder has no meaning for static resources
        root_path = root_path.replace('/{{LANG}}', '')
        fanstatic_config['base_url'] = root_path
    app = Fanstatic(app, **fanstatic_config)

    for plugin in PluginImplementations(IMiddleware):
        try:
            app = plugin.make_error_log_middleware(app, config)
        except AttributeError:
            log.critical('Middleware class {0} is missing the method '
                         'make_error_log_middleware.'.format(
                             plugin.__class__.__name__))

    # Initialize repoze.who; the config file is closed as soon as it has
    # been parsed instead of being left open for the process lifetime.
    who_parser = WhoConfig(conf['here'])
    with open(app_conf['who.config_file']) as who_config_file:
        who_parser.parse(who_config_file)

    app = PluggableAuthenticationMiddleware(
        app,
        who_parser.identifiers,
        who_parser.authenticators,
        who_parser.challengers,
        who_parser.mdproviders,
        who_parser.request_classifier,
        who_parser.challenge_decider,
        logging.getLogger('repoze.who'),
        logging.WARN,  # ignored
        who_parser.remote_user_key)

    # Update the main CKAN config object with the Flask specific keys
    # that were set here or autogenerated
    flask_config_keys = set(flask_app.config.keys()) - set(config.keys())
    for key in flask_config_keys:
        config[key] = flask_app.config[key]

    # Add a reference to the actual Flask app so it's easier to access
    app._wsgi_app = flask_app

    return app
Exemple #4
0
    def index_package(self, pkg_dict, defer_commit=False):
        """Flatten a dataset dict into a Solr document and add it to the index.

        ``pkg_dict``
            The dataset dict to index. It is modified in place (keys are
            popped and added) before being copied into the final document.
            ``None`` is ignored.

        ``defer_commit``
            When true the document is added without asking Solr to commit.
            The ``ckan.search.solr_commit`` option can also disable commits.

        Returns ``None``, except for datasets without a state or in a
        deleted state, where the result of ``self.delete_package`` is
        returned and nothing is added to the index.

        Raises ``SearchIndexError`` when Solr reports an error or cannot be
        reached.
        """
        if pkg_dict is None:
            return

        # tracking summary values will be stale, never store them
        tracking_summary = pkg_dict.pop('tracking_summary', None)
        for r in pkg_dict.get('resources', []):
            r.pop('tracking_summary', None)

        data_dict_json = json.dumps(pkg_dict)

        # asbool so that a textual "false" in the config disables the cache
        if asbool(config.get('ckan.cache_validated_datasets', True)):
            package_plugin = lib_plugins.lookup_package_plugin(
                pkg_dict.get('type'))

            schema = package_plugin.show_package_schema()
            validated_pkg_dict, _errors = lib_plugins.plugin_validate(
                package_plugin, {'model': model, 'session': model.Session},
                pkg_dict, schema, 'package_show')
            pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict,
                cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

        pkg_dict['data_dict'] = data_dict_json

        # add to string field for sorting
        title = pkg_dict.get('title')
        if title:
            pkg_dict['title_string'] = title

        # delete the package if there is no state, or the state is `deleted`
        if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
            return self.delete_package(pkg_dict)

        index_fields = RESERVED_FIELDS + list(pkg_dict.keys())

        # include the extras in the main namespace
        extras = pkg_dict.get('extras', [])
        for extra in extras:
            key, value = extra['key'], extra['value']
            if isinstance(value, (tuple, list)):
                value = " ".join(map(text_type, value))
            key = ''.join([c for c in key if c in KEY_CHARS])
            pkg_dict['extras_' + key] = value
            # never let an extra overwrite a reserved or existing field
            if key not in index_fields:
                pkg_dict[key] = value
        pkg_dict.pop('extras', None)

        # add tags, removing vocab tags from 'tags' list and adding them as
        # vocab_<tag name> so that they can be used in facets
        non_vocab_tag_names = []
        tags = pkg_dict.pop('tags', [])
        context = {'model': model}

        for tag in tags:
            if tag.get('vocabulary_id'):
                data = {'id': tag['vocabulary_id']}
                vocab = logic.get_action('vocabulary_show')(context, data)
                key = u'vocab_%s' % vocab['name']
                if key in pkg_dict:
                    pkg_dict[key].append(tag['name'])
                else:
                    pkg_dict[key] = [tag['name']]
            else:
                non_vocab_tag_names.append(tag['name'])

        pkg_dict['tags'] = non_vocab_tag_names

        # add groups
        groups = pkg_dict.pop('groups', [])

        # we use the capacity to make things private in the search index
        if pkg_dict['private']:
            pkg_dict['capacity'] = 'private'
        else:
            pkg_dict['capacity'] = 'public'

        pkg_dict['groups'] = [group['name'] for group in groups]

        # if there is an owner_org we want to add this to groups for index
        # purposes
        if pkg_dict.get('organization'):
            pkg_dict['organization'] = pkg_dict['organization']['name']
        else:
            pkg_dict['organization'] = None

        # tracking
        if not tracking_summary:
            tracking_summary = model.TrackingSummary.get_for_package(
                pkg_dict['id'])
        pkg_dict['views_total'] = tracking_summary['total']
        pkg_dict['views_recent'] = tracking_summary['recent']

        resource_fields = [('name', 'res_name'),
                           ('description', 'res_description'),
                           ('format', 'res_format'),
                           ('url', 'res_url'),
                           ('resource_type', 'res_type')]
        resource_extras = [(e, 'res_extras_' + e) for e
                            in model.Resource.get_extra_columns()]
        # flatten the structure for indexing:
        for resource in pkg_dict.get('resources', []):
            for (okey, nkey) in resource_fields + resource_extras:
                pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]
        pkg_dict.pop('resources', None)

        # group related dataset names by relationship type; relationships
        # where this dataset is the object are stored under the reverse type
        rel_dict = collections.defaultdict(list)
        subjects = pkg_dict.pop("relationships_as_subject", [])
        objects = pkg_dict.pop("relationships_as_object", [])
        for rel in objects:
            rel_type = model.PackageRelationship.forward_to_reverse_type(rel['type'])
            rel_dict[rel_type].append(model.Package.get(rel['subject_package_id']).name)
        for rel in subjects:
            rel_type = rel['type']
            rel_dict[rel_type].append(model.Package.get(rel['object_package_id']).name)
        for key, value in rel_dict.items():
            if key not in pkg_dict:
                pkg_dict[key] = value

        pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

        # Save dataset type
        pkg_dict['dataset_type'] = pkg_dict['type']

        # clean the dict fixing keys and dates
        # FIXME where are we getting these dirty keys from?  can we not just
        # fix them in the correct place or is this something that always will
        # be needed?  For my data not changing the keys seems to not cause a
        # problem.
        new_dict = {}
        bogus_date = datetime.datetime(1, 1, 1)
        for key, value in pkg_dict.items():
            key = key.encode('ascii', 'ignore')
            if key.endswith('_date'):
                try:
                    date = parse(value, default=bogus_date)
                    if date != bogus_date:
                        value = date.isoformat() + 'Z'
                    else:
                        # The date field was empty, so dateutil filled it with
                        # the default bogus date
                        value = None
                except ValueError:
                    # unparseable dates are left out of the document
                    continue
            new_dict[key] = value
        pkg_dict = new_dict

        for k in ('title', 'notes', 'title_string'):
            if k in pkg_dict and pkg_dict[k]:
                pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

        # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
        # with UTC time (i.e trailing Z)
        # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
        pkg_dict['metadata_created'] += 'Z'
        pkg_dict['metadata_modified'] += 'Z'

        # mark this CKAN instance as data source:
        pkg_dict['site_id'] = config.get('ckan.site_id')

        # Strip a selection of the fields.
        # These fields are possible candidates for sorting search results on,
        # so we strip leading spaces because solr will sort " " before "a" or "A".
        for field_name in ['title']:
            value = pkg_dict.get(field_name)
            if value:
                pkg_dict[field_name] = value.lstrip()

        # add a unique index_id to avoid conflicts
        import hashlib
        pkg_dict['index_id'] = hashlib.md5('%s%s' % (pkg_dict['id'],config.get('ckan.site_id'))).hexdigest()

        for item in PluginImplementations(IPackageController):
            pkg_dict = item.before_index(pkg_dict)

        assert pkg_dict, 'Plugin must return non empty package dict on index'

        # permission labels determine visibility in search, can't be set
        # in original dataset or before_index plugins
        labels = lib_plugins.get_permission_labels()
        dataset = model.Package.get(pkg_dict['id'])
        pkg_dict['permission_labels'] = labels.get_dataset_labels(
            dataset) if dataset else [] # TestPackageSearchIndex-workaround

        # send to solr:
        # bound up front so the socket.error handler can always refer to it,
        # even when make_connection() itself is what failed
        conn = None
        try:
            conn = make_connection()
            commit = not defer_commit
            if not asbool(config.get('ckan.search.solr_commit', 'true')):
                commit = False
            conn.add(docs=[pkg_dict], commit=commit)
        except pysolr.SolrError as e:
            # slice the message text itself (expected in args[0]) rather than
            # the exception object, so huge responses really are limited
            detail = e.args[0] if e.args else ''
            msg = 'Solr returned an error: {0}'.format(detail[:1000])
            raise SearchIndexError(msg)
        except socket.error as e:
            err = 'Could not connect to Solr using {0}: {1}'.format(
                getattr(conn, 'url', None), str(e))
            log.error(err)
            raise SearchIndexError(err)

        commit_debug_msg = 'Not committed yet' if defer_commit else 'Committed'
        log.debug('Updated index for %s [%s]' % (pkg_dict.get('name'), commit_debug_msg))
Exemple #5
0
        return False

    obj.retry_times += 1
    obj.save()

    if obj.retry_times >= 5:
        obj.state = "ERROR"
        obj.save()
        log.error('Too many consecutive retries for object {0}'.format(obj.id))
        channel.basic_ack(method.delivery_tag)
        return False

    # Send the harvest object to the plugins that implement
    # the Harvester interface, only if the source type
    # matches
    for harvester in PluginImplementations(IHarvester):
        if harvester.info()['name'] == obj.source.type:
            fetch_and_import_stages(harvester, obj)

    model.Session.remove()
    channel.basic_ack(method.delivery_tag)


def fetch_and_import_stages(harvester, obj):
    obj.fetch_started = datetime.datetime.utcnow()
    obj.state = "FETCH"
    obj.save()
    success_fetch = harvester.fetch_stage(obj)
    obj.fetch_finished = datetime.datetime.utcnow()
    obj.save()
    if success_fetch is True:
Exemple #6
0
def get_harvester(harvest_source_type):
    """Return the first IHarvester plugin whose ``info()['name']`` equals
    ``harvest_source_type``, or ``None`` when no plugin matches.
    """
    # lazily evaluated, so plugins after the first match are never queried
    matching = (candidate
                for candidate in PluginImplementations(IHarvester)
                if candidate.info()['name'] == harvest_source_type)
    return next(matching, None)
Exemple #7
0
def make_flask_stack(conf, **app_conf):
    """ This has to pass the flask app through all the same middleware that
    Pylons used

    ``conf``
        Global configuration; used to seed the Flask config when the
        shared ``config`` object is empty.

    ``app_conf``
        Application configuration; read for ``debug``/``DEBUG``,
        ``testing``/``TESTING`` and the cache directory.

    Returns the outermost WSGI middleware; the underlying Flask app is
    reachable through its ``_wsgi_app`` attribute.

    Raises ``RuntimeError`` when no secret key can be determined.
    """

    root = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

    debug = asbool(app_conf.get('debug', app_conf.get('DEBUG', False)))
    testing = asbool(app_conf.get('testing', app_conf.get('TESTING', False)))

    app = flask_app = CKANFlask(__name__)
    app.debug = debug
    app.testing = testing
    app.template_folder = os.path.join(root, 'templates')
    app.app_ctx_globals_class = CKAN_AppCtxGlobals
    app.url_rule_class = CKAN_Rule

    # Update Flask config with the CKAN values. We use the common config
    # object as values might have been modified on `load_environment`
    if config:
        app.config.update(config)
    else:
        app.config.update(conf)
        app.config.update(app_conf)

    # Do all the Flask-specific stuff before adding other middlewares

    # Secret key needed for flask-debug-toolbar and sessions
    if not app.config.get('SECRET_KEY'):
        app.config['SECRET_KEY'] = config.get('beaker.session.secret')
    if not app.config.get('SECRET_KEY'):
        raise RuntimeError(u'You must provide a value for the secret key'
                           ' with the SECRET_KEY config option')

    if debug:
        from flask_debugtoolbar import DebugToolbarExtension
        app.config['DEBUG_TB_INTERCEPT_REDIRECTS'] = False
        DebugToolbarExtension(app)

    # Use Beaker as the Flask session interface
    class BeakerSessionInterface(SessionInterface):
        def open_session(self, app, request):
            # hand back the session that SessionMiddleware stored in the
            # WSGI environ (implicitly None when it is missing)
            if 'beaker.session' in request.environ:
                return request.environ['beaker.session']

        def save_session(self, app, session, response):
            session.save()

    # Collect the ``beaker.session.*`` options, dropping the ``beaker.`` part
    namespace = 'beaker.session.'
    session_opts = {k.replace('beaker.', ''): v
                    for k, v in config.items()
                    if k.startswith(namespace)}
    if (not session_opts.get('session.data_dir')
            and session_opts.get('session.type', 'file') == 'file'):
        # file based sessions need a directory; default to the cache dir
        cache_dir = app_conf.get('cache_dir') or app_conf.get('cache.dir')
        session_opts['session.data_dir'] = '{data_dir}/sessions'.format(
            data_dir=cache_dir)

    app.wsgi_app = SessionMiddleware(app.wsgi_app, session_opts)
    app.session_interface = BeakerSessionInterface()

    # Add Jinja2 extensions and filters
    extensions = [
        'jinja2.ext.do', 'jinja2.ext.with_', jinja_extensions.SnippetExtension,
        jinja_extensions.CkanExtend,
        jinja_extensions.CkanInternationalizationExtension,
        jinja_extensions.LinkForExtension, jinja_extensions.ResourceExtension,
        jinja_extensions.UrlForStaticExtension,
        jinja_extensions.UrlForExtension
    ]
    for extension in extensions:
        app.jinja_env.add_extension(extension)
    app.jinja_env.filters['empty_and_escape'] = \
        jinja_extensions.empty_and_escape

    # Common handlers for all requests
    app.before_request(ckan_before_request)
    app.after_request(ckan_after_request)

    # Template context processors
    app.context_processor(helper_functions)
    app.context_processor(c_object)

    @app.route('/hello', methods=['GET'])
    def hello_world():
        return 'Hello World, this is served by Flask'

    @app.route('/hello', methods=['POST'])
    def hello_world_post():
        return 'Hello World, this was posted to Flask'

    # Auto-register all blueprints defined in the `views` folder
    _register_core_blueprints(app)

    # Set up each IBlueprint extension as a Flask Blueprint
    for plugin in PluginImplementations(IBlueprint):
        if hasattr(plugin, 'get_blueprint'):
            app.register_extension_blueprint(plugin.get_blueprint())

    # Start other middleware

    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    # Fanstatic: in debug mode hashes are recomputed and resources are served
    # unminified and unbundled; otherwise the opposite.
    fanstatic_config = {
        'versioning': True,
        'recompute_hashes': debug,
        'minified': not debug,
        'bottom': True,
        'bundle': not debug,
    }
    app = Fanstatic(app, **fanstatic_config)

    for plugin in PluginImplementations(IMiddleware):
        try:
            app = plugin.make_error_log_middleware(app, config)
        except AttributeError:
            log.critical('Middleware class {0} is missing the method '
                         'make_error_log_middleware.'.format(
                             plugin.__class__.__name__))

    # Update the main CKAN config object with the Flask specific keys
    # that were set here or autogenerated
    flask_config_keys = set(flask_app.config.keys()) - set(config.keys())
    for key in flask_config_keys:
        config[key] = flask_app.config[key]

    # Add a reference to the actual Flask app so it's easier to access
    app._wsgi_app = flask_app

    return app
Exemple #8
0
def load_environment(global_conf, app_conf):
    """Configure the Pylons environment via the ``pylons.config``
    object

    The steps run in this order:

    1. monkey-patch ``PylonsApp.find_controller`` so dotted / colon
       separated controller names can be resolved;
    2. initialise ``config`` with the package paths;
    3. load all CKAN plugins and let every ``IConfigurer`` update the
       config in place;
    4. derive ``ckan.host`` (and ``ckan.site_id`` when unset) from
       ``ckan.site_url``;
    5. install the routes map, the app globals and the helpers;
    6. build the Genshi template loader;
    7. create the SQLAlchemy engine and initialise the model unless it
       already has an engine;
    8. call ``configure()`` on every ``IConfigurable`` plugin.

    :param global_conf: forwarded to ``config.init_app`` as the global
        configuration
    :param app_conf: forwarded to ``config.init_app`` as the application
        configuration
    """

    ######  Pylons monkey-patch
    # this must be run at a time when the env is semi-setup, thus inlined here.
    # Required by the deliverance plugin and iATI
    from pylons.wsgiapp import PylonsApp
    import pkg_resources
    # Keep a reference to the unpatched method so the patch can fall back to it
    find_controller_generic = PylonsApp.find_controller

    # This is from pylons 1.0 source, will monkey-patch into 0.9.7
    def find_controller(self, controller):
        # Already resolved controllers are served from the per-app cache
        if controller in self.controller_classes:
            return self.controller_classes[controller]
        # Check to see if its a dotted name
        if '.' in controller or ':' in controller:
            # NOTE(review): presumably ``load(False)`` skips the distribution
            # requirement check -- confirm against pkg_resources.
            mycontroller = pkg_resources.EntryPoint.parse(
                'x=%s' % controller).load(False)
            self.controller_classes[controller] = mycontroller
            return mycontroller
        return find_controller_generic(self, controller)

    PylonsApp.find_controller = find_controller
    ###### END evil monkey-patch

    # Pylons paths
    # ``root`` is the parent of the directory containing this file
    root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    paths = dict(root=root,
                 controllers=os.path.join(root, 'controllers'),
                 static_files=os.path.join(root, 'public'),
                 templates=[os.path.join(root, 'templates')])

    # Initialize config with the basic options
    config.init_app(global_conf, app_conf, package='ckan', paths=paths)

    # load all CKAN plugins
    plugins.load_all(config)

    from ckan.plugins import PluginImplementations
    from ckan.plugins.interfaces import IConfigurer

    for plugin in PluginImplementations(IConfigurer):
        # must do update in place as this does not work:
        # config = plugin.update_config(config)
        plugin.update_config(config)

    # This is set up before globals are initialized
    site_url = config.get('ckan.site_url', '')
    ckan_host = config['ckan.host'] = urlparse(site_url).netloc
    if config.get('ckan.site_id') is None:
        # Default the site id to the host name without its port.
        # NOTE(review): the two-value unpacking assumes at most one ':' in
        # the netloc -- confirm IPv6 hosts are never configured here.
        if ':' in ckan_host:
            ckan_host, port = ckan_host.split(':')
        config['ckan.site_id'] = ckan_host

    config['routes.map'] = make_map()
    config['pylons.app_globals'] = app_globals.Globals()
    config['pylons.h'] = ckan.lib.helpers

    ## redo template setup to use genshi.search_path (so remove std template setup)
    template_paths = [paths['templates'][0]]
    extra_template_paths = config.get('extra_template_paths', '')
    if extra_template_paths:
        # must be first for them to override defaults
        template_paths = extra_template_paths.split(',') + template_paths

    # Translator (i18n)
    translator = Translator(pylons.translator)

    # Callback handed to the template loader below; it runs the translator
    # setup on each template passed to it.
    def template_loaded(template):
        translator.setup(template)

    # Markdown ignores the logger config, so to get rid of excessive
    # markdown debug messages in the log, set it to the level of the
    # root logger.
    logging.getLogger("MARKDOWN").setLevel(logging.getLogger().level)

    # Create the Genshi TemplateLoader
    # config['pylons.app_globals'].genshi_loader = TemplateLoader(
    #    paths['templates'], auto_reload=True)
    # tmpl_options["genshi.loader_callback"] = template_loaded
    config['pylons.app_globals'].genshi_loader = TemplateLoader(
        template_paths, auto_reload=True, callback=template_loaded)

    # CONFIGURATION OPTIONS HERE (note: all config options will override
    # any Pylons config options)

    # Setup the SQLAlchemy database engine
    engine = engine_from_config(config, 'sqlalchemy.')

    # Only initialise the model when it has no engine yet
    if not model.meta.engine:
        model.init_model(engine)

    from ckan.plugins import PluginImplementations
    from ckan.plugins.interfaces import IConfigurable

    # Give every IConfigurable plugin the final configuration
    for plugin in PluginImplementations(IConfigurable):
        plugin.configure(config)
Exemple #9
0
def get_action(action):
    '''Return the ckan.logic.action function named by the given string.

    For example:

        get_action('package_create')

    will normally return the ckan.logic.action.create.py:package_create()
    function.

    Rather than importing a ckan.logic.action function and calling it directly,
    you should always fetch the function via get_action():

        # Call the package_create action function:
        get_action('package_create')(context, data_dict)

    This is because CKAN plugins can override action functions using the
    IActions plugin interface, causing get_action() to return a plugin-provided
    function instead of the default one.

    As the context parameter passed to an action function is commonly:

        context = {'model': ckan.model, 'session': ckan.model.Session,
                   'user': pylons.c.user or pylons.c.author}

    an action function returned by get_action() will automatically add these
    parameters to the context if they are not defined.  This is especially
    useful for extensions as they should not really be importing parts of ckan
    eg ckan.model and as such do not have access to model or model.Session.

    If a context of None is passed to the action function then the context dict
    will be created.

    The first call populates the module-level ``_actions`` cache; later calls
    are served from that cache.

    :param action: name of the action function to return
    :type action: string

    :returns: the named action function
    :rtype: callable

    :raises KeyError: if no action function with the given name exists
    :raises Exception: if two plugins implement the same action

    '''
    if _actions:
        if action not in _actions:
            raise KeyError("Action '%s' not found" % action)
        return _actions.get(action)
    # Otherwise look in all the plugins to resolve all possible
    # First get the default ones in the ckan/logic/action directory
    # Rather than writing them out in full will use __import__
    # to load anything from ckan.logic.action that looks like it might
    # be an action
    for action_module_name in ['get', 'create', 'update', 'delete']:
        module_path = 'ckan.logic.action.' + action_module_name
        # __import__ returns the top-level package, so walk down to the
        # actual action module.
        module = __import__(module_path)
        for part in module_path.split('.')[1:]:
            module = getattr(module, part)
        for k, v in module.__dict__.items():
            if not k.startswith('_'):
                # Only load functions from the action module.
                if isinstance(v, types.FunctionType):
                    _actions[k] = v

                    # Whitelist all actions defined in logic/action/get.py as
                    # being side-effect free.
                    v.side_effect_free = getattr(v, 'side_effect_free', True)\
                        and action_module_name == 'get'

    # Then overwrite them with any specific ones in the plugins:
    resolved_action_plugins = {}
    fetched_actions = {}
    for plugin in PluginImplementations(IActions):
        for name, action_function in plugin.get_actions().items():
            if name in resolved_action_plugins:
                raise Exception(
                    'The action %r is already implemented in %r' % (
                        name,
                        resolved_action_plugins[name]
                    )
                )
            log.debug('Action function %r was inserted by plugin %r',
                      name, plugin.name)
            resolved_action_plugins[name] = plugin.name
            fetched_actions[name] = action_function
    # Use the updated ones in preference to the originals.
    _actions.update(fetched_actions)

    def make_wrapped(_action):
        # Factory so that every wrapper closes over its own ``_action``
        # instead of the loop variable.
        def wrapped(context=None, data_dict=None, **kw):
            if kw:
                log.critical('%s was passed extra keywords %r'
                             % (_action.__name__, kw))
            if context is None:
                context = {}
            context.setdefault('model', model)
            context.setdefault('session', model.Session)
            try:
                context.setdefault('user', base.c.user or base.c.author)
            except TypeError:
                # c not registered
                pass
            return _action(context, data_dict, **kw)
        return wrapped

    # wrap the functions; iterate over a snapshot because the entries are
    # replaced while looping
    for action_name, _action in list(_actions.items()):
        fn = make_wrapped(_action)
        # we need to mirror the docstring
        fn.__doc__ = _action.__doc__
        # we need to retain the side effect free behaviour
        if getattr(_action, 'side_effect_free', False):
            fn.side_effect_free = True
        _actions[action_name] = fn

    # Behave like the cached path above: an unknown name is an error, not a
    # silent ``None`` that only fails once the caller tries to call it.
    if action not in _actions:
        raise KeyError("Action '%s' not found" % action)
    return _actions[action]
Exemple #10
0
 def __init__(self):
     """Run the ``BaseController`` initialiser, then store the
     ``IGroupController`` plugin implementations on ``self.extensions``.
     """
     BaseController.__init__(self)
     self.extensions = PluginImplementations(IGroupController)
def datastore_search(context, data_dict, original_data_dict):
    '''
    Run a versioned search against the datastore, using a query schema similar to the standard
    CKAN datastore one.

    When ``run_query`` is present and falsey in the data dict the search is not executed; the
    query that would have been sent to elasticsearch is returned instead.

    :param context: the context dict from the action call
    :param data_dict: the data_dict from the action call
    :param original_data_dict: the data_dict before it was validated
    :return: a dict including the search results amongst other things
    '''
    original_data_dict, data_dict, version, search = create_search(
        context, data_dict, original_data_dict)
    resource_id = data_dict[u'resource_id']
    index_name = prefix_resource(resource_id)

    # no explicit version requested means "now"
    if version is None:
        version = to_timestamp(datetime.now())

    # restrict the query to the chosen version
    versioned_search = search.filter(create_version_query(version))

    # the caller only wants the query itself (defaults to running it), e.g. so it can be run
    # outside of ckan on a tile server
    if not data_dict.get(u'run_query', True):
        return {
            u'indexes': [index_name],
            u'search': versioned_search.to_dict(),
        }

    result = run_search(versioned_search, [index_name])

    # let extensions implementing our interface rework the result
    for extension in PluginImplementations(IVersionedDatastore):
        result = extension.datastore_modify_result(
            context, original_data_dict, data_dict, result)

    # expose the result object through the context so that extensions which altered the search
    # (for example to add an aggregation) can read what they asked for
    context[u'versioned_datastore_query_result'] = result

    mapping, fields = get_fields(resource_id, version)
    # let extensions implementing our interface rework the field definitions
    for extension in PluginImplementations(IVersionedDatastore):
        fields = extension.datastore_modify_fields(resource_id, mapping, fields)

    # only the truthy, loggable parts of the request are recorded
    logged_query = {
        key: data_dict[key]
        for key in _query_log_keys
        if data_dict.get(key, None)
    }
    log_query(logged_query, u'basicsearch')

    # the results plus the details needed to interpret them
    return {
        u'total': result.hits.total,
        u'records': [hit.data.to_dict() for hit in result],
        u'facets': format_facets(result.aggs.to_dict()),
        u'fields': fields,
        u'raw_fields': (
            mapping[u'mappings'][DOC_TYPE][u'properties'][u'data'][u'properties']
        ),
        u'after': get_last_after(result.hits),
        u'_backend': u'versioned-datastore',
    }
def datastore_search_raw(resource_id,
                         context,
                         data_dict,
                         original_data_dict,
                         search=None,
                         version=None,
                         raw_result=False,
                         include_version=True):
    '''
    Run a raw elasticsearch query against the datastore.

    :param resource_id: the id of the resource to search
    :param context: the context dict from the action call
    :param data_dict: the data_dict from the action call
    :param original_data_dict: the data_dict before it was validated
    :param search: the elasticsearch query to run
    :param version: the version of the data to query against
    :param raw_result: whether to return the result as a raw elasticsearch result, or format it in
                       the same way as a normal datastore_search call would
    :param include_version: whether to include the version in the search or not
    :return: a dict containing the results of the search
    '''
    search = {} if search is None else search
    version = to_timestamp(datetime.now()) if version is None else version
    index_name = prefix_resource(resource_id)
    search = Search.from_dict(search)

    try:
        # a raw result without the version filter was asked for, so query with no version;
        # otherwise use the requested version or the current timestamp
        skip_version_filter = raw_result and not include_version
        query_version = None if skip_version_filter else version

        result = run_search(search, index_name, query_version)

        if raw_result:
            return result.to_dict()

        # let extensions implementing our interface rework the result object
        for extension in PluginImplementations(IVersionedDatastore):
            result = extension.datastore_modify_result(
                context, original_data_dict, data_dict, result)

        # expose the result object through the context so that extensions which altered the
        # search (for example to add an aggregation) can read what they asked for
        context[u'versioned_datastore_query_result'] = result

        mapping, fields = get_fields(resource_id, version)
        # let extensions implementing our interface rework the field definitions
        for extension in PluginImplementations(IVersionedDatastore):
            fields = extension.datastore_modify_fields(resource_id, mapping, fields)

        # the results plus the details needed to interpret them
        return {
            u'total': result.hits.total,
            u'records': [hit.data.to_dict() for hit in result],
            u'facets': format_facets(result.aggs.to_dict()),
            u'fields': fields,
            u'raw_fields': (
                mapping[u'mappings'][DOC_TYPE][u'properties'][u'data'][u'properties']
            ),
            u'after': get_last_after(result.hits),
            u'_backend': u'versioned-datastore',
        }
    except RequestError as e:
        raise toolkit.ValidationError(str(e))
Exemple #13
0
def make_flask_stack(conf: Union[Config, CKANConfig]) -> CKANApp:
    """Build the CKAN Flask application and wrap it in the WSGI middleware
    stack.

    This has to pass the flask app through all the same middleware that
    Pylons used.

    :param conf: the configuration the application is created from; the
        module-level ``config`` object takes precedence over it when it is
        non-empty
    :returns: the outermost WSGI callable, with the underlying Flask app
        available as its ``_wsgi_app`` attribute
    :raises RuntimeError: if no secret key is configured
    """

    root = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

    debug = asbool(conf.get('debug', conf.get('DEBUG', False)))
    testing = asbool(conf.get('testing', conf.get('TESTING', False)))
    # ``flask_app`` keeps pointing at the Flask object while ``app`` is
    # rebound to each middleware wrapper further down.
    app = flask_app = CKANFlask(__name__, static_url_path='')

    # Register storage for accessing group images, site logo, etc.
    storage_folder = []
    storage = uploader.get_storage_path()
    if storage:
        storage_folder = [os.path.join(storage, 'storage')]

    # Static files folders (core and extensions)
    public_folder = config.get_value(u'ckan.base_public_folder')
    app.static_folder = config.get_value('extra_public_paths').split(',') + [
        os.path.join(root, public_folder)
    ] + storage_folder

    app.jinja_options = jinja_extensions.get_jinja_env_options()
    app.jinja_env.policies['ext.i18n.trimmed'] = True

    app.debug = debug
    app.testing = testing
    app.template_folder = os.path.join(root, 'templates')
    app.app_ctx_globals_class = CKAN_AppCtxGlobals
    app.url_rule_class = CKAN_Rule

    # Update Flask config with the CKAN values. We use the common config
    # object as values might have been modified on `load_environment`
    if config:
        app.config.update(config)
    else:
        app.config.update(conf)

    # Do all the Flask-specific stuff before adding other middlewares

    # Secret key needed for flask-debug-toolbar and sessions
    if not app.config.get('SECRET_KEY'):
        app.config['SECRET_KEY'] = config.get_value('beaker.session.secret')
    if not app.config.get('SECRET_KEY'):
        raise RuntimeError(u'You must provide a value for the secret key'
                           ' with the SECRET_KEY config option')

    root_path = config.get_value('ckan.root_path')
    if debug:
        from flask_debugtoolbar import DebugToolbarExtension
        app.config['DEBUG_TB_INTERCEPT_REDIRECTS'] = False
        debug_ext = DebugToolbarExtension()

        # register path that includes `ckan.root_path` before
        # initializing debug app. In such a way, our route receives
        # higher precedence.

        # TODO: After removal of Pylons code, switch to
        # `APPLICATION_ROOT` config value for flask application. Right
        # now it's a bad option because we are handling both pylons
        # and flask urls inside helpers and splitting this logic will
        # bring us tons of headache.
        if root_path:
            app.add_url_rule(
                root_path.replace('{{LANG}}', '').rstrip('/') +
                '/_debug_toolbar/static/<path:filename>',
                '_debug_toolbar.static', debug_ext.send_static_file)
        debug_ext.init_app(app)

        from werkzeug.debug import DebuggedApplication
        app.wsgi_app = DebuggedApplication(app.wsgi_app, True)

    # Use Beaker as the Flask session interface
    class BeakerSessionInterface(SessionInterface):
        def open_session(self, app: Any, request: Any):
            # Returns None implicitly when the Beaker middleware has not put
            # a session into the WSGI environ.
            if 'beaker.session' in request.environ:
                return request.environ['beaker.session']

        def save_session(self, app: Any, session: Any, response: Any):
            session.save()

    # Collect the ``beaker.session.*`` options, dropping the ``beaker.``
    # prefix from each key.
    namespace = 'beaker.session.'
    session_opts = {
        k.replace('beaker.', ''): v
        for k, v in config.items() if k.startswith(namespace)
    }
    # File-based sessions without an explicit data dir go under the cache dir
    if (not session_opts.get('session.data_dir')
            and session_opts.get('session.type', 'file') == 'file'):
        cache_dir = conf.get('cache_dir') or conf.get('cache.dir')
        session_opts['session.data_dir'] = '{data_dir}/sessions'.format(
            data_dir=cache_dir)

    app.wsgi_app = RootPathMiddleware(app.wsgi_app)
    app.wsgi_app = SessionMiddleware(app.wsgi_app, session_opts)
    app.session_interface = BeakerSessionInterface()

    # Add Jinja2 extensions and filters
    app.jinja_env.filters['empty_and_escape'] = \
        jinja_extensions.empty_and_escape

    # Common handlers for all requests
    #
    # flask types do not mention that it's possible to return a response from
    # the `before_request` callback
    app.before_request(ckan_before_request)
    app.after_request(ckan_after_request)

    # Template context processors
    app.context_processor(helper_functions)
    app.context_processor(c_object)

    app.context_processor(_ungettext_alias)

    # Babel
    _ckan_i18n_dir = i18n.get_ckan_i18n_dir()

    # (directory, domain) pairs: CKAN core first, then the ITranslation
    # plugins in reverse order.
    pairs = [cast("tuple[str, str]", (_ckan_i18n_dir, u'ckan'))
             ] + [(p.i18n_directory(), p.i18n_domain())
                  for p in reversed(list(PluginImplementations(ITranslation)))]

    i18n_dirs, i18n_domains = zip(*pairs)

    app.config[u'BABEL_TRANSLATION_DIRECTORIES'] = ';'.join(i18n_dirs)
    app.config[u'BABEL_DOMAIN'] = 'ckan'
    app.config[u'BABEL_MULTIPLE_DOMAINS'] = ';'.join(i18n_domains)
    app.config[u'BABEL_DEFAULT_TIMEZONE'] = str(helpers.get_display_timezone())

    babel = CKANBabel(app)

    babel.localeselector(get_locale)

    # WebAssets
    _setup_webassets(app)

    # Auto-register all blueprints defined in the `views` folder
    _register_core_blueprints(app)
    _register_error_handler(app)

    # Set up each IBlueprint extension as a Flask Blueprint
    for plugin in PluginImplementations(IBlueprint):
        # A plugin may return either a single blueprint or a list of them
        plugin_blueprints = plugin.get_blueprint()
        if not isinstance(plugin_blueprints, list):
            plugin_blueprints = [plugin_blueprints]
        for blueprint in plugin_blueprints:
            app.register_extension_blueprint(blueprint)

    lib_plugins.register_package_blueprints(app)
    lib_plugins.register_group_blueprints(app)

    # Start other middleware
    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    for plugin in PluginImplementations(IMiddleware):
        try:
            app = plugin.make_error_log_middleware(app, config)
        except AttributeError:
            log.critical('Middleware class {0} is missing the method '
                         'make_error_log_middleware.'.format(
                             plugin.__class__.__name__))

    # Initialize repoze.who
    who_parser = WhoConfig(conf['here'])
    # Close the config file once it has been parsed rather than leaking the
    # open handle.
    with open(cast(str, conf['who.config_file'])) as who_config_file:
        who_parser.parse(who_config_file)

    app = PluggableAuthenticationMiddleware(
        RepozeAdapterMiddleware(app),
        who_parser.identifiers,
        who_parser.authenticators,
        who_parser.challengers,
        who_parser.mdproviders,
        who_parser.request_classifier,
        who_parser.challenge_decider,
        logging.getLogger('repoze.who'),
        logging.WARN,  # ignored
        who_parser.remote_user_key)

    # Update the main CKAN config object with the Flask specific keys
    # that were set here or autogenerated
    flask_config_keys = set(flask_app.config.keys()) - set(config.keys())
    for key in flask_config_keys:
        config[key] = flask_app.config[key]

    # Prevent the host from request to be added to the new header location.
    app = HostHeaderMiddleware(app)

    app = I18nMiddleware(app)

    if config.get_value('ckan.tracking_enabled'):
        app = TrackingMiddleware(app, config)

    # Add a reference to the actual Flask app so it's easier to access
    # type_ignore_reason: custom attribute
    app._wsgi_app = flask_app  # type: ignore

    return app
Exemple #14
0
def extract(ini_path, res_dict):
    """
    Download resource, extract and store metadata.

    The extracted metadata is stored in the database.

    Note that this task does not check whether the resource exists in the
    database, whether the resource's format is indexed or whether there
    is an existing task working on the resource's metadata. This is the
    responsibility of the caller.

    The task does check which metadata fields are configured to be
    indexed and only stores those in the database.

    Any previously stored metadata for the resource is cleared.

    Nothing is extracted when ``package_show`` raises ``NotAuthorized``
    for the resource's package. A failed download is logged and the
    (cleared) metadata record is still saved.

    :param ini_path: path of the configuration file passed to
        ``load_config``
    :param res_dict: resource dict; the keys ``id``, ``package_id``,
        ``url`` and ``format`` are read
    """
    load_config(ini_path)

    # Get package data before doing any hard work so that we can fail
    # early if the package is private.
    try:
        toolkit.get_action('package_show')({'validate': False},
                                           {'id': res_dict['package_id']})
    except toolkit.NotAuthorized:
        log.debug('Not extracting resource %s since it belongs to the '
                  'private dataset %s.', res_dict['id'],
                  res_dict['package_id'])
        return

    # Re-use the existing metadata record of the resource, if any
    try:
        metadata = ResourceMetadata.one(resource_id=res_dict['id'])
    except NoResultFound:
        metadata = ResourceMetadata.create(resource_id=res_dict['id'])
    try:
        metadata.last_url = res_dict['url']
        metadata.last_format = res_dict['format']
        metadata.last_extracted = datetime.datetime.now()
        metadata.meta.clear()
        extracted = download_and_extract(res_dict['url'])
        for plugin in PluginImplementations(IExtractorPostprocessor):
            plugin.extractor_after_extract(res_dict, extracted)
        # ``items()`` instead of ``iteritems()`` so the loop runs on both
        # Python 2 and Python 3
        for key, value in extracted.items():
            if not is_field_indexed(key):
                continue

            # Some documents contain multiple values for the same field. This
            # is not supported in our database model, hence we collapse these
            # into a single value.
            if isinstance(value, list):
                log.debug('Collapsing multiple values for metadata field '
                          '"%s" in resource %s into a single value.', key,
                          res_dict['id'])
                value = ', '.join(value)

            metadata.meta[key] = value
    except RequestException as e:
        # Log the exception itself: ``e.message`` does not exist on
        # Python 3 and would raise inside this handler.
        log.warning('Failed to download resource data from "%s": %s',
                    res_dict['url'], e)
    finally:
        # Always release the task marker and persist what we have, even if
        # the extraction failed.
        metadata.task_id = None
        metadata.save()

    for plugin in PluginImplementations(IExtractorPostprocessor):
        plugin.extractor_after_save(res_dict, metadata.as_dict())

    # We need to update the search index for the package here. Note that
    # we cannot rely on the automatic update that happens when a resource
    # is changed, since our extraction task runs asynchronously and may
    # be finished only when the automatic index update has already run.
    search.rebuild(package_id=res_dict['package_id'])

    for plugin in PluginImplementations(IExtractorPostprocessor):
        plugin.extractor_after_index(res_dict, metadata.as_dict())
Exemple #15
0
def harvest_objects_import(context, data_dict):
    '''
    Reimports the current harvest objects

    It performs the import stage with the last fetched objects, optionally
    belonging to a certain source.

    Please note that no objects will be fetched from the remote server.
    It will only affect the last fetched objects already present in the
    database.

    The objects are selected by the first of ``source_id``,
    ``harvest_object_id`` or ``package_id`` that is present in
    ``data_dict``; without any of them all current objects are used.

    :param context: context dict; ``session`` is required, ``segments`` and
        ``join_datasets`` (default ``True``) are optional
    :param data_dict: may contain ``source_id``, ``harvest_object_id`` and
        ``package_id`` (a package id or name)
    :returns: the number of harvest objects that were processed
    :rtype: int
    :raises NotFound: if ``source_id`` does not match a harvest source
    :raises Exception: if the harvest source is not active
    '''
    log.info('Harvest objects import: %r', data_dict)
    check_access('harvest_objects_import', context, data_dict)

    session = context['session']
    source_id = data_dict.get('source_id', None)
    harvest_object_id = data_dict.get('harvest_object_id', None)
    package_id_or_name = data_dict.get('package_id', None)

    segments = context.get('segments', None)

    join_datasets = context.get('join_datasets', True)

    if source_id:
        source = HarvestSource.get(source_id)
        if not source:
            log.error('Harvest source %s does not exist', source_id)
            raise NotFound('Harvest source %s does not exist' % source_id)

        if not source.active:
            log.warning('Harvest source %s is not active.', source_id)
            raise Exception('This harvest source is not active')

        last_objects_ids = session.query(HarvestObject.id) \
            .join(HarvestSource) \
            .filter(HarvestObject.source == source) \
            .filter(HarvestObject.current == True)  # noqa: E712

    elif harvest_object_id:
        last_objects_ids = session.query(HarvestObject.id) \
            .filter(HarvestObject.id == harvest_object_id)
    elif package_id_or_name:
        last_objects_ids = session.query(HarvestObject.id) \
            .join(Package) \
            .filter(HarvestObject.current == True) \
            .filter(Package.state == u'active') \
            .filter(or_(Package.id == package_id_or_name,
                        Package.name == package_id_or_name))  # noqa: E712
        # The query above is already joined with Package
        join_datasets = False
    else:
        last_objects_ids = session.query(HarvestObject.id) \
            .filter(HarvestObject.current == True)  # noqa: E712

    if join_datasets:
        last_objects_ids = last_objects_ids.join(Package) \
            .filter(Package.state == u'active')

    last_objects_ids = last_objects_ids.all()

    last_objects_count = 0

    for obj_id in last_objects_ids:
        if segments:
            # Skip objects whose id hashes into a segment that was not asked
            # for. md5 needs bytes, so the id is encoded first.
            digest = hashlib.md5(obj_id[0].encode('utf-8')).hexdigest()
            if digest[0] not in segments:
                continue

        obj = session.query(HarvestObject).get(obj_id)

        # Hand the object to the first harvester matching its source type
        for harvester in PluginImplementations(IHarvester):
            if harvester.info()['name'] == obj.source.type:
                if hasattr(harvester, 'force_import'):
                    harvester.force_import = True
                harvester.import_stage(obj)
                break
        last_objects_count += 1
    log.info('Harvest objects imported: %s', last_objects_count)
    return last_objects_count
Exemple #16
0
"""Routes configuration

The more specific and detailed routes should be defined first so they
may take precedence over the more generic routes. For more information
refer to the routes manual at http://routes.groovie.org/docs/

"""
from pylons import config
from routes import Mapper
from ckan.plugins import PluginImplementations, IRoutes

# Implementations of the IRoutes plugin interface; created once, when this
# module is imported.
routing_plugins = PluginImplementations(IRoutes)


def make_map():
    """Create, configure and return the routes Mapper"""
    # NOTE(review): the body visible here only defines the request-method
    # condition dicts and imports two registration helpers; no Mapper is
    # built or returned in this part -- confirm against the full function.

    # import controllers here rather than at root level because
    # pylons config is initialised by this point.

    # Helpers to reduce code clutter: each is a dict with a single
    # ``method`` key listing HTTP methods
    GET = dict(method=['GET'])
    PUT = dict(method=['PUT'])
    POST = dict(method=['POST'])
    DELETE = dict(method=['DELETE'])
    GET_POST = dict(method=['GET', 'POST'])
    PUT_POST = dict(method=['PUT', 'POST'])
    PUT_POST_DELETE = dict(method=['PUT', 'POST', 'DELETE'])
    OPTIONS = dict(method=['OPTIONS'])

    # Imported inside the function rather than at module level
    from ckan.lib.plugins import register_package_plugins
    from ckan.lib.plugins import register_group_plugins
Exemple #17
0
def harvest_objects_import(context, data_dict):
    '''
    Reimports the existing harvest objects, specified by either guid,
    source_id, harvest_object_id or package_id.

    It performs the import stage with the last fetched objects, optionally
    belonging to a certain source.

    Please note that no objects will be fetched from the remote server.

    It will only affect the last fetched objects already present in the
    database.

    Only one selector is honoured per call. They are checked in this order
    and the first non-empty one wins: ``guid``, ``source_id``,
    ``harvest_object_id``, ``package_id``. If none is given, every harvest
    object flagged as current is selected.

    The following keys are read from ``context``:

    * ``session``: database session used for all queries (required).
    * ``segments``: optional container (or string) of hexadecimal
      characters. When set, an object is processed only if the first hex
      digit of the md5 of its id is contained in it.
    * ``join_datasets``: optional, defaults to ``True``. When true the
      selection is restricted to objects whose package is in the
      ``active`` state. The ``package_id`` selector already applies this
      restriction itself.

    :param source_id: the id of the harvest source to import
    :type source_id: string
    :param guid: the guid of the harvest object to import
    :type guid: string
    :param harvest_object_id: the id of the harvest object to import
    :type harvest_object_id: string
    :param package_id: the id or name of the package to import
    :type package_id: string

    :returns: the number of selected objects that passed the segment
        filter (counted whether or not a matching harvester was found)
    :rtype: int

    :raises NotFound: if ``source_id`` does not match a harvest source
    :raises Exception: if the harvest source exists but is not active
    '''
    log.info('Harvest objects import: %r', data_dict)
    check_access('harvest_objects_import', context, data_dict)

    session = context['session']
    source_id = data_dict.get('source_id')
    guid = data_dict.get('guid')
    harvest_object_id = data_dict.get('harvest_object_id')
    package_id_or_name = data_dict.get('package_id')

    segments = context.get('segments')

    join_datasets = context.get('join_datasets', True)

    if guid:
        last_objects_ids = \
            session.query(HarvestObject.id) \
                   .filter(HarvestObject.guid == guid) \
                   .filter(HarvestObject.current == True)  # noqa: E712

    elif source_id:
        source = HarvestSource.get(source_id)
        if not source:
            log.error('Harvest source %s does not exist', source_id)
            raise NotFound('Harvest source %s does not exist' % source_id)

        if not source.active:
            # Logger.warn is a deprecated alias (removed in Python 3.13);
            # Logger.warning is available on every supported version.
            log.warning('Harvest source %s is not active.', source_id)
            raise Exception('This harvest source is not active')

        last_objects_ids = \
            session.query(HarvestObject.id) \
                   .join(HarvestSource) \
                   .filter(HarvestObject.source == source) \
                   .filter(HarvestObject.current == True)  # noqa: E712

    elif harvest_object_id:
        # An explicit object id is honoured even if that object is not
        # flagged as current.
        last_objects_ids = \
            session.query(HarvestObject.id) \
                   .filter(HarvestObject.id == harvest_object_id)
    elif package_id_or_name:
        last_objects_ids = (
            session.query(HarvestObject.id).join(Package).filter(
                HarvestObject.current == True)  # noqa: E712
            .filter(Package.state == u'active').filter(
                or_(Package.id == package_id_or_name,
                    Package.name == package_id_or_name)))
        # Package is already joined and filtered on state above; joining
        # it a second time below must be avoided.
        join_datasets = False
    else:
        last_objects_ids = \
            session.query(HarvestObject.id) \
                   .filter(HarvestObject.current == True)  # noqa: E712

    if join_datasets:
        last_objects_ids = last_objects_ids.join(Package) \
            .filter(Package.state == u'active')

    last_objects_ids = last_objects_ids.all()

    last_objects_count = 0

    for obj_id in last_objects_ids:
        # Each result row is a 1-tuple holding the object id. The first hex
        # digit of the id's md5 decides which segment the object falls in.
        if segments and \
                str(hashlib.md5(six.ensure_binary(obj_id[0])).hexdigest())[0] not in segments:
            continue

        obj = session.query(HarvestObject).get(obj_id)

        # Hand the object to the first harvester whose declared name
        # matches the type of the object's source.
        for harvester in PluginImplementations(IHarvester):
            if harvester.info()['name'] == obj.source.type:
                if hasattr(harvester, 'force_import'):
                    harvester.force_import = True
                harvester.import_stage(obj)
                break
        last_objects_count += 1
    log.info('Harvest objects imported: %s', last_objects_count)
    return last_objects_count