Example #1
0
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Queue the start-up version refresh and register its daily repeat.

    :param sender: Celery application the periodic task is registered on.
    :param kwargs: Extra keyword arguments; not used here.
    """
    startup_delay_seconds = 60
    one_day_seconds = 60 * 60 * 24

    # One-off run, delayed by a minute, each time the app starts.
    refresh_software_versions.apply_async(countdown=startup_delay_seconds)

    # Recurring run, once every 24 hours.
    sender.add_periodic_task(one_day_seconds, refresh_software_versions)
Example #2
0
def set_up_periodic_tasks(sender: Celery, **kwargs) -> None:
    """
    Register the scheduled attack and discovery jobs. Both are scheduled with the same ``BEAT_INTERVAL``; the
    jobs themselves may decide whether there is anything to execute on a given tick. This function is meant to
    be invoked by Celery itself rather than called directly.

    :param sender: Celery instance
    :param kwargs: Optional values
    """
    # Load the configuration first when `modules` is still falsy.
    if not modules:
        safe_load_config()

    attack_signature = run_jobs.s()
    sender.add_periodic_task(BEAT_INTERVAL, attack_signature, name='Launch attacks')

    # Kept as a function-level import, exactly where the original placed it.
    from redbot.modules.discovery import do_discovery

    discovery_signature = do_discovery.s()
    sender.add_periodic_task(BEAT_INTERVAL, discovery_signature, queue='discovery', name='Launch discovery')
Example #3
0
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register the recurring jobs and queue the start-up catch-up runs.

    Interval-based and crontab-based tasks are registered on *sender*.
    Afterwards a few tasks are queued once, some of them only when the
    timestamp stored in ``redis_store`` is missing or more than a day old.

    :param sender: Celery application the periodic tasks are registered on.
    :param kwargs: Extra keyword arguments; not used here.
    """
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    week = 7 * day

    def overdue(redis_key):
        # True when nothing is stored under the key, or the stored unix
        # timestamp lies more than one day in the past.
        stamp = redis_store.get(redis_key)
        if stamp is None:
            return True
        age = datetime.now() - datetime.fromtimestamp(int(stamp))
        return age > timedelta(days=1)

    sender.add_periodic_task(hour, periodic_curse_login.s())

    sender.add_periodic_task(15 * minute, periodic_fill_missing_addons.s())

    feed_schedule = (
        (25 * minute, Timespan.HOURLY),   # 25 minutes
        (11 * hour, Timespan.DAILY),      # 11 hours
        (3 * day, Timespan.WEEKLY),       # 3 days
        (2 * week, Timespan.COMPLETE),    # 2 weeks (14 days)
    )
    for interval, span in feed_schedule:
        sender.add_periodic_task(interval, periodic_addon_feeds.s(span.value))

    # daily
    sender.add_periodic_task(day, periodic_find_hidden_addons.s())

    # weekly
    sender.add_periodic_task(week, periodic_request_all_files.s())

    # every hour at XX:00
    on_the_hour = crontab(minute='0', hour='*')
    sender.add_periodic_task(on_the_hour, periodic_keep_history.s())

    periodic_fill_missing_addons.apply_async(countdown=30)

    # Mainly for staging, so a full download is not redone on every
    # environment restart when the last one is less than a day old.
    # The hourly & daily feeds will pick up the difference.
    complete_key = 'periodic-addon_feeds-last-{}'.format(
        Timespan.COMPLETE.value)
    if overdue(complete_key):
        periodic_addon_feeds.apply_async([Timespan.COMPLETE.value],
                                         countdown=minute)

    if overdue('periodic-find_hidden_addons-last'):
        periodic_find_hidden_addons.apply_async(countdown=hour)

    if overdue('periodic-request_all_files-last'):
        periodic_request_all_files.apply_async(countdown=4 * hour)
Example #4
0
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register every beat-scheduled task on *sender*.

    Which tasks are registered depends on ``settings.DEBUG``, the optional
    ``settings.MULTI_TENANCY`` flag, ``is_clickhouse_enabled()`` and
    ``settings.ASYNC_EVENT_PROPERTY_USAGE``.

    :param sender: Celery application the periodic tasks are registered on.
    :param kwargs: Extra keyword arguments; not used here.
    """
    schedule = sender.add_periodic_task

    if not settings.DEBUG:
        schedule(1.0, redis_celery_queue_depth.s(), name="1 sec queue probe", priority=0)
    # Heartbeat every 10sec to make sure the worker is alive
    schedule(10.0, redis_heartbeat.s(), name="10 sec heartbeat", priority=0)

    # Update events table partitions twice a week
    twice_weekly = crontab(day_of_week="mon,fri", hour=0, minute=0)
    schedule(twice_weekly, update_event_partitions.s())

    multi_tenancy = getattr(settings, "MULTI_TENANCY", False)
    if multi_tenancy:
        if not is_clickhouse_enabled():
            schedule(crontab(minute=0, hour="*/12"), run_session_recording_retention.s())

        # Cloud (posthog-cloud) cron jobs: every day midnight UTC
        schedule(crontab(hour=0, minute=0), calculate_billing_daily_usage.s())
    else:
        # Send weekly status report on self-hosted instances
        schedule(crontab(day_of_week="mon", hour=0, minute=0), status_report.s())

    # Send weekly email report (~ 8:00 SF / 16:00 UK / 17:00 EU)
    monday_afternoon = crontab(day_of_week="mon", hour=15, minute=0)
    schedule(monday_afternoon, send_weekly_email_report.s())

    friday_midnight = crontab(day_of_week="fri", hour=0, minute=0)
    schedule(friday_midnight, clean_stale_partials.s())

    # delete old plugin logs every 4 hours
    schedule(crontab(minute=0, hour="*/4"), delete_old_plugin_logs.s())

    # sync all Organization.available_features every hour
    schedule(crontab(minute=30, hour="*"), sync_all_organization_available_features.s())

    schedule(UPDATE_CACHED_DASHBOARD_ITEMS_INTERVAL_SECONDS, check_cached_items.s(), name="check dashboard items")

    if is_clickhouse_enabled():
        # ClickHouse probes, each registered with a 120 second interval.
        schedule(120, clickhouse_lag.s(), name="clickhouse table lag")
        schedule(120, clickhouse_row_count.s(), name="clickhouse events table row count")
        schedule(120, clickhouse_part_count.s(), name="clickhouse table parts count")
        schedule(120, clickhouse_mutation_count.s(), name="clickhouse table mutations count")
    else:
        schedule(
            ACTION_EVENT_MAPPING_INTERVAL_SECONDS,
            calculate_event_action_mappings.s(),
            name="calculate event action mappings",
            expires=ACTION_EVENT_MAPPING_INTERVAL_SECONDS,
        )

    schedule(120, calculate_cohort.s(), name="recalculate cohorts")

    if settings.ASYNC_EVENT_PROPERTY_USAGE:
        schedule(
            EVENT_PROPERTY_USAGE_INTERVAL_SECONDS,
            calculate_event_property_usage.s(),
            name="calculate event property usage",
        )
Example #5
0
File: main.py Project: cbp44/whyis
class App(Empty):

    managed = False

    def configure_extensions(self):
        """Set up Celery, the background tasks and the remaining extensions.

        After delegating to ``Empty.configure_extensions`` this method:

        * creates ``self.celery`` from ``CELERY_BROKER_URL`` and installs
          task base classes that run inside ``app.app_context()``;
        * optionally replaces ``root_path`` and the Jinja loader from config;
        * defines the ``process_resource``, ``process_nanopub``, ``update``
          and ``run_importer`` Celery tasks as closures over this app;
        * builds ``app.inference_tasks`` from ``config['inference_tasks']``
          and either schedules each one or queues it immediately;
        * imports the modules listed in ``config['template_imports']``;
        * creates ``self.nanopub_manager`` and ``self.cache``.
        """

        Empty.configure_extensions(self)
        # Celery application named after this app, using the configured broker.
        self.celery = Celery(self.name,
                             broker=self.config['CELERY_BROKER_URL'],
                             beat=True)
        self.celery.conf.update(self.config)
        # Settings for celery_once: Redis backend on the broker URL, with a
        # default timeout of 60 * 60 * 24 seconds (24 hours).
        self.celery.conf.ONCE = {
            'backend': 'celery_once.backends.Redis',
            'settings': {
                'url': self.config['CELERY_BROKER_URL'],
                'default_timeout': 60 * 60 * 24
            }
        }

        # Task base class that runs the task body inside the app context.
        # `app` is only assigned further down; the closure looks it up when
        # a task is actually called.
        class ContextTask(self.celery.Task):
            def __call__(self, *args, **kwargs):
                with app.app_context():
                    return self.run(*args, **kwargs)

        self.celery.Task = ContextTask

        # Make QueueOnce app context aware.
        class ContextQueueOnce(QueueOnce):
            def __call__(self, *args, **kwargs):
                with app.app_context():
                    return super(ContextQueueOnce,
                                 self).__call__(*args, **kwargs)

        # Attach to celery object for easy access.
        self.celery.QueueOnce = ContextQueueOnce

        app = self

        if 'root_path' in self.config:
            self.root_path = self.config['root_path']

        # Put the configured template directories ahead of the default
        # Jinja loader.
        if 'WHYIS_TEMPLATE_DIR' in self.config and app.config[
                'WHYIS_TEMPLATE_DIR'] is not None:
            my_loader = jinja2.ChoiceLoader([
                jinja2.FileSystemLoader(p)
                for p in self.config['WHYIS_TEMPLATE_DIR']
            ] + [app.jinja_loader])
            app.jinja_loader = my_loader

        # Run one configured inferencer over the whole knowledge graph.
        # NOTE(review): this uses the plain QueueOnce base, not the
        # context-aware ContextQueueOnce defined above - confirm intended.
        @self.celery.task(base=QueueOnce, once={'graceful': True})
        def process_resource(service_name, taskid=None):
            service = self.config['inferencers'][service_name]
            service.process_graph(app.db)

        # Run one configured inferencer over a single nanopublication,
        # skipping it when the nanopub manager no longer reports it current.
        @self.celery.task
        def process_nanopub(nanopub_uri, service_name, taskid=None):
            service = self.config['inferencers'][service_name]
            print(service, nanopub_uri)
            if app.nanopub_manager.is_current(nanopub_uri):
                nanopub = app.nanopub_manager.get(nanopub_uri)
                service.process_graph(nanopub)
            else:
                print("Skipping retired nanopub", nanopub_uri)

        # Attach two Celery tasks to one inference-task config dict and
        # return the same dict: 'find_instances' enumerates the instances
        # reported by the task's service, 'do' processes one of them.
        def setup_periodic_task(task):
            @self.celery.task
            def find_instances():
                print("Triggered task", task['name'])
                # Each result row is unpacked as a 1-tuple; task['do'] is
                # called directly here rather than via .delay()/apply_async().
                for x, in task['service'].getInstances(app.db):
                    task['do'](x)

            @self.celery.task
            def do_task(uri):
                print("Running task", task['name'], 'on', uri)
                resource = app.get_resource(uri)

                # result never used
                task['service'].process_graph(resource.graph)

            task['service'].app = app
            task['find_instances'] = find_instances
            task['do'] = do_task

            return task

        app.inference_tasks = []
        if 'inference_tasks' in self.config:
            app.inference_tasks = [
                setup_periodic_task(task)
                for task in self.config['inference_tasks']
            ]

        # NOTE(review): 'inferencers' is read unconditionally here, while the
        # `update` task below guards the same key with an `in` check; a config
        # without 'inferencers' would raise KeyError at this point - confirm.
        for name, task in list(self.config['inferencers'].items()):
            task.app = app

        # Tasks with a 'schedule' are registered as crontab-driven periodic
        # tasks; the others are queued once, right now.
        for task in app.inference_tasks:
            if 'schedule' in task:
                #print "Scheduling task", task['name'], task['schedule']
                self.celery.add_periodic_task(crontab(**task['schedule']),
                                              task['find_instances'].s(),
                                              name=task['name'])
            else:
                task['find_instances'].delay()

        @self.celery.task()
        def update(nanopub_uri):
            '''gets called whenever there is a change in the knowledge graph.
            Performs a breadth-first knowledge expansion of the current change.'''
            #print "Updating on", nanopub_uri
            #if not app.nanopub_manager.is_current(nanopub_uri):
            #    print("Skipping retired nanopub", nanopub_uri)
            #    return
            nanopub = app.nanopub_manager.get(nanopub_uri)
            nanopub_graph = ConjunctiveGraph(nanopub.store)
            if 'inferencers' in self.config:
                # First pass: inferencers keyed on updateChangeQuery that
                # report instances in this nanopub get a per-nanopub task.
                for name, service in list(self.config['inferencers'].items()):
                    service.app = self
                    if service.query_predicate == self.NS.whyis.updateChangeQuery:
                        if service.getInstances(nanopub_graph):
                            print("invoking", name, nanopub_uri)
                            process_nanopub.apply_async(kwargs={
                                'nanopub_uri': nanopub_uri,
                                'service_name': name
                            },
                                                        priority=1)
                # Second pass: inferencers keyed on globalChangeQuery get a
                # whole-graph task.
                for name, service in list(self.config['inferencers'].items()):
                    service.app = self
                    if service.query_predicate == self.NS.whyis.globalChangeQuery:
                        process_resource.apply_async(
                            kwargs={'service_name': name}, priority=5)

        # Callback handed to the nanopub manager below; it queues `update`
        # for the given nanopub URI.
        def run_update(nanopub_uri):
            update.apply_async(args=[nanopub_uri], priority=9)

        self.nanopub_update_listener = run_update

        app = self

        # bind=True makes the first parameter (`self`) the task instance,
        # which shadows the outer `self`; the body therefore uses `app`.
        @self.celery.task(base=self.celery.QueueOnce,
                          once={'graceful': True},
                          retry_backoff=True,
                          retry_jitter=True,
                          autoretry_for=(Exception, ),
                          max_retries=4,
                          bind=True)
        def run_importer(self, entity_name):
            entity_name = URIRef(entity_name)
            print('importing', entity_name)
            importer = app.find_importer(entity_name)
            if importer is None:
                return
            importer.app = app
            modified = importer.last_modified(entity_name, app.db,
                                              app.nanopub_manager)
            updated = importer.modified(entity_name)
            if updated is None:
                updated = datetime.now(pytz.utc)
            print("Remote modified:", updated, type(updated),
                  "Local modified:", modified, type(modified))
            # Load when there is no local modification time, or when the
            # remote copy is newer by more than importer.min_modified seconds.
            if modified is None or (updated - modified
                                    ).total_seconds() > importer.min_modified:
                importer.load(entity_name, app.db, app.nanopub_manager)

        self.run_importer = run_importer

        # Import the modules named in config['template_imports'] and keep
        # them by name; an import failure is reported and re-raised.
        self.template_imports = {}
        if 'template_imports' in self.config:
            for name, imp in list(self.config['template_imports'].items()):
                try:
                    m = importlib.import_module(imp)
                    self.template_imports[name] = m
                except Exception:
                    print(
                        "Error importing module %s into template variable %s."
                        % (imp, name))
                    raise

        # self.db is read here, so it has to exist before this method runs.
        self.nanopub_manager = NanopublicationManager(
            self.db.store,
            Namespace('%s/pub/' % (self.config['lod_prefix'])),
            self,
            update_listener=self.nanopub_update_listener)

        # A cache is only created when CACHE_TYPE is configured.
        if 'CACHE_TYPE' in self.config:
            from flask_caching import Cache
            self.cache = Cache(self)
        else:
            self.cache = None

    _file_depot = None

    @property
    def file_depot(self):
        """Return the 'files' depot, looking it up on first access.

        The depot comes from ``DepotManager`` and is cached on the instance
        in ``_file_depot``. When ``DepotManager`` has no 'files' depot yet,
        one is configured from ``self.config['file_archive']`` first.
        """
        if self._file_depot is not None:
            return self._file_depot

        if DepotManager.get('files') is None:
            DepotManager.configure('files', self.config['file_archive'])
        self._file_depot = DepotManager.get('files')
        return self._file_depot

    _nanopub_depot = None

    @property
    def nanopub_depot(self):
        """Return the 'nanopublications' depot, or ``None`` if unconfigured.

        The depot is only looked up when ``'nanopub_archive'`` is present in
        ``self.config``; the result is cached in ``_nanopub_depot``.
        """
        if self._nanopub_depot is not None:
            return self._nanopub_depot
        if 'nanopub_archive' not in self.config:
            # Nothing cached and nothing configured: hand back the cached None.
            return self._nanopub_depot

        if DepotManager.get('nanopublications') is None:
            DepotManager.configure('nanopublications',
                                   self.config['nanopub_archive'])
        self._nanopub_depot = DepotManager.get('nanopublications')
        return self._nanopub_depot

    def configure_database(self):
        """
        Set up the namespaces, the admin and knowledge stores, the vocabulary
        graphs and the user datastore / security extension.
        """
        self.NS = NS
        lod_prefix = self.config['lod_prefix']
        self.NS.local = rdflib.Namespace(lod_prefix + '/')

        self.admin_db = database.engine_from_config(self.config, "admin_")
        self.db = database.engine_from_config(self.config, "knowledge_")
        self.db.app = self

        # Both vocabularies are parsed into graphs sharing self.vocab's store.
        self.vocab = ConjunctiveGraph()
        local_id = str(self.NS.local)

        module_dir = os.path.dirname(__file__)
        default_vocab_path = os.path.abspath(
            os.path.join(module_dir, "default_vocab.ttl"))
        default_vocab = Graph(store=self.vocab.store)
        default_vocab.parse(source=default_vocab_path,
                            format="turtle",
                            publicID=local_id)

        custom_vocab = Graph(store=self.vocab.store)
        custom_vocab.parse(self.config['vocab_file'],
                           format="turtle",
                           publicID=local_id)

        self.datastore = WhyisUserDatastore(self.admin_db, {}, lod_prefix)
        self.security = Security(self,
                                 self.datastore,
                                 register_form=ExtendedRegisterForm)

    def __weighted_route(self, *args, **kwargs):
        """
        Override the match_compare_key function of the Rule created by invoking Flask.route.
        This can only be done on the app, not in a blueprint, because blueprints lazily add Rule's when they are registered on an app.

        Accepts the same arguments as ``self.route`` plus an optional
        ``compare_key`` keyword. When ``compare_key`` is given, the rule
        registered for the view returns it from ``match_compare_key``.
        Returns a decorator that registers the view and returns it unchanged.
        """
        # Take our private option out once, before the decorator exists.
        # Popping it inside the decorator mutated the shared kwargs, so a
        # second use of the same decorator silently lost the compare key.
        compare_key = kwargs.pop('compare_key', None)

        def decorator(view_func):
            # register view_func with route
            self.route(*args, **kwargs)(view_func)

            if compare_key is not None:
                # The rule just registered is the last one in the URL map.
                rule = self.url_map._rules[-1]
                rule.match_compare_key = lambda: compare_key

            return view_func

        return decorator

    def map_entity(self, name):
        """Map *name* through the first configured namespace that matches it.

        Returns ``(mapped_name, importer)`` for the first entry of
        ``self.config['namespaces']`` whose ``matches(name)`` is truthy, or
        ``(None, None)`` when no entry matches.
        """
        candidates = (imp for imp in self.config['namespaces']
                      if imp.matches(name))
        importer = next(candidates, None)
        if importer is None:
            return None, None
        return importer.map(name), importer

    def find_importer(self, name):
        """Return the first configured namespace importer for *name*.

        Entries of ``self.config['namespaces']`` are tried in order via
        ``resource_matches(name)``; ``None`` is returned when none matches.
        """
        return next(
            (imp for imp in self.config['namespaces']
             if imp.resource_matches(name)),
            None,
        )

    class Entity(rdflib.resource.Resource):
        _this = None

        def this(self):
            if self._this is None:
                self._this = self._graph.app.get_entity(self.identifier)
            return self._this

        _description = None

        def description(self):
            if self._description is None:
                #                try:
                result = Graph()
                #                try:
                for quad in self._graph.query(
                        '''
construct {
    ?e ?p ?o.
    ?o rdfs:label ?label.
    ?o skos:prefLabel ?prefLabel.
    ?o dc:title ?title.
    ?o foaf:name ?name.
    ?o ?pattr ?oattr.
    ?oattr rdfs:label ?oattrlabel
} where {
    graph ?g {
      ?e ?p ?o.
    }
    ?g a np:Assertion.
    optional {
      ?e sio:hasAttribute|sio:hasPart ?o.
      ?o ?pattr ?oattr.
      optional {
        ?oattr rdfs:label ?oattrlabel.
      }
    }
    optional {
      ?o rdfs:label ?label.
    }
    optional {
      ?o skos:prefLabel ?prefLabel.
    }
    optional {
      ?o dc:title ?title.
    }
    optional {
      ?o foaf:name ?name.
    }
}''',
                        initNs=NS.prefixes,
                        initBindings={'e': self.identifier}):
                    if len(quad) == 3:
                        s, p, o = quad
                    else:
                        # Last term is never used
                        s, p, o, _ = quad
                    result.add((s, p, o))
#                except:
#                    pass
                self._description = result.resource(self.identifier)
#                except Exception as e:
#                    print str(e), self.identifier
#                    raise e
            return self._description

    def get_resource(self, entity, async_=True, retrieve=True):
        """Return an ``Entity`` for *entity*, importing it first if needed.

        :param entity: identifier to look up; replaced by its mapped name
            when a configured namespace maps it.
        :param async_: when ``False`` the importer always runs inline.
        :param retrieve: when falsy, no mapping or importing happens at all.
        :return: ``self.Entity(self.db, entity)``.
        """
        if not retrieve:
            return self.Entity(self.db, entity)

        mapped_name, importer = self.map_entity(entity)
        if mapped_name is not None:
            entity = mapped_name
        if importer is None:
            importer = self.find_importer(entity)
        print(entity, importer)

        if importer is not None:
            modified = importer.last_modified(entity, self.db,
                                              self.nanopub_manager)
            run_inline = modified is None or async_ is False
            if run_inline:
                # No local modification time, or the caller asked for a
                # synchronous import: run the importer right here.
                self.run_importer(entity)
            elif not importer.import_once:
                print("Type of modified is", type(modified))
                self.run_importer.delay(entity)

        return self.Entity(self.db, entity)

    def configure_template_filters(self):
        """Register the built-in filters plus any listed in ``config['filters']``."""
        filters.configure(self)
        extra_filters = self.config.get('filters', {})
        for filter_name, filter_fn in extra_filters.items():
            register = self.template_filter(filter_name)
            register(filter_fn)

    def add_file(self, f, entity, nanopub):
        entity = rdflib.URIRef(entity)
        old_nanopubs = []
        for np_uri, np_assertion, in self.db.query(
                '''select distinct ?np ?assertion where {
    hint:Query hint:optimizer "Runtime" .
    graph ?assertion {?e whyis:hasFileID ?fileid}
    ?np np:hasAssertion ?assertion.
}''',
                initNs=NS.prefixes,
                initBindings=dict(e=rdflib.URIRef(entity))):
            if not self._can_edit(np_uri):
                raise Unauthorized()
            old_nanopubs.append((np_uri, np_assertion))
        fileid = self.file_depot.create(f.stream, f.filename, f.mimetype)
        nanopub.add((nanopub.identifier, NS.sio.isAbout, entity))
        nanopub.assertion.add((entity, NS.whyis.hasFileID, Literal(fileid)))
        if current_user._get_current_object() is not None and hasattr(
                current_user, 'identifier'):
            nanopub.assertion.add(
                (entity, NS.dc.contributor, current_user.identifier))
        nanopub.assertion.add(
            (entity, NS.dc.created, Literal(datetime.utcnow())))
        nanopub.assertion.add(
            (entity, NS.ov.hasContentType, Literal(f.mimetype)))
        nanopub.assertion.add((entity, NS.RDF.type, NS.mediaTypes[f.mimetype]))
        nanopub.assertion.add(
            (NS.mediaTypes[f.mimetype], NS.RDF.type, NS.dc.FileFormat))
        nanopub.assertion.add(
            (entity, NS.RDF.type, NS.mediaTypes[f.mimetype.split('/')[0]]))
        nanopub.assertion.add((NS.mediaTypes[f.mimetype.split('/')[0]],
                               NS.RDF.type, NS.dc.FileFormat))
        nanopub.assertion.add((entity, NS.RDF.type, NS.pv.File))

        if current_user._get_current_object() is not None and hasattr(
                current_user, 'identifier'):
            nanopub.pubinfo.add((nanopub.assertion.identifier,
                                 NS.dc.contributor, current_user.identifier))
        nanopub.pubinfo.add((nanopub.assertion.identifier, NS.dc.created,
                             Literal(datetime.utcnow())))

        return old_nanopubs

    def delete_file(self, entity):
        for np_uri, in self.db.query('''select distinct ?np where {
    hint:Query hint:optimizer "Runtime" .
    graph ?np_assertion {?e whyis:hasFileID ?fileid}
    ?np np:hasAssertion ?np_assertion.
}''',
                                     initNs=NS.prefixes,
                                     initBindings=dict(e=entity)):
            if not self._can_edit(np_uri):
                raise Unauthorized()
            self.nanopub_manager.retire(np_uri)

    def add_files(self, uri, files, upload_type=NS.pv.File):
        """Attach uploaded *files* to *uri* and publish the result.

        For a dcmitype Collection or a dcat Dataset every file with a usable
        name gets its own URI under *uri*; for any other *upload_type* only
        the first file with a non-empty filename is stored on *uri* itself.
        Nothing is retired or published when no file was added.

        :param uri: URI of the resource receiving the files.
        :param files: iterable of upload objects.
        :param upload_type: RDF type asserted for *uri*.
        """
        nanopub = self.nanopub_manager.new()
        old_nanopubs = []
        added_files = False

        nanopub.assertion.add((uri, self.NS.RDF.type, upload_type))

        collection_type = URIRef("http://purl.org/dc/dcmitype/Collection")
        is_collection = upload_type == collection_type
        is_dataset = (not is_collection) and upload_type == NS.dcat.Dataset

        if is_collection or is_dataset:
            for f in files:
                filename = secure_filename(f.filename)
                if filename == '':
                    continue
                file_uri = URIRef(uri + "/" + filename)
                old_nanopubs.extend(self.add_file(f, file_uri, nanopub))
                if is_collection:
                    nanopub.assertion.add((uri, NS.dc.hasPart, file_uri))
                else:
                    nanopub.assertion.add(
                        (uri, NS.dcat.distribution, file_uri))
                    nanopub.assertion.add(
                        (file_uri, NS.RDF.type, NS.dcat.Distribution))
                    nanopub.assertion.add(
                        (file_uri, NS.dcat.downloadURL, file_uri))
                added_files = True
        else:
            # Single-file upload: only the first named file is used.
            for f in files:
                if f.filename == '':
                    continue
                old_nanopubs.extend(self.add_file(f, uri, nanopub))
                nanopub.assertion.add((uri, NS.RDF.type, NS.pv.File))
                added_files = True
                break

        if not added_files:
            return

        # Record what the new assertion revises, retire the old nanopubs,
        # then publish whatever the manager prepares from the new one.
        for old_np, old_np_assertion in old_nanopubs:
            nanopub.pubinfo.add((nanopub.assertion.identifier,
                                 NS.prov.wasRevisionOf, old_np_assertion))
            self.nanopub_manager.retire(old_np)

        for prepared in self.nanopub_manager.prepare(nanopub):
            self.nanopub_manager.publish(prepared)

    def _can_edit(self, uri):
        if self.managed:
            return True
        if current_user._get_current_object() is None:
            # This isn't null even when not authenticated, unless we are an autonomic agent.
            return True
        if not hasattr(current_user,
                       'identifier'):  # This is an anonymous user.
            return False
        if current_user.has_role('Publisher') or current_user.has_role(
                'Editor') or current_user.has_role('Admin'):
            return True
        if self.db.query('''ask {
    ?nanopub np:hasAssertion ?assertion; np:hasPublicationInfo ?info.
    graph ?info { ?assertion dc:contributor ?user. }
}''',
                         initBindings=dict(nanopub=uri,
                                           user=current_user.identifier),
                         initNs=dict(np=self.NS.np, dc=self.NS.dc)):
            #print "Is owner."
            return True
        return False

    def configure_views(self):
        def sort_by(resources, property):
            return sorted(resources, key=lambda x: x.value(property))

        def camel_case_split(identifier):
            matches = finditer(
                '.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)',
                identifier)
            return [m.group(0) for m in matches]

        label_properties = [
            self.NS.skos.prefLabel, self.NS.RDFS.label, self.NS.schema.name,
            self.NS.dc.title, self.NS.foaf.name, self.NS.schema.name,
            self.NS.skos.notation
        ]

        @lru_cache(maxsize=1000)
        def get_remote_label(uri):
            for db in [self.db, self.admin_db]:
                g = Graph()
                try:
                    db.nsBindings = {}
                    g += db.query('''select ?s ?p ?o where {
                        hint:Query hint:optimizer "Runtime" .

                         ?s ?p ?o.}''',
                                  initNs=self.NS.prefixes,
                                  initBindings=dict(s=uri))
                    db.nsBindings = {}
                except:
                    pass
                resource_entity = g.resource(uri)
                if len(resource_entity.graph) == 0:
                    #print "skipping", db
                    continue
                for property in label_properties:
                    labels = self.lang_filter(resource_entity[property])
                    if len(labels) > 0:
                        return labels[0]

                if len(labels) == 0:
                    name = [
                        x.value for x in [
                            resource_entity.value(self.NS.foaf.givenName),
                            resource_entity.value(self.NS.foaf.familyName)
                        ] if x is not None
                    ]
                    if len(labels) == 0:
                        name = [
                            x.value for x in [
                                resource_entity.value(
                                    self.NS.schema.givenName),
                                resource_entity.value(
                                    self.NS.schema.familyName)
                            ] if x is not None
                        ]
                        if len(name) > 0:
                            label = ' '.join(name)
                            return label
            try:
                label = self.db.qname(uri).split(":")[1].replace("_", " ")
                return ' '.join(camel_case_split(label)).title()
            except Exception as e:
                print(str(e), uri)
                return str(uri)

        def get_label(resource):
            for property in label_properties:
                labels = self.lang_filter(resource[property])
                #print "mem", property, label
                if len(labels) > 0:
                    return labels[0]
            return get_remote_label(resource.identifier)

        self.get_label = get_label

        def initialize_g():
            if not hasattr(g, "initialized"):
                g.initialized = True
                g.ns = self.NS
                g.get_summary = get_summary
                g.get_label = get_label
                g.labelize = self.labelize
                g.get_resource = self.get_resource
                g.get_entity = self.get_entity
                g.rdflib = rdflib
                g.isinstance = isinstance
                g.current_user = current_user
                g.slugify = slugify
                g.db = self.db

        self.initialize_g = initialize_g

        @self.before_request
        def load_forms():
            if 'authenticators' in self.config:
                for authenticator in self.config['authenticators']:
                    user = authenticator.authenticate(request, self.datastore,
                                                      self.config)
                    if user is not None:
                        #    login_user(user)
                        break
            initialize_g()

        @self.login_manager.user_loader
        def load_user(user_id):
            if user_id != None:
                #try:
                user = self.datastore.find_user(id=user_id)
                return user
                #except:
                #    return None
            else:
                return None

        # def get_graphs(graphs):
        #     query = '''select ?s ?p ?o ?g where {
        #         hint:Query hint:optimizer "Runtime" .
        #
        #         graph ?g {?s ?p ?o}
        #         } values ?g { %s }'''
        #     query = query % ' '.join([graph.n3() for graph in graphs])
        #     #print query
        #     quads = self.db.store.query(query, initNs=self.NS.prefixes)
        #     result = rdflib.Dataset()
        #     result.addN(quads)
        #     return result

#         def explain(graph):
#             values = ')\n  ('.join([' '.join([x.n3() for x in triple]) for triple in graph.triples((None,None,None))])
#             values = 'VALUES (?s ?p ?o)\n{\n('+ values + ')\n}'
#
#             try:
#                 nanopubs = self.db.query('''select distinct ?np where {
#     hint:Query hint:optimizer "Runtime" .
#     ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g;
#         np:hasPublicationInfo ?pubinfo;
#         np:hasAssertion ?assertion;
#     graph ?assertion { ?s ?p ?o.}
# }''' + values, initNs=self.NS.prefixes)
#                 result = ConjunctiveGraph()
#                 for nanopub_uri, in nanopubs:
#                     self.nanopub_manager.get(nanopub_uri, result)
#             except Exception as e:
#                 print(str(e), entity)
#                 raise e
#             return result.resource(entity)

        def get_entity_sparql(entity):
            try:
                statements = self.db.query(
                    '''select distinct ?s ?p ?o ?g where {
    hint:Query hint:optimizer "Runtime" .
            ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g;
                np:hasPublicationInfo ?pubinfo;
                np:hasAssertion ?assertion;

            {graph ?np { ?np sio:isAbout ?e.}}
            UNION
            {graph ?assertion { ?e ?p ?o.}}
            graph ?g { ?s ?p ?o }
        }''',
                    initBindings={'e': entity},
                    initNs=self.NS.prefixes)
                result = ConjunctiveGraph()
                result.addN(statements)
            except Exception as e:
                print(str(e), entity)
                raise e
            #print result.serialize(format="trig")
            return result.resource(entity)

#         def get_entity_disk(entity):
#             try:
#                 nanopubs = self.db.query('''select distinct ?np where {
#     hint:Query hint:optimizer "Runtime" .
#             ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g;
#                 np:hasPublicationInfo ?pubinfo;
#                 np:hasAssertion ?assertion;
#
#             {graph ?np { ?np sio:isAbout ?e.}}
#             UNION
#             {graph ?assertion { ?e ?p ?o.}}
#         }''',initBindings={'e':entity}, initNs=self.NS.prefixes)
#                 result = ConjunctiveGraph()
#                 for nanopub_uri, in nanopubs:
#                     self.nanopub_manager.get(nanopub_uri, result)
# #                result.addN(nanopubs)
#             except Exception as e:
#                 print(str(e), entity)
#                 raise e
#             #print result.serialize(format="trig")
#             return result.resource(entity)

        get_entity = get_entity_sparql

        self.get_entity = get_entity

        def get_summary(resource):
            """Yield ``(property, term)`` pairs that can summarise ``resource``.

            The built-in summary properties are tried in order, followed by
            any extra ones listed under ``summary_properties`` in the app
            config.  Values are passed through ``self.lang_filter`` before
            being yielded.
            """
            candidates = [
                self.NS.skos.definition,
                self.NS.schema.description,
                self.NS.dc.abstract,
                self.NS.dc.description,
                self.NS.dc.summary,
                self.NS.RDFS.comment,
                self.NS.dcelements.description,
                URIRef("http://purl.obolibrary.org/obo/IAO_0000115"),
                self.NS.prov.value,
                self.NS.sio.hasValue,
            ]
            if 'summary_properties' in self.config:
                candidates += self.config['summary_properties']
            for prop in candidates:
                yield from ((prop, term)
                            for term in self.lang_filter(resource[prop]))

        self.get_summary = get_summary

        # The CDN route is only registered when a CDN directory is configured.
        cdn_dir_declared = 'WHYIS_CDN_DIR' in self.config
        if cdn_dir_declared and self.config['WHYIS_CDN_DIR'] is not None:

            @self.route('/cdn/<path:filename>')
            def cdn(filename):
                """Serve ``filename`` from the configured CDN directory."""
                cdn_dir = self.config['WHYIS_CDN_DIR']
                return send_from_directory(cdn_dir, filename)

        def render_view(resource, view=None, args=None, use_cache=True):
            self.initialize_g()
            if view is None and 'view' in request.args:
                view = request.args['view']

            if view is None:
                view = 'view'

            if use_cache and self.cache is not None:
                key = str((str(resource.identifier), view))
                result = self.cache.get(key)
                if result is not None:
                    r, headers = result
                    return r, 200, headers
            template_args = dict()
            template_args.update(self.template_imports)
            template_args.update(
                dict(ns=self.NS,
                     this=resource,
                     g=g,
                     current_user=current_user,
                     isinstance=isinstance,
                     args=request.args if args is None else args,
                     url_for=url_for,
                     app=self,
                     view=view,
                     get_entity=get_entity,
                     get_summary=get_summary,
                     search=search,
                     rdflib=rdflib,
                     config=self.config,
                     hasattr=hasattr,
                     set=set))

            types = []
            if 'as' in request.args:
                types = [URIRef(request.args['as']), 0]

            types.extend(
                (x, 1) for x in self.vocab[resource.identifier:NS.RDF.type])
            if len(
                    types
            ) == 0:  # KG types cannot override vocab types. This should keep views stable where critical.
                types.extend([(x.identifier, 1) for x in resource[NS.RDF.type]
                              if isinstance(x.identifier, rdflib.URIRef)])
            #if len(types) == 0:
            types.append([self.NS.RDFS.Resource, 100])
            type_string = ' '.join(
                ["(%s %d '%s')" % (x.n3(), i, view) for x, i in types])
            view_query = '''select ?id ?view (count(?mid)+?priority as ?rank) ?class ?c ?content_type where {
    values (?c ?priority ?id) { %s }
    ?c rdfs:subClassOf* ?mid.
    ?mid rdfs:subClassOf* ?class.
    ?class ?viewProperty ?view.
    ?viewProperty rdfs:subPropertyOf* whyis:hasView.
    ?viewProperty dc:identifier ?id.
    optional {
        ?viewProperty dc:format ?content_type
    }
} group by ?c ?class ?content_type order by ?rank
''' % type_string

            #print view_query
            views = list(
                self.vocab.query(view_query,
                                 initNs=dict(whyis=self.NS.whyis,
                                             dc=self.NS.dc)))
            if len(views) == 0:
                abort(404)

            headers = {'Content-Type': "text/html"}
            extension = views[0]['view'].value.split(".")[-1]
            if extension in DATA_EXTENSIONS:
                headers['Content-Type'] = DATA_EXTENSIONS[extension]
            print(views[0]['view'], views[0]['content_type'])
            if views[0]['content_type'] is not None:
                headers['Content-Type'] = views[0]['content_type']

            # default view (list of nanopubs)
            # if available, replace with class view
            # if available, replace with instance view
            return render_template(views[0]['view'].value,
                                   **template_args), 200, headers

        self.render_view = render_view

        # Register blueprints
        self.register_blueprint(nanopub_blueprint)
        self.register_blueprint(sparql_blueprint)
        self.register_blueprint(entity_blueprint)
        self.register_blueprint(tableview_blueprint)

    def get_entity_uri(self, name, format):
        """Resolve a request path into an entity URI and a content type.

        A ``format`` found in ``DATA_EXTENSIONS`` selects the content type;
        any other non-``None`` format is treated as part of the name and
        appended after a dot.  The entity is the local-namespace term for
        ``name``, else the ``uri`` request argument, else the local ``Home``.

        :returns: ``(entity, content_type)``; ``content_type`` may be ``None``.
        """
        content_type = None
        if format is not None and format in DATA_EXTENSIONS:
            content_type = DATA_EXTENSIONS[format]
        elif format is not None:
            name = '.'.join((name, format))

        if name is not None:
            return self.NS.local[name], content_type
        if 'uri' in request.args:
            return URIRef(request.args['uri']), content_type
        return self.NS.local.Home, content_type

    def get_send_file_max_age(self, filename):
        """Return 0 in debug mode, otherwise defer to ``Empty``'s value."""
        return 0 if self.debug else Empty.get_send_file_max_age(self, filename)
Example #6
0
class App(Empty):

    managed = False
    
    def configure_extensions(self):

        Empty.configure_extensions(self)
        self.celery = Celery(self.name, broker=self.config['CELERY_BROKER_URL'], beat=True)
        self.celery.conf.update(self.config)
        
        app = self

        self.redis = self.celery.broker_connection().default_channel.client
        
        if 'root_path' in self.config:
            self.root_path = self.config['root_path']
        
        if 'WHYIS_TEMPLATE_DIR' in self.config and app.config['WHYIS_TEMPLATE_DIR'] is not None:
            my_loader = jinja2.ChoiceLoader(
                [jinja2.FileSystemLoader(p) for p in self.config['WHYIS_TEMPLATE_DIR']] 
                + [app.jinja_loader]
            )
            app.jinja_loader = my_loader
        
        def setup_task(service):
            """Bind ``service`` to the app and pick the Celery task that runs it.

            Services whose query predicate is ``whyis:globalChangeQuery`` are
            handled by ``process_resource``; all others by ``process_nanopub``.
            The chosen task gets a ``service`` attribute returning the service.
            """
            service.app = app
            print(service)
            is_global = service.query_predicate == self.NS.whyis.globalChangeQuery
            chosen_task = process_resource if is_global else process_nanopub
            chosen_task.service = lambda: service
            return chosen_task

        @self.celery.task
        def process_resource(service_name, taskid=None):
            """Run the named inferencer over the whole knowledge graph.

            Nothing is processed when ``is_waiting`` reports that a task for
            the same service is already waiting.
            """
            inferencer = self.config['inferencers'][service_name]
            if not is_waiting(service_name):
                print(service_name)
                inferencer.process_graph(app.db)
            else:
                print("Deferring to a later invocation.", service_name)

        @self.celery.task
        def process_nanopub(nanopub_uri, service_name, taskid=None):
            """Run the named inferencer over one nanopublication.

            Nanopublications that are no longer current are skipped.
            """
            inferencer = self.config['inferencers'][service_name]
            print(inferencer, nanopub_uri)
            if not app.nanopub_manager.is_current(nanopub_uri):
                print("Skipping retired nanopub", nanopub_uri)
                return
            inferencer.process_graph(app.nanopub_manager.get(nanopub_uri))

        def setup_periodic_task(task):
            """Attach the Celery tasks that drive one configured inference task.

            :param task: inference task configuration; ``task['name']`` and
                ``task['service']`` are read here.
            :returns: the same dict, with ``find_instances`` and ``do`` set to
                the registered Celery tasks and ``task['service'].app`` bound
                to the application.
            """
            # Celery derives a default task name from the module and function
            # name.  Both functions below are redefined on every call, so
            # without explicit names every inference task would share the
            # same two registered tasks (and therefore the first closure).
            task_prefix = '%s.%s' % (__name__, task['name'])

            @self.celery.task(name=task_prefix + '.find_instances')
            def find_instances():
                """Run ``task['do']`` on every instance the service finds."""
                print("Triggered task", task['name'])
                for x, in task['service'].getInstances(app.db):
                    task['do'](x)

            @self.celery.task(name=task_prefix + '.do_task')
            def do_task(uri):
                """Process the graph of the resource identified by ``uri``."""
                print("Running task", task['name'], 'on', uri)
                resource = app.get_resource(uri)

                # The return value of process_graph is not used.
                task['service'].process_graph(resource.graph)

            task['service'].app = app
            task['find_instances'] = find_instances
            task['do'] = do_task

            return task
            
        app.inference_tasks = []
        if 'inference_tasks' in self.config:
            app.inference_tasks = [setup_periodic_task(task) for task in self.config['inference_tasks']]

        for name, task in list(self.config['inferencers'].items()):
            task.app = app
            
        for task in app.inference_tasks:
            if 'schedule' in task:
                #print "Scheduling task", task['name'], task['schedule']
                self.celery.add_periodic_task(
                    crontab(**task['schedule']),
                    task['find_instances'].s(),
                    name=task['name']
                )
            else:
                task['find_instances'].delay()

        def is_waiting(service_name):
            """
            Check if a task for ``service_name`` is waiting on any worker.

            Every worker's reply to ``inspect().scheduled()`` is examined, not
            only the first one.  Returns ``False`` when no worker replies.
            """
            # scheduled() yields None when no worker answers.
            scheduled_by_worker = inspect().scheduled() or {}
            for worker_tasks in scheduled_by_worker.values():
                for task in worker_tasks:
                    # Celery documents scheduled entries as wrapping the task
                    # description under 'request'; fall back to the entry
                    # itself for replies that are already flat.
                    request_info = task.get('request', task)
                    if 'kwargs' not in request_info:
                        continue
                    task_kwargs = request_info['kwargs']
                    if isinstance(task_kwargs, str):
                        # NOTE(security): eval() of the kwargs repr reported by
                        # the worker; kept from the original -- review whether
                        # a safer parser can be used here.
                        task_kwargs = eval(task_kwargs)
                    if not isinstance(task_kwargs, dict):
                        continue
                    if service_name == task_kwargs.get('service_name', None):
                        return True
            return False
                
        def is_running_waiting(service_name):
            """
            Check if a task for ``service_name`` is running or waiting.

            Waiting tasks are detected through ``is_waiting``; running tasks
            through every worker's reply to ``inspect().active()``.  Returns
            ``False`` when nothing matches or no worker replies.
            """
            if is_waiting(service_name):
                return True
            # active() yields None when no worker answers.
            active_by_worker = inspect().active() or {}
            for worker_tasks in active_by_worker.values():
                for task in worker_tasks:
                    if 'kwargs' not in task:
                        continue
                    task_kwargs = task['kwargs']
                    if isinstance(task_kwargs, str):
                        # NOTE(security): eval() of the kwargs repr reported by
                        # the worker; kept from the original -- review whether
                        # a safer parser can be used here.
                        task_kwargs = eval(task_kwargs)
                    if not isinstance(task_kwargs, dict):
                        continue
                    if service_name == task_kwargs.get('service_name', None):
                        return True
            return False
                        
        @self.celery.task()
        def update(nanopub_uri):
            """Dispatch inference work after a change to the knowledge graph.

            Retired nanopublications are skipped.  Otherwise every
            ``updateChangeQuery`` inferencer that finds instances in the
            changed nanopublication is queued via ``process_nanopub``, and
            afterwards every ``globalChangeQuery`` inferencer that is not
            already running or waiting is queued via ``process_resource``.
            """
            if not app.nanopub_manager.is_current(nanopub_uri):
                print("Skipping retired nanopub", nanopub_uri)
                return
            changed = app.nanopub_manager.get(nanopub_uri)
            changed_graph = ConjunctiveGraph(changed.store)
            if 'inferencers' not in self.config:
                return

            # First pass: inferencers triggered by this particular change.
            for name, service in list(self.config['inferencers'].items()):
                service.app = self
                if (service.query_predicate == self.NS.whyis.updateChangeQuery
                        and service.getInstances(changed_graph)):
                    print("invoking", name, nanopub_uri)
                    process_nanopub.apply_async(
                        kwargs={'nanopub_uri': nanopub_uri, 'service_name': name},
                        priority=1)

            # Second pass: graph-wide inferencers, at most one pending each.
            for name, service in list(self.config['inferencers'].items()):
                service.app = self
                if (service.query_predicate == self.NS.whyis.globalChangeQuery
                        and not is_running_waiting(name)):
                    process_resource.apply_async(
                        kwargs={'service_name': name}, priority=5)

        def run_update(nanopub_uri):
            """Queue the ``update`` task for ``nanopub_uri`` with priority 9."""
            update.apply_async(args=[nanopub_uri],priority=9)
        self.nanopub_update_listener = run_update

        def is_waiting_importer(entity_name, exclude=None):
            """
            Check if a scheduled task mentions ``entity_name`` in its arguments.

            Only scheduled (waiting) tasks are inspected.  ``exclude`` is
            accepted for compatibility and is not used.
            """
            # scheduled() maps each worker to a list of entries and yields
            # None when no worker answers; query it once.
            scheduled_by_worker = inspect().scheduled() or {}
            for worker_tasks in scheduled_by_worker.values():
                for task in worker_tasks:
                    # Celery documents scheduled entries as wrapping the task
                    # description under 'request'; fall back to the entry
                    # itself for replies that are already flat.
                    request_info = task.get('request', task)
                    if 'args' in request_info and entity_name in request_info['args']:
                        return True
            return False

        app = self
        @self.celery.task(retry_backoff=True, retry_jitter=True,autoretry_for=(Exception,),max_retries=4, bind=True)
        def run_importer(self, entity_name):
            """Import ``entity_name`` through its importer if it is out of date.

            A Redis counter serves as a per-entity lock: only the invocation
            that raises it to 1 proceeds.  The entity is (re)loaded when it
            has never been imported or when the remote modification time is
            more than ``importer.min_modified`` seconds newer than the local
            one.

            ``self`` is the bound Celery task; the application is ``app``.
            """
            entity_name = URIRef(entity_name)
            # NOTE(review): the key is a tuple -- confirm the Redis client in
            # use accepts non-string keys.
            lock_key = ("import", entity_name)
            counter = app.redis.incr(lock_key)
            if counter > 1:
                # Another invocation holds the lock; its counter is left alone.
                return
            try:
                print('importing', entity_name)
                importer = app.find_importer(entity_name)
                if importer is None:
                    return
                importer.app = app
                modified = importer.last_modified(entity_name, app.db, app.nanopub_manager)
                updated = importer.modified(entity_name)
                if updated is None:
                    updated = datetime.now(pytz.utc)
                print("Remote modified:", updated, type(updated), "Local modified:", modified, type(modified))
                if modified is None or (updated - modified).total_seconds() > importer.min_modified:
                    importer.load(entity_name, app.db, app.nanopub_manager)
            finally:
                # Always release the lock: otherwise a missing importer or a
                # failed load blocks every later import of this entity,
                # including this task's own automatic retries.
                app.redis.set(lock_key, 0)
        self.run_importer = run_importer

        self.template_imports = {}
        if 'template_imports' in self.config:
            for name, imp in list(self.config['template_imports'].items()):
                try:
                    m = importlib.import_module(imp)
                    self.template_imports[name] = m
                except Exception:
                    print("Error importing module %s into template variable %s." % (imp, name))
                    raise
        

    def configure_database(self):
        """
        Set up namespaces, databases, vocabulary, security and file depots.

        Reads ``lod_prefix``, ``vocab_file``, ``file_archive`` and
        ``nanopub_archive`` from the config, plus the ``admin_`` and
        ``knowledge_`` prefixed database settings.
        """
        lod_prefix = self.config['lod_prefix']
        self.NS = NS
        self.NS.local = rdflib.Namespace(lod_prefix + '/')

        # Admin and knowledge graphs come from differently prefixed settings.
        self.admin_db = database.engine_from_config(self.config, "admin_")
        self.db = database.engine_from_config(self.config, "knowledge_")
        self.db.app = self

        # The default and custom vocabularies share one store.
        self.vocab = ConjunctiveGraph()
        Graph(store=self.vocab.store).parse(
            "default_vocab.ttl", format="turtle", publicID=str(self.NS.local))
        Graph(store=self.vocab.store).parse(
            self.config['vocab_file'], format="turtle", publicID=str(self.NS.local))

        self.datastore = WhyisUserDatastore(self.admin_db, {}, lod_prefix)
        self.security = Security(self, self.datastore,
                                 register_form=ExtendedRegisterForm)

        # Depots are configured only once per process.
        if DepotManager.get('files') is None:
            DepotManager.configure('files', self.config['file_archive'])
        self.file_depot = DepotManager.get('files')
        if DepotManager.get('nanopublications') is None:
            DepotManager.configure('nanopublications', self.config['nanopub_archive'])

    def weighted_route(self, *args, **kwargs):
        """Like ``self.route``, with an optional ``compare_key`` keyword.

        When ``compare_key`` is given, the most recently added URL rule gets
        a ``match_compare_key`` callable returning it.  All other arguments
        are forwarded to ``self.route``.
        """
        def decorator(view_func):
            sort_key = kwargs.pop('compare_key', None)
            # Register the view exactly as self.route would.
            register = self.route(*args, **kwargs)
            register(view_func)

            if sort_key is not None:
                newest_rule = self.url_map._rules[-1]
                newest_rule.match_compare_key = lambda: sort_key

            return view_func
        return decorator

    def map_entity(self, name):
        """Map ``name`` through the first configured importer that matches it.

        :returns: ``(mapped_name, importer)``, or ``(None, None)`` when no
            importer in ``config['namespaces']`` matches.
        """
        candidates = (imp for imp in self.config['namespaces'] if imp.matches(name))
        matching = next(candidates, None)
        if matching is None:
            return None, None
        return matching.map(name), matching

    def find_importer(self, name):
        """Return the first configured importer whose resources include
        ``name``, or ``None`` when there is none."""
        return next(
            (imp for imp in self.config['namespaces'] if imp.resource_matches(name)),
            None)


    class Entity (rdflib.resource.Resource):
        _this = None
        
        def this(self):
            if self._this is None:
                self._this = self._graph.app.get_entity(self.identifier)
            return self._this

        _description = None

        def description(self):
            if self._description is None:
#                try:
                result = Graph()
#                try:
                for quad in self._graph.query('''
construct {
    ?e ?p ?o.
    ?o rdfs:label ?label.
    ?o skos:prefLabel ?prefLabel.
    ?o dc:title ?title.
    ?o foaf:name ?name.
    ?o ?pattr ?oatter.
    ?oattr rdfs:label ?oattrlabel
} where {
    graph ?g {
      ?e ?p ?o.
    }
    ?g a np:Assertion.
    optional {
      ?e sio:hasAttribute|sio:hasPart ?o.
      ?o ?pattr ?oattr.
      optional {
        ?oattr rdfs:label ?oattrlabel.
      }
    }
    optional {
      ?o rdfs:label ?label.
    }
    optional {
      ?o skos:prefLabel ?prefLabel.
    }
    optional {
      ?o dc:title ?title.
    }
    optional {
      ?o foaf:name ?name.
    }
}''', initNs=NS.prefixes, initBindings={'e':self.identifier}):
                    if len(quad) == 3:
                        s,p,o = quad
                    else:
                        # Last term is never used
                        s,p,o,_ = quad
                    result.add((s,p,o))
#                except:
#                    pass
                self._description = result.resource(self.identifier)
#                except Exception as e:
#                    print str(e), self.identifier
#                    raise e
            return self._description
        
    def get_resource(self, entity, async_=True, retrieve=True):
        """Return an ``Entity`` for ``entity``, importing it first if needed.

        With ``retrieve`` set, the entity name is mapped through the
        configured importers.  If an importer applies, the import runs
        inline when the entity was never imported or ``async_`` is
        ``False``; otherwise it is queued, unless the importer is
        import-once.
        """
        if not retrieve:
            return self.Entity(self.db, entity)

        mapped_name, importer = self.map_entity(entity)
        entity = entity if mapped_name is None else mapped_name
        if importer is None:
            importer = self.find_importer(entity)
        print(entity, importer)

        if importer is not None:
            modified = importer.last_modified(entity, self.db, self.nanopub_manager)
            import_inline = modified is None or async_ is False
            if import_inline:
                self.run_importer(entity)
            elif not importer.import_once:
                print("Type of modified is",type(modified))
                self.run_importer.delay(entity)

        return self.Entity(self.db, entity)
    
    def configure_template_filters(self):
        """Install the built-in template filters, then any from the
        ``filters`` config entry."""
        filters.configure(self)
        if 'filters' not in self.config:
            return
        for filter_name, filter_fn in self.config['filters'].items():
            register = self.template_filter(filter_name)
            register(filter_fn)


    def add_file(self, f, entity, nanopub):
        entity = rdflib.URIRef(entity)
        old_nanopubs = []
        for np_uri, np_assertion, in self.db.query('''select distinct ?np ?assertion where {
    hint:Query hint:optimizer "Runtime" .
    graph ?assertion {?e whyis:hasFileID ?fileid}
    ?np np:hasAssertion ?assertion.
}''', initNs=NS.prefixes, initBindings=dict(e=rdflib.URIRef(entity))):
            if not self._can_edit(np_uri):
                raise Unauthorized()
            old_nanopubs.append((np_uri, np_assertion))
        fileid = self.file_depot.create(f.stream, f.filename, f.mimetype)
        nanopub.add((nanopub.identifier, NS.sio.isAbout, entity))
        nanopub.assertion.add((entity, NS.whyis.hasFileID, Literal(fileid)))
        if current_user._get_current_object() is not None and hasattr(current_user, 'identifier'):
            nanopub.assertion.add((entity, NS.dc.contributor, current_user.identifier))
        nanopub.assertion.add((entity, NS.dc.created, Literal(datetime.utcnow())))
        nanopub.assertion.add((entity, NS.ov.hasContentType, Literal(f.mimetype)))
        nanopub.assertion.add((entity, NS.RDF.type, NS.mediaTypes[f.mimetype]))
        nanopub.assertion.add((NS.mediaTypes[f.mimetype], NS.RDF.type, NS.dc.FileFormat))
        nanopub.assertion.add((entity, NS.RDF.type, NS.mediaTypes[f.mimetype.split('/')[0]]))
        nanopub.assertion.add((NS.mediaTypes[f.mimetype.split('/')[0]], NS.RDF.type, NS.dc.FileFormat))
        nanopub.assertion.add((entity, NS.RDF.type, NS.pv.File))

        if current_user._get_current_object() is not None and hasattr(current_user, 'identifier'):
            nanopub.pubinfo.add((nanopub.assertion.identifier, NS.dc.contributor, current_user.identifier))
        nanopub.pubinfo.add((nanopub.assertion.identifier, NS.dc.created, Literal(datetime.utcnow())))

        return old_nanopubs

    def delete_file(self, entity):
        for np_uri, in self.db.query('''select distinct ?np where {
    hint:Query hint:optimizer "Runtime" .
    graph ?np_assertion {?e whyis:hasFileID ?fileid}
    ?np np:hasAssertion ?np_assertion.
}''', initNs=NS.prefixes, initBindings=dict(e=entity)):
            if not self._can_edit(np_uri):
                raise Unauthorized()
            self.nanopub_manager.retire(np_uri)
        
                
    def add_files(self, uri, files, upload_type=NS.pv.File):
        """Attach uploaded files to ``uri`` and publish the result.

        Collections and datasets store every non-empty upload under
        ``uri/<secured filename>`` and link it to ``uri``; any other upload
        type stores only the first non-empty upload, directly at ``uri``.
        Superseded nanopublications are retired and recorded as revisions.
        Nothing is published when no file was stored.
        """
        nanopub = self.nanopub_manager.new()

        stored_any = False
        superseded = []
        nanopub.assertion.add((uri, self.NS.RDF.type, upload_type))

        is_collection = upload_type == URIRef("http://purl.org/dc/dcmitype/Collection")
        is_dataset = not is_collection and upload_type == NS.dcat.Dataset

        if is_collection or is_dataset:
            for f in files:
                filename = secure_filename(f.filename)
                if filename == '':
                    continue
                file_uri = URIRef(uri+"/"+filename)
                superseded.extend(self.add_file(f, file_uri, nanopub))
                if is_collection:
                    nanopub.assertion.add((uri, NS.dc.hasPart, file_uri))
                else:
                    nanopub.assertion.add((uri, NS.dcat.distribution, file_uri))
                    nanopub.assertion.add((file_uri, NS.RDF.type, NS.dcat.Distribution))
                    nanopub.assertion.add((file_uri, NS.dcat.downloadURL, file_uri))
                stored_any = True
        else:
            # A plain file upload keeps only the first non-empty upload.
            for f in files:
                if f.filename == '':
                    continue
                superseded.extend(self.add_file(f, uri, nanopub))
                nanopub.assertion.add((uri, NS.RDF.type, NS.pv.File))
                stored_any = True
                break

        if not stored_any:
            return

        for old_np, old_np_assertion in superseded:
            nanopub.pubinfo.add((nanopub.assertion.identifier, NS.prov.wasRevisionOf, old_np_assertion))
            self.nanopub_manager.retire(old_np)

        for prepared in self.nanopub_manager.prepare(nanopub):
            self.nanopub_manager.publish(prepared)

    def _can_edit(self, uri):
        if self.managed:
            return True
        if current_user._get_current_object() is None:
            # This isn't null even when not authenticated, unless we are an autonomic agent.
            return True
        if not hasattr(current_user, 'identifier'): # This is an anonymous user.
            return False
        if current_user.has_role('Publisher') or current_user.has_role('Editor')  or current_user.has_role('Admin'):
            return True
        if self.db.query('''ask {
    ?nanopub np:hasAssertion ?assertion; np:hasPublicationInfo ?info.
    graph ?info { ?assertion dc:contributor ?user. }
}''', initBindings=dict(nanopub=uri, user=current_user.identifier), initNs=dict(np=self.NS.np, dc=self.NS.dc)):
            #print "Is owner."
            return True
        return False

    def configure_views(self):

        def sort_by(resources, property):
            """Return ``resources`` sorted by each one's value for ``property``."""
            def property_value(resource):
                return resource.value(property)

            return sorted(resources, key=property_value)

        def camel_case_split(identifier):
            """Split a camel-case identifier into its words.

            Runs of capitals stay together, so ``'HTTPResponse'`` yields
            ``['HTTP', 'Response']``.  An empty string yields an empty list.
            """
            word_pattern = '.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)'
            words = []
            for match in finditer(word_pattern, identifier):
                words.append(match.group(0))
            return words

        label_properties = [self.NS.skos.prefLabel, self.NS.RDFS.label, self.NS.schema.name, self.NS.dc.title, self.NS.foaf.name, self.NS.schema.name]

        @lru_cache(maxsize=1000)
        def get_remote_label(uri):
            """Find a display label for ``uri`` in the knowledge or admin graph.

            For each database in turn, the statements about ``uri`` are
            fetched and searched for one of ``label_properties``, then for a
            FOAF given/family name.  When neither database yields a label,
            one is derived from the URI's qualified name, and as a last
            resort the URI itself is returned as a string.

            Results are memoised per URI (up to 1000 entries).
            """
            for db in [self.db, self.admin_db]:
                statements = Graph()
                try:
                    db.nsBindings = {}
                    statements += db.query('''select ?s ?p ?o where {
                        hint:Query hint:optimizer "Runtime" .

                         ?s ?p ?o.}''',
                                  initNs=self.NS.prefixes, initBindings=dict(s=uri))
                    db.nsBindings = {}
                except Exception as e:
                    # Best effort: a failing store must not prevent the other
                    # database or the qname fallback from being tried.
                    print(str(e), uri)
                resource_entity = statements.resource(uri)
                if len(resource_entity.graph) == 0:
                    continue
                for property in label_properties:
                    labels = self.lang_filter(resource_entity[property])
                    if len(labels) > 0:
                        return labels[0]

                # No label property matched; try to assemble a person's name.
                name = [x.value for x in [resource_entity.value(self.NS.foaf.givenName),
                                          resource_entity.value(self.NS.foaf.familyName)] if x is not None]
                if len(name) > 0:
                    return ' '.join(name)
            try:
                # Derive a readable label from the local part of the qname.
                label = self.db.qname(uri).split(":")[1].replace("_"," ")
                return ' '.join(camel_case_split(label)).title()
            except Exception as e:
                print(str(e), uri)
                return str(uri)
        
        def get_label(resource):
            """Return a display label for ``resource``.

            The first language-filtered value of any of ``label_properties``
            on the resource wins; otherwise ``get_remote_label`` is asked
            for the resource's identifier.
            """
            label_sets = (self.lang_filter(resource[prop])
                          for prop in label_properties)
            for candidates in label_sets:
                if len(candidates) > 0:
                    return candidates[0]
            return get_remote_label(resource.identifier)
            
        @self.before_request
        def load_forms():
            """Run the configured authenticators and populate ``g``.

            Authenticators are tried in order until one returns a user.  The
            helpers used by templates and views are then set on ``g``.
            """
            for authenticator in self.config.get('authenticators', ()):
                user = authenticator.authenticate(request, self.datastore, self.config)
                if user is not None:
                    break

            request_globals = {
                'ns': self.NS,
                'get_summary': get_summary,
                'get_label': get_label,
                'labelize': self.labelize,
                'get_resource': self.get_resource,
                'get_entity': self.get_entity,
                'rdflib': rdflib,
                'isinstance': isinstance,
                'current_user': current_user,
                'slugify': slugify,
                'db': self.db,
            }
            for attribute, value in request_globals.items():
                setattr(g, attribute, value)

        @self.login_manager.user_loader
        def load_user(user_id):
            """Look up the user for ``user_id`` in the datastore.

            :returns: the result of ``self.datastore.find_user``, or ``None``
                when ``user_id`` is ``None``.
            """
            if user_id is None:
                return None
            return self.datastore.find_user(id=user_id)

        extensions = {
            "rdf": "application/rdf+xml",
            "jsonld": "application/ld+json",
            "json": "application/json",
            "ttl": "text/turtle",
            "trig": "application/trig",
            "turtle": "text/turtle",
            "owl": "application/rdf+xml",
            "nq": "application/n-quads",
            "nt": "application/n-triples",
            "html": "text/html"
        }

        dataFormats = {
            "application/rdf+xml" : "xml",
            "application/ld+json" : 'json-ld',
            "application/json" : 'json-ld',
            "text/turtle" : "turtle",
            "application/trig" : "trig",
            "application/n-quads" : "nquads",
            "application/n-triples" : "nt",
            "application/rdf+json" : "json",
            "text/html" : None,
            "application/xhtml+xml" : None,
            "application/xhtml" : None,
            None: "json-ld"
        }

        htmls = set(['application/xhtml','text/html', 'application/xhtml+xml'])


        def get_graphs(graphs):
            """Load the named ``graphs`` from the store into a new dataset.

            :param graphs: graph identifiers; each one's ``n3()`` form is
                placed in the query's VALUES clause.
            :returns: an ``rdflib.Dataset`` holding the selected quads.
            """
            template = '''select ?s ?p ?o ?g where {
                hint:Query hint:optimizer "Runtime" .

                graph ?g {?s ?p ?o}
                } values ?g { %s }'''
            graph_values = ' '.join(graph_id.n3() for graph_id in graphs)
            quads = self.db.store.query(template % graph_values,
                                        initNs=self.NS.prefixes)
            dataset = rdflib.Dataset()
            dataset.addN(quads)
            return dataset

        def explain(graph):
            """Collect the nanopublications whose assertions contain triples of ``graph``.

            Every triple of ``graph`` is written into a SPARQL VALUES block
            that is appended to the query; each matching nanopublication is
            loaded into one ``ConjunctiveGraph`` via ``self.nanopub_manager``.

            NOTE(review): ``entity`` is used in the error report and in the
            return value but is neither a parameter nor a local of this
            function -- confirm it is defined in an enclosing scope, otherwise
            both uses raise ``NameError``.
            """
            # One "(s p o)" row per triple, in N3 form.
            values = ')\n  ('.join([' '.join([x.n3() for x in triple]) for triple in graph.triples((None,None,None))])
            values = 'VALUES (?s ?p ?o)\n{\n('+ values + ')\n}'

            try:
                nanopubs = self.db.query('''select distinct ?np where {
    hint:Query hint:optimizer "Runtime" .
    ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g;
        np:hasPublicationInfo ?pubinfo;
        np:hasAssertion ?assertion;
    graph ?assertion { ?s ?p ?o.}
}''' + values, initNs=self.NS.prefixes)
                result = ConjunctiveGraph()
                for nanopub_uri, in nanopubs:
                    # Each nanopublication is loaded into the shared graph.
                    self.nanopub_manager.get(nanopub_uri, result)
            except Exception as e:
                print(str(e), entity)
                raise e
            return result.resource(entity)
        
        def get_entity_sparql(entity):
            try:
                statements = self.db.query('''select distinct ?s ?p ?o ?g where {
    hint:Query hint:optimizer "Runtime" .
            ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g;
                np:hasPublicationInfo ?pubinfo;
                np:hasAssertion ?assertion;

            {graph ?np { ?np sio:isAbout ?e.}}
            UNION
            {graph ?assertion { ?e ?p ?o.}}
            graph ?g { ?s ?p ?o }
        }''',initBindings={'e':entity}, initNs=self.NS.prefixes)
                result = ConjunctiveGraph()
                result.addN(statements)
            except Exception as e:
                print(str(e), entity)
                raise e
            #print result.serialize(format="trig")
            return result.resource(entity)
            
        
        def get_entity_disk(entity):
            try:
                nanopubs = self.db.query('''select distinct ?np where {
    hint:Query hint:optimizer "Runtime" .
            ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g;
                np:hasPublicationInfo ?pubinfo;
                np:hasAssertion ?assertion;

            {graph ?np { ?np sio:isAbout ?e.}}
            UNION
            {graph ?assertion { ?e ?p ?o.}}
        }''',initBindings={'e':entity}, initNs=self.NS.prefixes)
                result = ConjunctiveGraph()
                for nanopub_uri, in nanopubs:
                    self.nanopub_manager.get(nanopub_uri, result)
#                result.addN(nanopubs)
            except Exception as e:
                print(str(e), entity)
                raise e
            #print result.serialize(format="trig")
            return result.resource(entity)

        # Select the SPARQL-backed loader as the entity lookup used by the
        # closures in this method, and expose the same callable on the app.
        get_entity = get_entity_sparql
        
        self.get_entity = get_entity

        def get_summary(resource):
            """Yield ``(property, term)`` pairs that can serve as a summary of ``resource``.

            A fixed, ordered list of description-like properties is tried,
            followed by any extra properties configured under
            ``summary_properties``. The values of each property are passed
            through ``self.lang_filter`` before being yielded.

            :param resource: object indexable by property (``resource[prop]``).
            :return: generator of ``(property, term)`` tuples, in property order.
            """
            candidates = [
                self.NS.skos.definition,
                self.NS.schema.description,
                self.NS.dc.abstract,
                self.NS.dc.description,
                self.NS.dc.summary,
                self.NS.RDFS.comment,
                self.NS.dcelements.description,
                URIRef("http://purl.obolibrary.org/obo/IAO_0000115"),
                self.NS.prov.value,
                self.NS.sio.hasValue
            ]
            if 'summary_properties' in self.config:
                candidates += list(self.config['summary_properties'])
            for summary_property in candidates:
                for term in self.lang_filter(resource[summary_property]):
                    yield (summary_property, term)

        self.get_summary = get_summary

        @self.route('/sparql', methods=['GET', 'POST'])
        @conditional_login_required
        def sparql_view():
            """Proxy read-only SPARQL requests to the store's query endpoint.

            Requests that try to perform an update are rejected with 403. This
            covers an ``update`` parameter in the query string, an ``update``
            field in a form-encoded POST body, and a POST whose content type is
            ``application/sparql-update``. A non-POST request without a
            ``query`` parameter is redirected to the HTML query form.

            :return: ``(Response, status_code)`` mirroring the upstream answer,
                a redirect, or an ``("Update not allowed.", 403)`` tuple.
            """
            arg_names = {arg.lower() for arg in request.args.keys()}
            if 'update' in arg_names:
                return "Update not allowed.", 403

            endpoint = self.db.store.query_endpoint
            if request.method == 'POST':
                content_type = request.headers.get('Content-Type', '')
                if 'application/sparql-update' in content_type.lower():
                    return "Update not allowed.", 403
                body = request.get_data()
                if 'application/x-www-form-urlencoded' in content_type.lower():
                    # The SPARQL protocol also allows updates as a form field,
                    # so the body has to be inspected as well as the URL.
                    from urllib.parse import parse_qs
                    form_fields = parse_qs(body.decode('utf-8', 'replace'),
                                           keep_blank_values=True)
                    if any(field.lower() == 'update' for field in form_fields):
                        return "Update not allowed.", 403
                req = requests.post(endpoint, data=body,
                                    headers=request.headers, params=request.args)
            else:
                # GET, plus the HEAD method Flask registers alongside GET.
                if 'query' not in arg_names:
                    return redirect(url_for('sparql_form'))
                headers = {}
                headers.update(request.headers)
                # A GET carries no body upstream, so a stale length must not be forwarded.
                headers.pop('Content-Length', None)
                req = requests.get(endpoint, headers=headers, params=request.args)

            response = Response(req.content,
                                content_type=req.headers.get('content-type'))
            return response, req.status_code
        
        @self.route('/sparql.html')
        @conditional_login_required
        def sparql_form():
            """Render the ``sparql.html`` template pointing at the ``/sparql`` endpoint."""
            context = {
                'ns': self.NS,
                'g': g,
                'config': self.config,
                'current_user': current_user,
                'isinstance': isinstance,
                'rdflib': rdflib,
                'hasattr': hasattr,
                'set': set,
            }
            return render_template('sparql.html', endpoint="/sparql", **context)

        
        # The CDN route is only registered when a CDN directory is configured.
        if self.config.get('WHYIS_CDN_DIR') is not None:
            @self.route('/cdn/<path:filename>')
            def cdn(filename):
                """Serve ``filename`` from the configured ``WHYIS_CDN_DIR`` directory."""
                cdn_root = self.config['WHYIS_CDN_DIR']
                return send_from_directory(cdn_root, filename)

        @self.route('/about.<format>', methods=['GET','POST','DELETE'])
        @self.weighted_route('/<path:name>', compare_key=bottom_compare_key, methods=['GET','POST','DELETE'])
        @self.weighted_route('/<path:name>.<format>', compare_key=bottom_compare_key, methods=['GET','POST','DELETE'])
        @self.route('/', methods=['GET','POST','DELETE'])
        @self.route('/home', methods=['GET','POST','DELETE'])
        @self.route('/about', methods=['GET','POST','DELETE'])
        @conditional_login_required
        def view(name=None, format=None, view=None):
            """Main entity endpoint: upload to, delete, or display an entity.

            The entity is ``self.NS.local[name]`` when a path is given, else
            the ``uri`` query argument, else the local ``Home`` resource.

            * POST stores the uploaded files on the entity and redirects to
              its view.
            * DELETE removes the entity's file and answers 204.
            * Any other method (GET, and the HEAD that Flask adds for GET
              routes) serves the attached file when there is one and no
              ``view`` argument. Otherwise it content-negotiates between an
              HTML view and an RDF serialization of the ``describe`` view.

            :param name: path part of the URL, if any.
            :param format: trailing ``.ext``; when it is not a known extension
                it is treated as part of ``name``.
            :param view: unused here; the view is read from ``request.args``.
            """
            self.db.store.nsBindings = {}
            content_type = None
            if format is not None:
                if format in extensions:
                    content_type = extensions[format]
                elif name is not None:
                    # Not a known extension: the dot belonged to the name.
                    # (With no name, e.g. '/about.xyz', there is nothing to
                    # re-join and the suffix is ignored.)
                    name = '.'.join([name, format])

            if name is not None:
                entity = self.NS.local[name]
            elif 'uri' in request.args:
                entity = URIRef(request.args['uri'])
            else:
                entity = self.NS.local.Home

            if request.method == 'POST':
                print ("uploading file",entity)
                if len(request.files) == 0:
                    flash('No file uploaded')
                    return redirect(request.url)
                upload_type = rdflib.URIRef(request.form['upload_type'])
                self.add_files(entity, [y for x, y in request.files.items(multi=True)],
                               upload_type=upload_type)
                url = "/about?%s" % urlencode(dict(uri=str(entity), view="view"))
                print ("redirecting to",url)
                return redirect(url)

            if request.method == 'DELETE':
                self.delete_file(entity)
                return '', 204

            # GET and HEAD: HEAD previously fell through and returned None.
            resource = self.get_resource(entity)

            # An attached file is served as-is unless a view was explicitly requested.
            fileid = resource.value(self.NS.whyis.hasFileID)
            if fileid is not None and 'view' not in request.args:
                print (resource.identifier, fileid)
                f = self.file_depot.get(fileid)
                return FileServeApp(f, self.config["file_archive"].get("cache_max_age",3600*24*7))

            if content_type is None:
                content_type = request.headers.get('Accept', 'text/turtle')

            fmt = sadi.mimeparse.best_match([mt for mt in list(dataFormats.keys()) if mt is not None],content_type)
            if 'view' not in request.args and fmt not in htmls and fmt in dataFormats:
                output_graph = ConjunctiveGraph()
                result, status, headers = render_view(resource, view='describe')
                output_graph.parse(data=result, format="json-ld")
                # Report the negotiated media type; content_type may hold a
                # whole Accept header, which is not a valid Content-Type.
                return output_graph.serialize(format=dataFormats[fmt]), 200, {'Content-Type': fmt}
            return render_view(resource)
                
        views = {}
        def render_view(resource, view=None, args=None):
            template_args = dict()
            template_args.update(self.template_imports)
            template_args.update(dict(
                ns=self.NS,
                this=resource, g=g,
                current_user=current_user,
                isinstance=isinstance,
                args=request.args if args is None else args,
                url_for=url_for,
                get_entity=get_entity,
                get_summary=get_summary,
                search = search,
                rdflib=rdflib,
                config=self.config,
                hasattr=hasattr,
                set=set))
            if view is None and 'view' in request.args:
                view = request.args['view']

            if view is None:
                view = 'view'

            types = []
            if 'as' in request.args:
                types = [URIRef(request.args['as']), 0]

            types.extend((x, 1) for x in self.vocab[resource.identifier : NS.RDF.type])
            if not types: # KG types cannot override vocab types. This should keep views stable where critical.
                types.extend([(x.identifier, 1) for x in resource[NS.RDF.type]])
            #if len(types) == 0:
            types.append([self.NS.RDFS.Resource, 100])
            type_string = ' '.join(["(%s %d '%s')" % (x.n3(), i, view) for x, i in types])
            view_query = '''select ?id ?view (count(?mid)+?priority as ?rank) ?class ?c where {
    values (?c ?priority ?id) { %s }
    ?c rdfs:subClassOf* ?mid.
    ?mid rdfs:subClassOf* ?class.
    ?class ?viewProperty ?view.
    ?viewProperty rdfs:subPropertyOf* whyis:hasView.
    ?viewProperty dc:identifier ?id.
} group by ?c ?class order by ?rank
''' % type_string

            #print view_query
            views = list(self.vocab.query(view_query, initNs=dict(whyis=self.NS.whyis, dc=self.NS.dc)))
            if len(views) == 0:
                abort(404)

            headers = {'Content-Type': "text/html"}
            extension = views[0]['view'].value.split(".")[-1]
            if extension in extensions:
                headers['Content-Type'] = extensions[extension]
                

            # default view (list of nanopubs)
            # if available, replace with class view
            # if available, replace with instance view
            return render_template(views[0]['view'].value, **template_args), 200, headers
        self.render_view = render_view

        def render_nanopub(data, code, headers=None):
            """Render a nanopublication through ``render_view``.

            :param data: the nanopublication, or ``None`` for a 404 page.
            :param code: accepted but superseded by the status ``render_view`` returns.
            :param headers: accepted but superseded by the headers ``render_view`` returns.
            :return: a response object built with ``make_response``.
            """
            if data is None:
                return make_response("<h1>Not Found</h1>", 404)

            nanopub_entity = app.Entity(ConjunctiveGraph(data.store), data.identifier)
            nanopub_entity.nanopub = data
            body, status, view_headers = render_view(nanopub_entity)
            response = make_response(body, status)
            response.headers.extend(view_headers or {})
            return response

        # Alias used by the nested route handlers and helpers in this method.
        app = self

        # Manager for nanopublications under '<lod_prefix>/pub/'; it is handed
        # self.nanopub_update_listener as its update listener.
        self.nanopub_manager = NanopublicationManager(app.db.store,
                                                      Namespace('%s/pub/'%(app.config['lod_prefix'])),
                                                      self,
                                                      update_listener=self.nanopub_update_listener)

        def _get_graph():
            """Parse the body of the current request into a new ConjunctiveGraph.

            The ``Content-Type`` header is matched against the supported data
            formats; when nothing matches, the returned graph is left empty.
            The body is decoded with the request's content encoding, or UTF-8
            when none is given.
            """
            parsed = ConjunctiveGraph()
            declared_type = request.headers['Content-Type']
            charset = request.content_encoding or 'utf8'
            payload = str(request.data, charset)
            supported = [mt for mt in list(dataFormats.keys()) if mt is not None]
            fmt = sadi.mimeparse.best_match(supported, declared_type)
            if fmt in dataFormats:
                parsed.parse(data=payload, format=dataFormats[fmt])
            return parsed

        
        #decorators = [conditional_login_required]

        def _get_uri(ident):
            """Return the nanopublication URI ``<lod_prefix>/pub/<ident>``."""
            prefix = app.config['lod_prefix']
            return URIRef('%s/pub/%s' % (prefix, ident))

        @self.route('/pub/<ident>',methods=['GET'])
        @self.route('/pub/<ident>.<format>', methods=['GET'])
        @conditional_login_required
        def get_nanopub(ident, format=None):
            """Return one nanopublication, as an HTML view or an RDF serialization.

            Anything after the first ``_`` in ``ident`` is ignored. The output
            format comes from a known ``.ext`` suffix, else the ``Accept``
            header, else ``application/ld+json``. When a ``view`` argument is
            given, an HTML type is negotiated, or no supported data format
            matches, the nanopublication is rendered as a view.

            :raises: aborts with 404 when the nanopublication does not exist.
            """
            ident = ident.split("_")[0]
            uri = _get_uri(ident)
            result = app.nanopub_manager.get(uri)
            if result is None:
                abort(404)

            content_type = None
            if format is not None and format in extensions:
                content_type = extensions[format]
            if content_type is None:
                content_type = request.headers.get('Accept', 'application/ld+json')

            fmt = sadi.mimeparse.best_match([mt for mt in list(dataFormats.keys()) if mt is not None],content_type)
            if 'view' not in request.args and fmt not in htmls and fmt in dataFormats:
                # Set the type through the constructor; assigning a plain dict
                # to response.headers would replace the Headers object.
                response = Response(result.serialize(format=dataFormats[fmt]),
                                    content_type=fmt)
                return response, 200
            # Also the fallback when nothing matched: the function used to
            # return None here, which Flask rejects as an invalid response.
            return render_nanopub(result, 200)

        @self.route('/pub/<ident>', methods=['DELETE'])
        @login_required
        def delete_nanopub(ident):
            """Retire a nanopublication the current user may edit.

            Anything after the first ``_`` in ``ident`` is ignored.

            :return: ``('', 204)`` on success, or a 401 body when
                ``app._can_edit`` denies access.
            """
            nanopub_uri = _get_uri(ident.split("_")[0])
            if app._can_edit(nanopub_uri):
                app.nanopub_manager.retire(nanopub_uri)
                return '', 204
            return '<h1>Not Authorized</h1>', 401


        @self.route('/pub/<ident>', methods=['PUT'])
        @login_required
        def put_nanopub(ident):
            """Replace a nanopublication with the revision sent in the request body.

            Each nanopublication prepared from the body is prepped like a newly
            posted one, marked as ``prov:wasRevisionOf`` the existing
            assertion, and published. The existing nanopublication is retired
            once, before the first publish.

            NOTE(review): the previous code called ``_prep_nanopub`` with two
            arguments although it takes one, so this endpoint always raised
            TypeError, and it returned no response. Loading the existing
            nanopublication through ``nanopub_manager.get`` is the presumed
            intent. This assumes the returned object exposes ``.assertion``
            like prepared nanopublications do; please confirm.

            :return: ``('', 201, headers)`` with ``Location`` set to the last
                published nanopublication.
            :raises: aborts with 404 when there is nothing to revise.
            """
            nanopub_uri = _get_uri(ident)
            old_nanopub = app.nanopub_manager.get(nanopub_uri)
            if old_nanopub is None:
                abort(404)

            inputGraph = _get_graph()
            headers = {}
            retired = False
            for nanopub in app.nanopub_manager.prepare(inputGraph):
                _prep_nanopub(nanopub)
                nanopub.pubinfo.set((nanopub.assertion.identifier, app.NS.prov.wasRevisionOf, old_nanopub.assertion.identifier))
                if not retired:
                    # Retire only once, and only when there is a replacement.
                    app.nanopub_manager.retire(nanopub_uri)
                    retired = True
                headers['Location'] = nanopub.identifier
                app.nanopub_manager.publish(nanopub)

            return '', 201, headers

        def _prep_nanopub(nanopub):
            """Prepare an incoming nanopublication for publishing.

            Runs ``_prep_graph`` on the assertion (about the nanopublication's
            ``sio:isAbout`` target, if any) and on the provenance (about the
            assertion), then records the current user as ``dc:contributor`` of
            the assertion in the publication info.

            :return: the same ``nanopub`` object.
            """
            topic = nanopub.nanopub_resource.value(app.NS.sio.isAbout)
            if topic is None:
                topic_uri = None
            else:
                topic_uri = topic.identifier
            _prep_graph(nanopub.assertion_resource, topic_uri)
            _prep_graph(nanopub.provenance_resource, nanopub.assertion_resource.identifier)

            contributor = (nanopub.assertion.identifier,
                           app.NS.dc.contributor,
                           current_user.identifier)
            nanopub.pubinfo.add(contributor)
            return nanopub

        @self.route('/pub/<ident>',  methods=['POST'])
        @self.route('/pub',  methods=['POST'])
        @login_required
        def post_nanopub(ident=None):
            """Publish the nanopublications contained in the request body.

            A POST to ``/pub/<ident>`` is treated as an update of that
            nanopublication and handed to the PUT handler.

            :return: ``('', 201, headers)`` with ``Location`` set to the last
                published nanopublication, or whatever the PUT handler returns.
            """
            if ident is not None:
                # ``self.put`` is not the PUT view; delegate to the handler
                # defined in this scope.
                return put_nanopub(ident)
            inputGraph = _get_graph()
            headers = {}
            for nanopub in app.nanopub_manager.prepare(inputGraph):
                _prep_nanopub(nanopub)
                headers['Location'] = nanopub.identifier
                app.nanopub_manager.publish(nanopub)

            return '', 201, headers


        def _prep_graph(resource, about = None):
            """Expand textual content attached to a graph resource into triples.

            When ``resource`` has both an ``ov:hasContentType`` and a
            ``prov:value``:

            * HTML/XHTML text is stored as ``sioc:content``, and the graph is
              cleared and re-parsed from the text as RDFa (best effort).
            * Markdown is first converted to HTML wrapped in a ``<div>``
              carrying the local vocab/base/prefixes (and ``resource=about``
              when given), then handled like HTML.
            * Any other content type listed in ``dataFormats`` is parsed into
              the graph using the matching format.

            :param resource: resource whose identifier names the graph to fill.
            :param about: optional subject used for the markdown wrapper.
            """
            content_type = resource.value(app.NS.ov.hasContentType)
            if content_type is not None:
                content_type = content_type.value
            g = Graph(store=resource.graph.store,identifier=resource.identifier)
            text = resource.value(app.NS.prov.value)
            if content_type is None or text is None:
                return

            html = None
            if content_type in ["text/html", "application/xhtml+xml"]:
                html = Literal(text.value, datatype=NS.RDF.HTML)
            if content_type == 'text/markdown':
                html = markdown.markdown(text.value)
                attributes = ['vocab="%s"' % app.NS.local,
                              'base="%s"'% app.NS.local,
                              'prefix="%s"' % ' '.join(['%s: %s'% x for x in list(app.NS.prefixes.items())])]
                if about is not None:
                    attributes.append('resource="%s"' % about)
                html = '<div %s>%s</div>' % (' '.join(attributes), html)
                html = Literal(html, datatype=NS.RDF.HTML)
                text = html
                content_type = "text/html"

            if html is not None:
                resource.set(app.NS.sioc.content, html)
                try:
                    g.remove((None,None,None))
                    g.parse(data=text, format='rdfa', publicID=app.NS.local)
                except Exception:
                    # RDFa extraction is deliberately best effort, but a bare
                    # ``except:`` would also swallow SystemExit and
                    # KeyboardInterrupt.
                    pass
            elif content_type in dataFormats:
                g.parse(data=text, format=dataFormats[content_type], publicID=app.NS.local)

    def get_send_file_max_age(self, filename):
        """Return 0 in debug mode; otherwise defer to ``Empty``'s implementation."""
        return 0 if self.debug else Empty.get_send_file_max_age(self, filename)
Example #7
0
        if not instance:
            instance = MastodonInstance(instance=acct.mastodon_instance,
                                        popularity=10)
            db.session.add(instance)
        amount = 0.01
        if acct.policy_enabled:
            amount = 0.5
        for _ in acct.sessions:
            amount += 0.1
        instance.bump(amount / max(1, instance.popularity))

    # normalise scores so the top is 20
    top_pop = (db.session.query(db.func.max(
        MastodonInstance.popularity)).scalar())
    MastodonInstance.query.update({
        MastodonInstance.popularity:
        MastodonInstance.popularity * 20 / top_pop
    })
    db.session.commit()


# (schedule, task) pairs, registered in this order.
for _schedule, _task in (
        (40, queue_fetch_for_most_stale_accounts),
        (9, queue_deletes),
        (6, refresh_account_with_oldest_post),
        (50, refresh_account_with_longest_time_since_refresh),
        (300, periodic_cleanup),
        (300, update_mastodon_instances_popularity),
):
    app.add_periodic_task(_schedule, _task)

# Running this module directly starts a worker.
if __name__ == '__main__':
    app.worker_main()
Example #8
0
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register the periodic tasks on ``sender`` and queue the start-up runs.

    After registering the schedules, a fill-missing-addons run is queued.
    Each expensive job (hidden addons, all files, all addons) is queued only
    when its last-run timestamp in redis is missing or more than a day old.
    This is mainly for staging, so that a restart within a day does not redo
    a full download.

    :param sender: Celery instance the schedules are added to.
    :param kwargs: ignored.
    """
    hour = 60 * 60
    day = 24 * hour
    week = 7 * day

    sender.add_periodic_task(hour, periodic_curse_login.s())
    sender.add_periodic_task(15 * 60, periodic_remove_expired_caches.s())
    sender.add_periodic_task(15 * 60, periodic_fill_missing_addons.s())

    # todo: replacement for periodic feeds

    sender.add_periodic_task(day, periodic_find_hidden_addons.s())  # daily
    sender.add_periodic_task(week, periodic_request_all_files.s())  # weekly
    sender.add_periodic_task(week, periodic_request_all_addons.s())  # weekly
    sender.add_periodic_task(crontab(minute='0', hour='*'), periodic_keep_history.s())  # every hour at XX:00

    periodic_fill_missing_addons.apply_async(countdown=30)

    # (redis key holding the last run, task, start-up delay in seconds)
    catch_up_jobs = (
        ('periodic-find_hidden_addons-last', periodic_find_hidden_addons, hour),
        ('periodic-request_all_files-last', periodic_request_all_files, 4 * hour),
        ('periodic-request_all_addons-last', periodic_request_all_addons, 4 * hour),
    )
    for redis_key, task, delay in catch_up_jobs:
        last = redis_store.get(redis_key)
        if last is not None and datetime.now() - datetime.fromtimestamp(int(last)) <= timedelta(days=1):
            # Ran within the last day; the regular schedule will pick it up.
            continue
        task.apply_async(countdown=delay)
Example #9
0
import os
from celery import Celery

# set the default Django settings module for the 'celery' program.
from offers.tasks import OfferRefresher

# Point Django at the project settings unless the environment already does.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mieszkania_wwa.settings')

# Celery application for the project, using a local redis broker.
app = Celery('mieszkania_wwa', broker='redis://localhost')


@app.task
def refresh_offers():
    """Celery task: call ``refresh()`` on the ``OfferRefresher`` instance."""
    OfferRefresher.instance().refresh()


# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
#   should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Load task modules from all registered Django app configs.
app.autodiscover_tasks()
# Schedule refresh_offers with a 30.0 schedule value under the name 'refresh 30'.
app.add_periodic_task(30.0, refresh_offers, name='refresh 30')

app.conf.timezone = 'UTC'


Example #10
0
class App(Empty):
    def configure_extensions(self):
        """Set up Celery, the template loader, and the background tasks.

        After delegating to ``Empty.configure_extensions`` this method:

        * creates ``self.celery`` from ``CELERY_BROKER_URL`` and copies the
          app config into it;
        * optionally overrides ``root_path`` and prepends the
          ``SATORU_TEMPLATE_DIR`` directories to the Jinja loader;
        * defines the inference tasks, then schedules or immediately queues
          the configured ``inference_tasks``;
        * installs ``self.nanopub_update_listener`` and ``self.run_importer``.
        """
        Empty.configure_extensions(self)
        self.celery = Celery(self.name,
                             broker=self.config['CELERY_BROKER_URL'],
                             beat=True)
        self.celery.conf.update(self.config)

        # Alias captured by the nested task functions below.
        app = self

        if 'root_path' in self.config:
            self.root_path = self.config['root_path']

        # Configured template directories are searched before the app's own loader.
        if 'SATORU_TEMPLATE_DIR' in self.config and app.config[
                'SATORU_TEMPLATE_DIR'] is not None:
            my_loader = jinja2.ChoiceLoader([
                jinja2.FileSystemLoader(p)
                for p in self.config['SATORU_TEMPLATE_DIR']
            ] + [app.jinja_loader])
            app.jinja_loader = my_loader

        # NOTE(review): setup_task is not called anywhere in this method.
        def setup_task(service):
            """Attach the app to ``service`` and return the task matching its query predicate."""
            service.app = app
            print service
            result = None
            if service.query_predicate == self.NS.graphene.globalChangeQuery:
                result = process_resource
            else:
                result = process_nanopub
            # The attribute is set on the shared task object itself.
            result.service = lambda: service
            return result

        @self.celery.task
        def process_resource(service_name):
            """Run the named inferencer over the whole knowledge database."""
            service = self.config['inferencers'][service_name]
            print service
            service.process_graph(app.db)

        @self.celery.task
        def process_nanopub(nanopub_uri, service_name):
            """Run the named inferencer over a single nanopublication."""
            service = self.config['inferencers'][service_name]
            print service, nanopub_uri
            nanopub = app.nanopub_manager.get(nanopub_uri)
            service.process_graph(nanopub)

        def setup_periodic_task(task):
            """Add ``find_instances`` and ``do`` Celery tasks to a task config dict and return it."""
            @self.celery.task
            def find_instances():
                print "Triggered task", task['name']
                # Each query row is unpacked as a one-element tuple.
                for x, in app.db.query(task['service'].get_query()):
                    # Called directly, i.e. within this task rather than queued.
                    task['do'](x)

            @self.celery.task
            def do_task(uri):
                print "Running task", task['name'], 'on', uri
                resource = app.get_resource(uri)
                result = task['service'].process_graph(resource.graph)

            task['service'].app = app
            task['find_instances'] = find_instances
            task['do'] = do_task

            return task

        app.inference_tasks = []
        if 'inference_tasks' in self.config:
            app.inference_tasks = [
                setup_periodic_task(task)
                for task in self.config['inference_tasks']
            ]

        # Tasks with a 'schedule' are registered with a crontab built from it;
        # the others are queued once right away.
        for task in app.inference_tasks:
            if 'schedule' in task:
                #print "Scheduling task", task['name'], task['schedule']
                self.celery.add_periodic_task(crontab(**task['schedule']),
                                              task['find_instances'].s(),
                                              name=task['name'])
            else:
                task['find_instances'].delay()

        @self.celery.task()
        def update(nanopub_uri):
            '''gets called whenever there is a change in the knowledge graph.
            Performs a breadth-first knowledge expansion of the current change.'''
            #print "Updating on", nanopub_uri
            nanopub = app.nanopub_manager.get(nanopub_uri)
            nanopub_graph = ConjunctiveGraph(nanopub.store)
            if 'inferencers' in self.config:
                for name, service in self.config['inferencers'].items():
                    service.app = self
                    if service.query_predicate == self.NS.graphene.globalChangeQuery:
                        #print "checking", name, service.get_query()
                        # Direct call: runs inside this task instead of being queued.
                        process_resource(name)
                    if service.query_predicate == self.NS.graphene.updateChangeQuery:
                        #print "checking", name, nanopub_uri, service.get_query()
                        # Only invoke the inferencer when its query matches the changed nanopub.
                        if len(list(nanopub_graph.query(
                                service.get_query()))) > 0:
                            print "invoking", name, nanopub_uri
                            process_nanopub(nanopub_uri, name)

        def run_update(nanopub_uri):
            """Queue the ``update`` task for ``nanopub_uri``."""
            update.delay(nanopub_uri)

        self.nanopub_update_listener = run_update

        @self.celery.task(retry_backoff=True,
                          retry_jitter=True,
                          autoretry_for=(Exception, ),
                          max_retries=4)
        def run_importer(entity_name):
            """Load ``entity_name`` through its importer when the local copy is missing or stale."""
            importer = self.find_importer(entity_name)
            modified = importer.last_modified(entity_name, self.db,
                                              self.nanopub_manager)
            updated = importer.modified(entity_name)
            # With no remote modification time, "now" (UTC) is used instead.
            if updated is None:
                updated = datetime.now(pytz.utc)
            print "Remote modified:", updated, type(
                updated), "Local modified:", modified, type(modified)
            # Reload when never loaded, or when the remote copy is newer by
            # more than importer.min_modified seconds.
            if modified is None or (updated - modified
                                    ).total_seconds() > importer.min_modified:
                importer.load(entity_name, self.db, self.nanopub_manager)

        self.run_importer = run_importer

    def configure_database(self):
        """
        Database configuration should be set here.

        Sets up the namespaces, the admin and knowledge databases, the
        vocabulary graph (``default_vocab.ttl`` followed by the configured
        ``vocab_file``), the role/user/nanopublication resource APIs, the
        security datastore, and the file depots.
        """
        self.NS = NS
        self.NS.local = rdflib.Namespace(self.config['lod_prefix'] + '/')

        self.admin_db = database.engine_from_config(self.config, "admin_")
        self.db = database.engine_from_config(self.config, "knowledge_")
        self.db.app = self
        load_namespaces(self.db, locals())
        Resource.db = self.admin_db

        self.vocab = Graph()
        # Load the default vocabulary first, then the configured one. The
        # files are closed as soon as they are parsed; previously the handles
        # were left open.
        for vocab_path in ("default_vocab.ttl", self.config['vocab_file']):
            with open(vocab_path) as vocab_file:
                self.vocab.load(vocab_file, format="turtle")

        self.role_api = ld.LocalResource(self.NS.prov.Role, "role",
                                         self.admin_db.store, self.vocab,
                                         self.config['lod_prefix'], RoleMixin)
        self.Role = self.role_api.alchemy

        self.user_api = ld.LocalResource(self.NS.prov.Agent, "user",
                                         self.admin_db.store, self.vocab,
                                         self.config['lod_prefix'], UserMixin)
        self.User = self.user_api.alchemy

        self.nanopub_api = ld.LocalResource(self.NS.np.Nanopublication,
                                            "pub",
                                            self.db.store,
                                            self.vocab,
                                            self.config['lod_prefix'],
                                            name="Graph")
        self.Nanopub = self.nanopub_api.alchemy

        self.classes = mapper(self.Role, self.User)
        self.datastore = RDFAlchemyUserDatastore(self.admin_db, self.classes,
                                                 self.User, self.Role)
        self.security = Security(self,
                                 self.datastore,
                                 register_form=ExtendedRegisterForm)

        DepotManager.configure('nanopublications',
                               self.config['nanopub_archive'])
        DepotManager.configure('files', self.config['file_archive'])
        self.file_depot = DepotManager.get('files')

    def weighted_route(self, *args, **kwargs):
        """Like ``self.route``, with an optional ``compare_key`` for the new rule.

        The ``compare_key`` keyword is removed before the remaining arguments
        are passed to ``self.route``. When a key was given, the rule appended
        last to ``self.url_map._rules`` gets a ``match_compare_key`` that
        returns it.

        :return: a decorator that registers the view and returns it unchanged.
        """
        def decorator(view_func):
            sort_key = kwargs.pop('compare_key', None)
            register = self.route(*args, **kwargs)
            register(view_func)

            if sort_key is None:
                return view_func

            newest_rule = self.url_map._rules[-1]
            newest_rule.match_compare_key = lambda: sort_key
            return view_func

        return decorator

    def map_entity(self, name):
        """Map ``name`` with the first configured importer whose ``matches`` accepts it.

        :return: ``(mapped_name, importer)``, or ``(None, None)`` when no
            importer in ``self.config['namespaces']`` matches.
        """
        candidates = (imp for imp in self.config['namespaces'] if imp.matches(name))
        importer = next(candidates, None)
        if importer is None:
            return None, None
        return importer.map(name), importer

    def find_importer(self, name):
        """Return the first configured importer whose ``resource_matches`` accepts ``name``, else None."""
        candidates = (imp for imp in self.config['namespaces']
                      if imp.resource_matches(name))
        return next(candidates, None)

    class Entity(rdflib.resource.Resource):
        _this = None

        def this(self):
            if self._this is None:
                self._this = self._graph.app.get_entity(self.identifier)
            return self._this

        _description = None

        def description(self):
            if self._description is None:
                #                try:
                result = Graph()
                #                try:
                for s, p, o, c in self._graph.query(
                        '''
construct {
    ?e ?p ?o.
    ?o rdfs:label ?label.
    ?o skos:prefLabel ?prefLabel.
    ?o dc:title ?title.
    ?o foaf:name ?name.
    ?o ?pattr ?oatter.
    ?oattr rdfs:label ?oattrlabel
} where {
    graph ?g {
      ?e ?p ?o.
    }
    ?g a np:Assertion.
    optional {
      ?e sio:hasAttribute|sio:hasPart ?o.
      ?o ?pattr ?oattr.
      optional {
        ?oattr rdfs:label ?oattrlabel.
      }
    }
    optional {
      ?o rdfs:label ?label.
    }
    optional {
      ?o skos:prefLabel ?prefLabel.
    }
    optional {
      ?o dc:title ?title.
    }
    optional {
      ?o foaf:name ?name.
    }
}''',
                        initNs=NS.prefixes,
                        initBindings={'e': self.identifier}):
                    result.add((s, p, o))
#                except:
#                    pass
                self._description = result.resource(self.identifier)
#                except Exception as e:
#                    print str(e), self.identifier
#                    raise e
            return self._description

    def get_resource(self, entity):
        """Return an ``Entity`` for ``entity``, triggering an import when one applies.

        The name is first mapped through ``map_entity``; if that yields no
        importer, ``find_importer`` is tried on the (possibly mapped) name.
        With an importer, the import runs in-process when the entity has no
        local modification time, and is queued with ``.delay`` otherwise.

        :param entity: identifier of the requested entity.
        :return: ``self.Entity`` bound to ``self.db`` and the resolved identifier.
        """
        mapped_name, importer = self.map_entity(entity)

        if mapped_name is not None:
            entity = mapped_name

        if importer is None:
            importer = self.find_importer(entity)

        if importer is not None:
            modified = importer.last_modified(entity, self.db,
                                              self.nanopub_manager)
            if modified is None:
                # No local modification time: import now, before returning.
                self.run_importer(entity)
            else:
                # A local copy exists: refresh it in the background.
                print "Type of modified is", type(modified)
                self.run_importer.delay(entity)
        return self.Entity(self.db, entity)

    def configure_template_filters(self):
        """Register the ``urlencode`` and ``lang`` template filters.

        The ``lang`` filter is also stored on the instance as
        ``self.lang_filter``.
        """
        import urllib
        from markupsafe import Markup

        @self.template_filter('urlencode')
        def urlencode_filter(s):
            """URL-quote ``s`` (UTF-8 encoded) and return it as ``Markup``."""
            # The previous check compared the type object with the string
            # 'Markup', which is never true, so Markup values were quoted
            # without first being unescaped.
            if isinstance(s, Markup):
                s = s.unescape()
            s = s.encode('utf8')
            s = urllib.quote_plus(s)
            return Markup(s)

        @self.template_filter('lang')
        def lang_filter(terms):
            """Keep non-literal terms plus the literals in the best language.

            The best language is negotiated from the request's
            ``Accept-Language`` header against the languages present among the
            literals; if no literal matches it, the literals in
            ``self.config['default_language']`` are used instead.
            """
            # Check for None before materializing: list(None) would raise.
            if terms is None:
                return []
            terms = list(terms)
            if len(terms) == 0:
                return []
            resources = [x for x in terms if not isinstance(x, rdflib.Literal)]
            literals = [x for x in terms if isinstance(x, rdflib.Literal)]
            languages = set(
                [x.language for x in literals if x.language is not None])
            best_lang = request.accept_languages.best_match(list(languages))
            best_terms = [x for x in literals if x.language == best_lang]
            if len(best_terms) == 0:
                best_terms = [
                    x for x in literals
                    if x.language == self.config['default_language']
                ]
            return resources + best_terms

        self.lang_filter = lang_filter

    def add_file(self, f, entity, nanopub):
        old_nanopubs = []
        for np_uri, np_assertion, in self.db.query(
                '''select distinct ?np ?assertion where {
    graph ?assertion {?e graphene:hasFileID ?fileid}
    ?np np:hasAssertion ?assertion.
}''',
                initNs=NS.prefixes,
                initBindings=dict(e=entity)):
            if not self._can_edit(np_uri):
                raise Unauthorized()
            old_nanopubs.append((np_uri, np_assertion))
        fileid = self.file_depot.create(f.stream, f.filename, f.mimetype)
        nanopub.assertion.add((entity, NS.graphene.hasFileID, Literal(fileid)))
        nanopub.assertion.add((entity, NS.dc.contributor, current_user.resUri))
        nanopub.assertion.add(
            (entity, NS.dc.created, Literal(datetime.utcnow())))
        nanopub.assertion.add(
            (entity, NS.ov.hasContentType, Literal(f.mimetype)))
        nanopub.assertion.add((entity, NS.RDF.type, NS.mediaTypes[f.mimetype]))
        nanopub.assertion.add(
            (NS.mediaTypes[f.mimetype], NS.RDF.type, NS.dc.FileFormat))
        nanopub.assertion.add(
            (entity, NS.RDF.type, NS.mediaTypes[f.mimetype.split('/')[0]]))
        nanopub.assertion.add((NS.mediaTypes[f.mimetype.split('/')[0]],
                               NS.RDF.type, NS.dc.FileFormat))
        nanopub.assertion.add((entity, NS.RDF.type, NS.pv.File))

        nanopub.pubinfo.add((nanopub.assertion.identifier, NS.dc.contributor,
                             current_user.resUri))
        nanopub.pubinfo.add((nanopub.assertion.identifier, NS.dc.created,
                             Literal(datetime.utcnow())))

        return old_nanopubs

    def delete_file(self, entity):
        for np_uri, in self.db.query('''select distinct ?np where {
    graph ?np_assertion {?e graphene:hasFileID ?fileid}
    ?np np:hasAssertion ?np_assertion.
}''',
                                     initNs=NS.prefixes,
                                     initBindings=dict(e=entity)):
            if not self._can_edit(np_uri):
                raise Unauthorized()
        self.nanopub_manager.retire(np_uri)

    def add_files(self, uri, files, upload_type=NS.pv.File):
        """Attach uploaded files to ``uri`` in a newly published nanopublication.

        * For a ``dcmitype:Collection`` each named file becomes
          ``<uri>/<filename>`` and is linked with ``dc:hasPart``.
        * For a ``dcat:Dataset`` each named file becomes a
          ``dcat:Distribution`` of ``uri``.
        * For any other type only the first file with a non-empty filename is
          stored, directly at ``uri``.

        If at least one file was stored, nanopublications that previously
        carried those files are retired and recorded as revised, and the new
        nanopublication is published.

        :param uri: URI of the resource receiving the files
        :param files: iterable of uploaded file objects
        :param upload_type: RDF type asserted for ``uri``
        """
        nanopub = self.nanopub_manager.new()

        added_files = False

        old_nanopubs = []

        nanopub.assertion.add((uri, self.NS.RDF.type, upload_type))
        if upload_type == URIRef("http://purl.org/dc/dcmitype/Collection"):
            for f in files:
                filename = secure_filename(f.filename)
                if filename != '':
                    file_uri = URIRef(uri + "/" + filename)
                    old_nanopubs.extend(self.add_file(f, file_uri, nanopub))
                    nanopub.assertion.add((uri, NS.dc.hasPart, file_uri))
                    added_files = True
        elif upload_type == NS.dcat.Dataset:
            for f in files:
                filename = secure_filename(f.filename)
                if filename != '':
                    file_uri = URIRef(uri + "/" + filename)
                    old_nanopubs.extend(self.add_file(f, file_uri, nanopub))
                    nanopub.assertion.add(
                        (uri, NS.dcat.distribution, file_uri))
                    nanopub.assertion.add(
                        (file_uri, NS.RDF.type, NS.dcat.Distribution))
                    nanopub.assertion.add(
                        (file_uri, NS.dcat.downloadURL, file_uri))
                    added_files = True
        else:
            for f in files:
                if f.filename != '':
                    old_nanopubs.extend(self.add_file(f, uri, nanopub))
                    # Was ``ns.RDF.type``; every other reference in this
                    # method uses ``NS``.
                    nanopub.assertion.add((uri, NS.RDF.type, NS.pv.File))
                    added_files = True
                    # A single resource holds a single file.
                    break

        if added_files:
            for old_np, old_np_assertion in old_nanopubs:
                nanopub.pubinfo.add((nanopub.assertion.identifier,
                                     NS.prov.wasRevisionOf, old_np_assertion))
                self.nanopub_manager.retire(old_np)

            for n in self.nanopub_manager.prepare(nanopub):
                self.nanopub_manager.publish(n)

    def _can_edit(self, uri):
        if current_user.has_role('Publisher') or current_user.has_role(
                'Editor') or current_user.has_role('Admin'):
            return True
        if self.db.query('''ask {
    ?nanopub np:hasAssertion ?assertion; np:hasPublicationInfo ?info.
    graph ?info { ?assertion dc:contributor ?user. }
}''',
                         initBindings=dict(nanopub=uri,
                                           user=current_user.resUri),
                         initNs=dict(np=self.NS.np, dc=self.NS.dc)):
            #print "Is owner."
            return True
        return False

    def configure_views(self):
        def sort_by(resources, property):
            """Return ``resources`` as a list ordered by their value for ``property``."""

            def property_value(resource):
                return resource.value(property)

            return sorted(resources, key=property_value)

        class InvitedAnonymousUser(AnonymousUserMixin):
            """Account-less user that was referred via kikm references.

            The user has no roles but reports itself as active and
            authenticated.
            """

            def __init__(self):
                # No roles can ever be granted to this user.
                self.roles = ImmutableList()

            @property
            def is_authenticated(self):
                """Always ``True``."""
                return True

            def is_active(self):
                """Always ``True``."""
                return True

            def has_role(self, *args):
                """Always ``False``, whatever role is asked about."""
                return False

        def camel_case_split(identifier):
            """Split a camel-case identifier into its words.

            A boundary is placed between a lower-case and an upper-case
            letter, and before the last capital of an upper-case run that is
            followed by a lower-case letter (``HTTPResponse`` gives ``HTTP``
            and ``Response``).
            """
            word_pattern = '.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)'
            words = []
            for match in finditer(word_pattern, identifier):
                words.append(match.group(0))
            return words

        label_properties = [
            self.NS.skos.prefLabel, self.NS.RDFS.label, self.NS.dc.title,
            self.NS.foaf.name
        ]

        @lru
        def get_remote_label(uri):
            for db in [self.db, self.admin_db]:
                g = Graph()
                try:
                    g += db.query('''select ?s ?p ?o where { ?s ?p ?o.}''',
                                  initNs=self.NS.prefixes,
                                  initBindings=dict(s=uri))
                except:
                    pass
                resource_entity = g.resource(uri)
                if len(resource_entity.graph) == 0:
                    #print "skipping", db
                    continue
                for property in label_properties:
                    labels = self.lang_filter(resource_entity[property])
                    if len(labels) > 0:
                        return labels[0]

                if len(labels) == 0:
                    name = [
                        x.value for x in [
                            resource_entity.value(self.NS.foaf.givenName),
                            resource_entity.value(self.NS.foaf.familyName)
                        ] if x is not None
                    ]
                    if len(name) > 0:
                        label = ' '.join(name)
                        return label
            try:
                label = self.db.qname(uri).split(":")[1].replace("_", " ")
                return ' '.join(camel_case_split(label)).title()
            except Exception as e:
                print str(e), uri
                return str(uri)

        def get_label(resource):
            """Return a display label for ``resource``.

            The resource's own statements are searched first, one label
            property at a time; ``get_remote_label`` is the fallback.
            """
            for label_property in label_properties:
                candidates = self.lang_filter(resource[label_property])
                if len(candidates) > 0:
                    return candidates[0]
            return get_remote_label(resource.identifier)

        @self.before_request
        def load_forms():
            if 'API_KEY' in self.config:
                if 'API_KEY' in request.args and request.args[
                        'API_KEY'] == self.config['API_KEY']:
                    print 'logging in invited user'
                    login_user(InvitedAnonymousUser())

            #g.search_form = SearchForm()
            g.ns = self.NS
            g.get_summary = get_summary
            g.get_label = get_label
            g.get_entity = self.get_entity
            g.rdflib = rdflib
            g.isinstance = isinstance
            g.db = self.db

        @self.login_manager.user_loader
        def load_user(user_id):
            """Return the user with the given id from the datastore.

            :param user_id: stored user identifier, possibly ``None``
            :return: result of ``self.datastore.find_user(id=user_id)``, or
                ``None`` when no id was given
            """
            if user_id is None:
                return None
            return self.datastore.find_user(id=user_id)

        # URL suffix -> MIME type. Consulted by ``view`` for the ``<format>``
        # part of a route and by ``render_view`` for a template file's suffix.
        extensions = {
            "rdf": "application/rdf+xml",
            "jsonld": "application/ld+json",
            "json": "application/json",
            "ttl": "text/turtle",
            "trig": "application/trig",
            "turtle": "text/turtle",
            "owl": "application/rdf+xml",
            "nq": "application/n-quads",
            "nt": "application/n-triples",
            "html": "text/html"
        }

        # MIME type -> format name handed to ``serialize(format=...)`` and
        # ``parse(format=...)``.
        # NOTE(review): the ``None`` entry looks like a default for "no
        # acceptable type" -- confirm what the content negotiation helper
        # actually returns in that case.
        dataFormats = {
            "application/rdf+xml": "xml",
            "application/ld+json": 'json-ld',
            "text/turtle": "turtle",
            "application/trig": "trig",
            "application/n-quads": "nquads",
            "application/n-triples": "nt",
            "application/rdf+json": "json",
            None: "json-ld"
        }

        def get_graphs(graphs):
            """Fetch the quads of the named ``graphs`` into a new ``Dataset``.

            :param graphs: iterable of graph identifiers (anything with ``n3()``)
            :return: ``Dataset`` holding every quad found in those graphs
            """
            graph_terms = ' '.join(graph.n3() for graph in graphs)
            query = ('select ?s ?p ?o ?g where {graph ?g {?s ?p ?o} } values ?g { %s }'
                     % graph_terms)
            quads = self.db.store.query(query)
            fetched = Dataset()
            fetched.addN(quads)
            return fetched

        def explain(graph):
            values = ')\n  ('.join([
                ' '.join([x.n3() for x in triple])
                for triple in graph.triples((None, None, None))
            ])
            values = 'VALUES (?s ?p ?o)\n{\n(' + values + ')\n}'

            try:
                nanopubs = self.db.query('''select distinct ?np where {
    ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g;
        np:hasPublicationInfo ?pubinfo;
        np:hasAssertion ?assertion;
    graph ?assertion { ?s ?p ?o.}
}''' + values,
                                         initNs={
                                             'np': self.NS.np,
                                             'sio': self.NS.sio,
                                             'dc': self.NS.dc,
                                             'foaf': self.NS.foaf
                                         })
                result = ConjunctiveGraph()
                for nanopub_uri, in nanopubs:
                    self.nanopub_manager.get(nanopub_uri, result)
            except Exception as e:
                print str(e), entity
                raise e
            return result.resource(entity)

        def get_entity_sparql(entity):
            try:
                statements = self.db.query(
                    '''select distinct ?s ?p ?o ?g where {
            ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g;
                np:hasPublicationInfo ?pubinfo;
                np:hasAssertion ?assertion;

            {graph ?np { ?np sio:isAbout ?e.}}
            UNION
            {graph ?assertion { ?e ?p ?o.}}
            graph ?g { ?s ?p ?o }
        }''',
                    initBindings={'e': entity},
                    initNs={
                        'np': self.NS.np,
                        'sio': self.NS.sio,
                        'dc': self.NS.dc,
                        'foaf': self.NS.foaf
                    })
                result = ConjunctiveGraph()
                result.addN(statements)
            except Exception as e:
                print str(e), entity
                raise e
            #print result.serialize(format="trig")
            return result.resource(entity)

        def get_entity_disk(entity):
            try:
                nanopubs = self.db.query('''select distinct ?np where {
            ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g;
                np:hasPublicationInfo ?pubinfo;
                np:hasAssertion ?assertion;

            {graph ?np { ?np sio:isAbout ?e.}}
            UNION
            {graph ?assertion { ?e ?p ?o.}}
        }''',
                                         initBindings={'e': entity},
                                         initNs={
                                             'np': self.NS.np,
                                             'sio': self.NS.sio,
                                             'dc': self.NS.dc,
                                             'foaf': self.NS.foaf
                                         })
                result = ConjunctiveGraph()
                for nanopub_uri, in nanopubs:
                    self.nanopub_manager.get(nanopub_uri, result)
#                result.addN(nanopubs)
            except Exception as e:
                print str(e), entity
                raise e
            #print result.serialize(format="trig")
            return result.resource(entity)

        get_entity = get_entity_sparql

        self.get_entity = get_entity

        def get_summary(resource):
            """Yield ``(property, term)`` pairs describing ``resource``.

            The definition/abstract/description/summary/comment properties are
            visited in a fixed order and each one's values are passed through
            the language filter.
            """
            for summary_property in (self.NS.skos.definition,
                                     self.NS.dc.abstract,
                                     self.NS.dc.description,
                                     self.NS.dc.summary,
                                     self.NS.RDFS.comment,
                                     self.NS.dcelements.description):
                for term in self.lang_filter(resource[summary_property]):
                    yield (summary_property, term)

        self.get_summary = get_summary

        @self.route('/sparql', methods=['GET', 'POST'])
        @login_required
        def sparql_view():
            """Proxy SPARQL queries to the store's query endpoint, refusing updates.

            A GET without a ``query`` argument redirects to the query form.
            Updates are rejected with 403 in all three shapes the SPARQL
            protocol allows: an ``update`` URL parameter, a body sent as
            ``application/sparql-update``, and a form-encoded ``update``
            parameter in a POST body (the last one was previously forwarded
            unchecked).

            :return: the upstream response body, content type and status code
            """
            has_query = False
            for arg in request.args.keys():
                if arg.lower() == "update":
                    return "Update not allowed.", 403
                if arg.lower() == 'query':
                    has_query = True
            if request.method == 'GET' and not has_query:
                return redirect(url_for('sparql_form'))
            if request.method == 'GET':
                headers = {}
                headers.update(request.headers)
                # The proxied GET carries no body, so the incoming length
                # must not be forwarded.
                if 'Content-Length' in headers:
                    del headers['Content-Length']
                req = requests.get(self.db.store.query_endpoint,
                                   headers=headers,
                                   params=request.args)
            elif request.method == 'POST':
                if 'application/sparql-update' in request.headers[
                        'content-type']:
                    return "Update not allowed.", 403
                # Read (and cache) the raw body before touching request.form,
                # so that form parsing does not consume the data to forward.
                body = request.get_data()
                for field in request.form.keys():
                    if field.lower() == "update":
                        return "Update not allowed.", 403
                req = requests.post(self.db.store.query_endpoint,
                                    data=body,
                                    headers=request.headers,
                                    params=request.args)
            response = Response(req.content,
                                content_type=req.headers['content-type'])
            return response, req.status_code

        @self.route('/sparql.html')
        @login_required
        def sparql_form():
            """Render the interactive SPARQL query page, pointed at ``/sparql``."""
            return render_template('sparql.html',
                                   endpoint="/sparql",
                                   ns=self.NS,
                                   g=g,
                                   current_user=current_user,
                                   isinstance=isinstance,
                                   rdflib=rdflib,
                                   hasattr=hasattr,
                                   set=set)

        # The /cdn route only exists when a CDN directory is configured.
        if self.config.get('SATORU_CDN_DIR') is not None:

            @self.route('/cdn/<path:filename>')
            def cdn(filename):
                """Serve ``filename`` from the configured CDN directory."""
                safe_name = werkzeug.utils.secure_filename(filename)
                return send_from_directory(self.config['SATORU_CDN_DIR'],
                                           safe_name)

        @self.route('/about.<format>', methods=['GET', 'POST', 'DELETE'])
        @self.route('/about', methods=['GET', 'POST', 'DELETE'])
        @self.weighted_route('/<path:name>.<format>',
                             compare_key=bottom_compare_key,
                             methods=['GET', 'POST', 'DELETE'])
        @self.weighted_route('/<path:name>',
                             compare_key=bottom_compare_key,
                             methods=['GET', 'POST', 'DELETE'])
        @self.route('/', methods=['GET', 'POST', 'DELETE'])
        @login_required
        def view(name=None, format=None, view=None):
            """Entry point for viewing, uploading to, and deleting an entity.

            The entity is ``self.NS.local[name]`` when a path is given,
            otherwise the ``uri`` query argument, otherwise the local ``Home``.

            * ``POST`` stores the uploaded files and redirects to the entity.
            * ``DELETE`` removes the entity's file and answers 204.
            * ``GET`` renders an HTML view when ``view`` is requested or HTML
              is acceptable, else serializes the entity's graph in the
              negotiated RDF format.
            """
            if format is not None and format not in extensions:
                # An unknown suffix is part of the name, not a format. For
                # the /about.<format> route there is no name to extend;
                # joining with None used to raise TypeError.
                if name is not None:
                    name = '.'.join([name, format])
            # NOTE(review): a recognized suffix (e.g. ``.ttl``) does not
            # influence content negotiation below; only the Accept header
            # does. The original assigned ``extensions[format]`` to
            # ``content_type`` and then overwrote it. Behaviour preserved.
            if name is not None:
                entity = self.NS.local[name]
            elif 'uri' in request.args:
                entity = URIRef(request.args['uri'])
            else:
                entity = self.NS.local.Home

            if request.method == 'POST':
                if len(request.files) == 0:
                    flash('No file uploaded')
                    return redirect(request.url)
                upload_type = rdflib.URIRef(request.form['upload_type'])
                self.add_files(
                    entity,
                    [y for x, y in request.files.iteritems(multi=True)],
                    upload_type=upload_type)
                url = "/about?%s" % urlencode(
                    dict(uri=unicode(entity), view="view"))
                return redirect(url)
            elif request.method == 'DELETE':
                self.delete_file(entity)
                return '', 204
            elif request.method == 'GET':
                resource = self.get_resource(entity)

                content_type = request.headers[
                    'Accept'] if 'Accept' in request.headers else '*/*'

                htmls = set(['application/xhtml', 'text/html'])
                if 'view' in request.args or sadi.mimeparse.best_match(
                        htmls, content_type) in htmls:
                    return render_view(resource)
                else:
                    best = sadi.mimeparse.best_match(
                        [mt for mt in dataFormats.keys() if mt is not None],
                        content_type)
                    # Use the table's ``None`` default when nothing
                    # acceptable matched, instead of raising KeyError.
                    fmt = dataFormats.get(best, dataFormats[None])
                    return resource.this().graph.serialize(format=fmt)

        views = {}

        def render_view(resource):
            template_args = dict(ns=self.NS,
                                 this=resource,
                                 g=g,
                                 current_user=current_user,
                                 isinstance=isinstance,
                                 get_entity=get_entity,
                                 get_summary=get_summary,
                                 rdflib=rdflib,
                                 hasattr=hasattr,
                                 set=set)
            view = None
            if 'view' in request.args:
                view = request.args['view']
            # 'view' is the default view
            fileid = resource.value(self.NS.graphene.hasFileID)
            if fileid is not None and view is None:
                f = self.file_depot.get(fileid)
                fsa = FileServeApp(
                    f, self.config["file_archive"].get("cache_max_age",
                                                       3600 * 24 * 7))
                return fsa

            if view is None:
                view = 'view'

            if 'as' in request.args:
                types = [URIRef(request.args['as']), 0]
            else:
                types = list([(x.identifier, 0) for x in resource[RDF.type]])
            #print types
            #if len(types) == 0:
            types.append([self.NS.RDFS.Resource, 100])
            #print view, resource.identifier, types
            type_string = ' '.join(
                ["(%s %d '%s')" % (x.n3(), i, view) for x, i in types])
            view_query = '''select ?id ?view (count(?mid)+?priority as ?rank) ?class ?c where {
    values (?c ?priority ?id) { %s }
    ?c rdfs:subClassOf* ?mid.
    ?mid rdfs:subClassOf* ?class.
    ?class ?viewProperty ?view.
    ?viewProperty rdfs:subPropertyOf* graphene:hasView.
    ?viewProperty dc:identifier ?id.
} group by ?c ?class order by ?rank
''' % type_string

            #print view_query
            views = list(
                self.vocab.query(view_query,
                                 initNs=dict(graphene=self.NS.graphene,
                                             dc=self.NS.dc)))
            #print '\n'.join([str(x.asdict()) for x in views])
            if len(views) == 0:
                abort(404)

            headers = {'Content-Type': "text/html"}
            extension = views[0]['view'].value.split(".")[-1]
            if extension in extensions:
                headers['Content-Type'] = extensions[extension]

            # default view (list of nanopubs)
            # if available, replace with class view
            # if available, replace with instance view
            return render_template(views[0]['view'].value,
                                   **template_args), 200, headers

        def render_nanopub(data, code, headers=None):
            """HTML representation of a nanopublication for the linked-data API.

            The nanopublication is wrapped in an ``Entity`` and rendered by
            ``render_view``; the status code and headers produced by that
            rendering replace the ones passed in.
            """
            store_graph = ConjunctiveGraph(data.store)
            entity = app.Entity(store_graph, data.identifier)
            entity.nanopub = data
            body, status, view_headers = render_view(entity)
            response = make_response(body, status)
            response.headers.extend(view_headers or {})
            return response

        # Linked-data REST API on top of the database store; HTML responses
        # are produced by render_nanopub.
        self.api = ld.LinkedDataApi(self, "", self.db.store, "")
        self.api.representations['text/html'] = render_nanopub

        #self.admin = Admin(self, name="graphene", template_mode='bootstrap3')
        #self.admin.add_view(ld.ModelView(self.nanopub_api, default_sort=RDFS.label))
        #self.admin.add_view(ld.ModelView(self.role_api, default_sort=RDFS.label))
        #self.admin.add_view(ld.ModelView(self.user_api, default_sort=foaf.familyName))

        # Alias referenced as ``app`` by render_nanopub and by the resource
        # class defined below.
        app = self

        # Nanopublications are minted under <lod_prefix>/pub/.
        self.nanopub_manager = NanopublicationManager(
            app.db.store,
            Namespace('%s/pub/' % (app.config['lod_prefix'])),
            update_listener=self.nanopub_update_listener)

        class NanopublicationResource(ld.LinkedDataResource):
            decorators = [login_required]

            def __init__(self):
                self.local_resource = app.nanopub_api

            def _get_uri(self, ident):
                return URIRef('%s/pub/%s' % (app.config['lod_prefix'], ident))

            def get(self, ident):
                ident = ident.split("_")[0]
                uri = self._get_uri(ident)
                try:
                    result = app.nanopub_manager.get(uri)
                except IOError:
                    return 'Resource not found', 404
                return result

            def delete(self, ident):
                uri = self._get_uri(ident)
                if not app._can_edit(uri):
                    return '<h1>Not Authorized</h1>', 401
                app.nanopub_manager.retire(uri)
                #self.local_resource.delete(uri)
                return '', 204

            def _get_graph(self):
                inputGraph = ConjunctiveGraph()
                contentType = request.headers['Content-Type']
                sadi.deserialize(inputGraph, request.data, contentType)
                return inputGraph

            def put(self, ident):
                nanopub_uri = self._get_uri(ident)
                inputGraph = self._get_graph()
                old_nanopub = self._prep_nanopub(nanopub_uri, inputGraph)
                for nanopub in app.nanopub_manager.prepare(inputGraph):
                    modified = Literal(datetime.utcnow())
                    nanopub.pubinfo.add((nanopub.assertion.identifier,
                                         app.NS.prov.wasRevisionOf,
                                         old_nanopub.assertion.identifier))
                    nanopub.pubinfo.add(
                        (old_nanopub.assertion.identifier,
                         app.NS.prov.invalidatedAtTime, modified))
                    nanopub.pubinfo.add((nanopub.assertion.identifier,
                                         app.NS.dc.modified, modified))
                    app.nanopub_manager.retire(nanopub_uri)
                    app.nanopub_manager.publish(nanopub)

            def _prep_nanopub(self, nanopub_uri, graph):
                nanopub = Nanopublication(store=graph.store,
                                          identifier=nanopub_uri)
                about = nanopub.nanopub_resource.value(app.NS.sio.isAbout)
                #print nanopub.assertion_resource.identifier, about
                self._prep_graph(nanopub.assertion_resource, about.identifier)
                self._prep_graph(nanopub.pubinfo_resource,
                                 nanopub.assertion_resource.identifier)
                self._prep_graph(nanopub.provenance_resource,
                                 nanopub.assertion_resource.identifier)
                nanopub.pubinfo.add(
                    (nanopub.assertion.identifier, app.NS.dc.contributor,
                     current_user.resUri))
                return nanopub

            def post(self, ident=None):
                if ident is not None:
                    return self.put(ident)
                inputGraph = self._get_graph()
                for nanopub_uri in inputGraph.subjects(
                        rdflib.RDF.type, app.NS.np.Nanopublication):
                    nanopub = self._prep_nanopub(nanopub_uri, inputGraph)
                    nanopub.pubinfo.add(
                        (nanopub.assertion.identifier, app.NS.dc.created,
                         Literal(datetime.utcnow())))
                for nanopub in app.nanopub_manager.prepare(inputGraph):
                    app.nanopub_manager.publish(nanopub)

                return '', 201

            def _prep_graph(self, resource, about=None):
                #print '_prep_graph', resource.identifier, about
                content_type = resource.value(app.NS.ov.hasContentType)
                if content_type is not None:
                    content_type = content_type.value
                #print 'graph content type', resource.identifier, content_type
                #print resource.graph.serialize(format="nquads")
                g = Graph(store=resource.graph.store,
                          identifier=resource.identifier)
                text = resource.value(app.NS.prov.value)
                if content_type is not None and text is not None:
                    #print 'Content type:', content_type, resource.identifier
                    html = None
                    if content_type in ["text/html", "application/xhtml+xml"]:
                        html = Literal(text.value, datatype=RDF.HTML)
                    if content_type == 'text/markdown':
                        #print "Aha, markdown!"
                        #print text.value
                        html = markdown.markdown(text.value,
                                                 extensions=['rdfa'])
                        attributes = [
                            'vocab="%s"' % app.NS.local,
                            'base="%s"' % app.NS.local,
                            'prefix="%s"' % ' '.join([
                                '%s: %s' % x for x in app.NS.prefixes.items()
                            ])
                        ]
                        if about is not None:
                            attributes.append('resource="%s"' % about)
                        html = '<div %s>%s</div>' % (' '.join(attributes),
                                                     html)
                        html = Literal(html, datatype=RDF.HTML)
                        text = html
                        content_type = "text/html"
                    #print resource.identifier, content_type
                    if html is not None:
                        resource.add(app.NS.sioc.content, html)
                        try:
                            g.parse(data=text, format='rdfa')
                        except:
                            pass
                    else:
                        #print "Deserializing", g.identifier, 'as', content_type
                        #print dataFormats
                        if content_type in dataFormats:
                            g.parse(data=text,
                                    format=dataFormats[content_type])
                            #print len(g)
                        else:
                            print "not attempting to deserialize."


#                            try:
#                                sadi.deserialize(g, text, content_type)
#                            except:
#                                pass
#print Graph(store=resource.graph.store).serialize(format="trig")

        self.api.add_resource(NanopublicationResource, '/pub', '/pub/<ident>')
Example #11
0
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register the recurring tasks on the given Celery app.

    :param sender: Celery application the schedule is added to
    :param kwargs: extra arguments, ignored
    """
    minute = 60
    hour = 60 * minute

    # Every 15 minutes.
    sender.add_periodic_task(15 * minute, p_remove_expired_caches.s())
    sender.add_periodic_task(15 * minute, p_curse_checklogin.s())
    sender.add_periodic_task(15 * minute, p_update_all_addons.s())

    # Every 45 minutes.
    sender.add_periodic_task(45 * minute, p_fill_incomplete_addons.s())

    # Every 6 hours.
    sender.add_periodic_task(6 * hour, p_update_all_files.s())
    # sender.add_periodic_task(7 * 24 * 60 * 60, p_find_hidden_addons.s())

    # At minute 0 of every hour.
    sender.add_periodic_task(crontab(minute='0'), p_keep_history.s())
Example #12
0
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register all recurring background jobs on ``sender``.

    Every job is added with ``sender.add_periodic_task``. Numeric schedules
    are intervals in seconds (Celery's convention); ``crontab`` schedules
    fire at fixed clock times. Some jobs depend on ``settings``
    (``DEBUG``, ``MULTI_TENANCY``, ``ASYNC_EVENT_PROPERTY_USAGE``).

    If scheduling the column materialization jobs raises, the error is
    passed to ``capture_exception`` and printed, and the remaining jobs are
    still registered.

    :param sender: Celery application the jobs are registered on.
    :param kwargs: Accepted but not used.
    """
    # Queue depth probe every second; skipped when DEBUG is on.
    if not settings.DEBUG:
        sender.add_periodic_task(
            1.0, redis_celery_queue_depth.s(), name="1 sec queue probe", priority=0
        )

    # Heartbeat every 10 seconds to make sure the worker is alive.
    sender.add_periodic_task(
        10.0, redis_heartbeat.s(), name="10 sec heartbeat", priority=0
    )

    # Event table partitions are updated twice a week (Monday and Friday, 00:00).
    partition_schedule = crontab(day_of_week="mon,fri", hour=0, minute=0)
    sender.add_periodic_task(partition_schedule, update_event_partitions.s())

    if getattr(settings, "MULTI_TENANCY", False):
        # Cloud (posthog-cloud) cron job: billing usage, every day at midnight.
        sender.add_periodic_task(
            crontab(hour=0, minute=0), calculate_billing_daily_usage.s()
        )
    else:
        # Self-hosted instances: weekly status report (Monday, 00:00).
        sender.add_periodic_task(
            crontab(day_of_week="mon", hour=0, minute=0), status_report.s()
        )

    # Stale partials are cleaned on Friday at 00:00.
    sender.add_periodic_task(
        crontab(day_of_week="fri", hour=0, minute=0), clean_stale_partials.s()
    )

    # Old plugin logs are deleted every 4 hours, on the hour.
    sender.add_periodic_task(
        crontab(minute=0, hour="*/4"), delete_old_plugin_logs.s()
    )

    # All Organization.available_features are synced hourly, at minute 30.
    sender.add_periodic_task(
        crontab(minute=30, hour="*"),
        sync_all_organization_available_features.s(),
    )

    sender.add_periodic_task(
        UPDATE_CACHED_DASHBOARD_ITEMS_INTERVAL_SECONDS,
        check_cached_items.s(),
        name="check dashboard items",
    )

    # Async migration health is checked every 15 minutes.
    sender.add_periodic_task(
        crontab(minute="*/15"), check_async_migration_health.s()
    )

    # Every day at a random minute (0-39) past midnight; sends data from the
    # preceding whole day.
    usage_report_minute = randrange(0, 40)
    sender.add_periodic_task(
        crontab(hour=0, minute=usage_report_minute),
        send_org_usage_report.s(),
        name="send event usage report",
    )

    # ClickHouse metrics, each collected every 120 seconds.
    clickhouse_probes = (
        (clickhouse_lag, "clickhouse table lag"),
        (clickhouse_row_count, "clickhouse events table row count"),
        (clickhouse_part_count, "clickhouse table parts count"),
        (clickhouse_mutation_count, "clickhouse table mutations count"),
    )
    for probe, label in clickhouse_probes:
        sender.add_periodic_task(120, probe.s(), name=label)

    # Hourly, on the hour.
    sender.add_periodic_task(
        crontab(minute=0, hour="*"),
        calculate_cohort_ids_in_feature_flags_task.s(),
    )

    # Every day at a random minute (0-39) past midnight. Randomized to avoid
    # overloading license.posthog.com.
    license_report_minute = randrange(0, 40)
    sender.add_periodic_task(
        crontab(hour=0, minute=license_report_minute),
        clickhouse_send_license_usage.s(),
    )

    try:
        from ee.settings import MATERIALIZE_COLUMNS_SCHEDULE_CRON

        # The setting is a five-field, space-separated cron expression; the
        # unpacking below raises if the field count is anything but five.
        cron_fields = MATERIALIZE_COLUMNS_SCHEDULE_CRON.strip().split(" ")
        (
            cron_minute,
            cron_hour,
            cron_day_of_month,
            cron_month_of_year,
            cron_day_of_week,
        ) = cron_fields

        materialize_schedule = crontab(
            minute=cron_minute,
            hour=cron_hour,
            day_of_month=cron_day_of_month,
            month_of_year=cron_month_of_year,
            day_of_week=cron_day_of_week,
        )
        sender.add_periodic_task(
            materialize_schedule,
            clickhouse_materialize_columns.s(),
            name="clickhouse materialize columns",
        )

        # Every 4 hours, on the hour.
        sender.add_periodic_task(
            crontab(hour="*/4", minute=0),
            clickhouse_mark_all_materialized.s(),
            name="clickhouse mark all columns as materialized",
        )
    except Exception as err:
        # Best effort: report the failure and keep registering the rest.
        capture_exception(err)
        print(f"Scheduling materialized column task failed: {err}")

    sender.add_periodic_task(
        120, calculate_cohort.s(), name="recalculate cohorts"
    )

    if settings.ASYNC_EVENT_PROPERTY_USAGE:
        sender.add_periodic_task(
            EVENT_PROPERTY_USAGE_INTERVAL_SECONDS,
            calculate_event_property_usage.s(),
            name="calculate event property usage",
        )
Example #13
0
# -*- coding:utf-8 -*-

import os
from celery import Celery
from df_celery.tasks import periodic_task

# Celery application bootstrap for the daily_fresh Django project.

# Use the project's settings module unless DJANGO_SETTINGS_MODULE is already
# set in the environment (setdefault never overrides an existing value).
# NOTE(review): periodic_task is imported above, before this line runs --
# confirm df_celery.tasks does not need Django settings at import time.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'daily_fresh.settings')

# The Celery application instance, named after the project.
app = Celery('daily_fresh')

# Read Celery configuration from the Django settings object. With
# namespace='CELERY', Celery options are the settings prefixed with CELERY_.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Let Celery locate task modules automatically
# (presumably tasks.py in each installed Django app -- see the Celery docs).
app.autodiscover_tasks()

# Schedule periodic_task with an interval of 5 (seconds, per Celery's
# add_periodic_task convention for numeric schedules).
app.add_periodic_task(5, periodic_task)