def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register the software-version refresh with the Celery scheduler.

    Queues a single refresh to run 60 seconds from now, then schedules the
    same task to recur every 24 hours.

    :param sender: Celery application the periodic task is registered on.
    :param kwargs: Extra keyword arguments; accepted and ignored.
    """
    seconds_per_day = 60 * 60 * 24

    # One-off run shortly after this hook fires.
    refresh_software_versions.apply_async(countdown=60)

    # Recurring run, once a day.
    sender.add_periodic_task(seconds_per_day, refresh_software_versions)
def set_up_periodic_tasks(sender: Celery, **kwargs) -> None:
    """
    Register the recurring attack and discovery jobs.

    Both jobs are scheduled every ``BEAT_INTERVAL``; the jobs themselves may
    decide whether there is anything to do on a given tick. The configuration
    is loaded first when ``modules`` is still empty.

    This function is meant to be invoked by Celery itself, not called
    directly.

    :param sender: Celery instance
    :param kwargs: Optional values
    """
    config_missing = not modules
    if config_missing:
        safe_load_config()

    sender.add_periodic_task(
        BEAT_INTERVAL,
        run_jobs.s(),
        name='Launch attacks',
    )

    # Kept as a function-level import, after the attack job is registered,
    # exactly as in the original (presumably to avoid an import cycle --
    # TODO confirm).
    from redbot.modules.discovery import do_discovery

    sender.add_periodic_task(
        BEAT_INTERVAL,
        do_discovery.s(),
        queue='discovery',
        name='Launch discovery',
    )
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register all recurring addon jobs and queue the start-up runs.

    First the fixed-interval and crontab schedules are registered on
    ``sender``. Then ``periodic_fill_missing_addons`` is queued to run in 30
    seconds, and three expensive jobs are queued only when their last
    recorded run (a timestamp read from ``redis_store``) is missing or more
    than one day old.

    :param sender: Celery application the periodic tasks are registered on.
    :param kwargs: Extra keyword arguments; accepted and ignored.
    """
    # Interval building blocks, in seconds.
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    week = 7 * day

    sender.add_periodic_task(hour, periodic_curse_login.s())
    sender.add_periodic_task(15 * minute, periodic_fill_missing_addons.s())
    sender.add_periodic_task(
        25 * minute, periodic_addon_feeds.s(Timespan.HOURLY.value))
    sender.add_periodic_task(
        11 * hour, periodic_addon_feeds.s(Timespan.DAILY.value))
    sender.add_periodic_task(
        3 * day, periodic_addon_feeds.s(Timespan.WEEKLY.value))
    sender.add_periodic_task(
        2 * week, periodic_addon_feeds.s(Timespan.COMPLETE.value))
    sender.add_periodic_task(day, periodic_find_hidden_addons.s())
    sender.add_periodic_task(week, periodic_request_all_files.s())
    # Every hour at XX:00.
    sender.add_periodic_task(
        crontab(minute='0', hour='*'), periodic_keep_history.s())

    periodic_fill_missing_addons.apply_async(countdown=30)

    def ran_within_last_day(redis_key):
        """Return True when ``redis_key`` holds a timestamp at most a day old."""
        stamp = redis_store.get(redis_key)
        if stamp is None:
            return False
        elapsed = datetime.now() - datetime.fromtimestamp(int(stamp))
        return not elapsed > timedelta(days=1)

    # Mainly for staging, so a full download is not redone every time the
    # environment restarts if it has been less than a day. The hourly and
    # daily feeds will cover the gap.
    complete_key = 'periodic-addon_feeds-last-{}'.format(
        Timespan.COMPLETE.value)
    if not ran_within_last_day(complete_key):
        periodic_addon_feeds.apply_async(
            [Timespan.COMPLETE.value], countdown=60)

    if not ran_within_last_day('periodic-find_hidden_addons-last'):
        periodic_find_hidden_addons.apply_async(countdown=hour)

    if not ran_within_last_day('periodic-request_all_files-last'):
        periodic_request_all_files.apply_async(countdown=4 * hour)
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register every recurring job on the Celery scheduler.

    Which jobs are registered depends on ``settings.DEBUG``, the optional
    ``settings.MULTI_TENANCY`` flag (treated as ``False`` when absent),
    ``is_clickhouse_enabled()`` and ``settings.ASYNC_EVENT_PROPERTY_USAGE``.

    :param sender: Celery application the periodic tasks are registered on.
    :param kwargs: Extra keyword arguments; accepted and ignored.
    """
    # Read once; the original looked this flag up three times.
    multi_tenancy = getattr(settings, "MULTI_TENANCY", False)

    if not settings.DEBUG:
        sender.add_periodic_task(
            1.0, redis_celery_queue_depth.s(), name="1 sec queue probe", priority=0
        )

    # Heartbeat every 10sec to make sure the worker is alive
    sender.add_periodic_task(
        10.0, redis_heartbeat.s(), name="10 sec heartbeat", priority=0
    )

    # Update events table partitions twice a week
    sender.add_periodic_task(
        crontab(day_of_week="mon,fri", hour=0, minute=0),
        update_event_partitions.s(),
    )

    if multi_tenancy:
        if not is_clickhouse_enabled():
            sender.add_periodic_task(
                crontab(minute=0, hour="*/12"), run_session_recording_retention.s()
            )
        # Cloud (posthog-cloud) cron jobs: every day at midnight UTC
        sender.add_periodic_task(
            crontab(hour=0, minute=0), calculate_billing_daily_usage.s()
        )
    else:
        # Send weekly status report on self-hosted instances
        sender.add_periodic_task(
            crontab(day_of_week="mon", hour=0, minute=0), status_report.s()
        )

    # Send weekly email report (~ 8:00 SF / 16:00 UK / 17:00 EU)
    sender.add_periodic_task(
        crontab(day_of_week="mon", hour=15, minute=0), send_weekly_email_report.s()
    )

    sender.add_periodic_task(
        crontab(day_of_week="fri", hour=0, minute=0), clean_stale_partials.s()
    )

    # delete old plugin logs every 4 hours
    sender.add_periodic_task(
        crontab(minute=0, hour="*/4"), delete_old_plugin_logs.s()
    )

    # sync all Organization.available_features every hour
    sender.add_periodic_task(
        crontab(minute=30, hour="*"), sync_all_organization_available_features.s()
    )

    sender.add_periodic_task(
        UPDATE_CACHED_DASHBOARD_ITEMS_INTERVAL_SECONDS,
        check_cached_items.s(),
        name="check dashboard items",
    )

    if is_clickhouse_enabled():
        sender.add_periodic_task(120, clickhouse_lag.s(), name="clickhouse table lag")
        sender.add_periodic_task(
            120, clickhouse_row_count.s(), name="clickhouse events table row count"
        )
        # This name was split across two physical lines in the source, which
        # left the string literal unterminated; it is one line again.
        sender.add_periodic_task(
            120, clickhouse_part_count.s(), name="clickhouse table parts count"
        )
        sender.add_periodic_task(
            120, clickhouse_mutation_count.s(), name="clickhouse table mutations count"
        )
    else:
        sender.add_periodic_task(
            ACTION_EVENT_MAPPING_INTERVAL_SECONDS,
            calculate_event_action_mappings.s(),
            name="calculate event action mappings",
            expires=ACTION_EVENT_MAPPING_INTERVAL_SECONDS,
        )

    sender.add_periodic_task(120, calculate_cohort.s(), name="recalculate cohorts")

    if settings.ASYNC_EVENT_PROPERTY_USAGE:
        sender.add_periodic_task(
            EVENT_PROPERTY_USAGE_INTERVAL_SECONDS,
            calculate_event_property_usage.s(),
            name="calculate event property usage",
        )
class App(Empty):
    """Application object that wires Celery, the RDF stores and the views.

    The ``configure_*`` hooks set up, in turn: the Celery app and its
    inference/importer tasks, the admin and knowledge databases plus the
    vocabulary graph, template filters, and the request-time helpers
    (labels, summaries, entity lookup and view rendering).
    """

    # When True, ``_can_edit`` grants edit rights unconditionally.
    managed = False

    def configure_extensions(self):
        """Create the Celery app, its tasks, and the nanopublication manager.

        Reads ``self.db`` (via ``self.db.store``), so the database must have
        been configured before this runs.
        """
        Empty.configure_extensions(self)

        self.celery = Celery(self.name,
                             broker=self.config['CELERY_BROKER_URL'],
                             beat=True)
        self.celery.conf.update(self.config)
        self.celery.conf.ONCE = {
            'backend': 'celery_once.backends.Redis',
            'settings': {
                'url': self.config['CELERY_BROKER_URL'],
                'default_timeout': 60 * 60 * 24
            }
        }

        # ``app`` is bound further down; the nested classes and tasks only
        # read it when they are invoked, never at definition time.
        class ContextTask(self.celery.Task):
            """Celery task base that runs inside the application context."""

            def __call__(self, *args, **kwargs):
                with app.app_context():
                    return self.run(*args, **kwargs)

        self.celery.Task = ContextTask

        # Make QueueOnce app context aware.
        class ContextQueueOnce(QueueOnce):
            """QueueOnce variant that runs inside the application context."""

            def __call__(self, *args, **kwargs):
                with app.app_context():
                    return super(ContextQueueOnce, self).__call__(*args, **kwargs)

        # Attach to celery object for easy access.
        self.celery.QueueOnce = ContextQueueOnce

        app = self

        if 'root_path' in self.config:
            self.root_path = self.config['root_path']

        if self.config.get('WHYIS_TEMPLATE_DIR') is not None:
            # Configured template directories take precedence over the
            # application's own loader.
            app.jinja_loader = jinja2.ChoiceLoader(
                [jinja2.FileSystemLoader(p)
                 for p in self.config['WHYIS_TEMPLATE_DIR']]
                + [app.jinja_loader])

        @self.celery.task(base=QueueOnce, once={'graceful': True})
        def process_resource(service_name, taskid=None):
            """Run the named inferencer over the whole knowledge database."""
            service = self.config['inferencers'][service_name]
            service.process_graph(app.db)

        @self.celery.task
        def process_nanopub(nanopub_uri, service_name, taskid=None):
            """Run the named inferencer over one nanopublication, if current."""
            service = self.config['inferencers'][service_name]
            print(service, nanopub_uri)
            if app.nanopub_manager.is_current(nanopub_uri):
                nanopub = app.nanopub_manager.get(nanopub_uri)
                service.process_graph(nanopub)
            else:
                print("Skipping retired nanopub", nanopub_uri)

        def setup_periodic_task(task):
            """Attach ``find_instances`` and ``do`` Celery tasks to ``task``."""

            @self.celery.task
            def find_instances():
                print("Triggered task", task['name'])
                for x, in task['service'].getInstances(app.db):
                    task['do'](x)

            @self.celery.task
            def do_task(uri):
                print("Running task", task['name'], 'on', uri)
                resource = app.get_resource(uri)
                task['service'].process_graph(resource.graph)

            task['service'].app = app
            task['find_instances'] = find_instances
            task['do'] = do_task
            return task

        app.inference_tasks = []
        if 'inference_tasks' in self.config:
            app.inference_tasks = [
                setup_periodic_task(task)
                for task in self.config['inference_tasks']
            ]

        for name, task in list(self.config['inferencers'].items()):
            task.app = app

        # Scheduled tasks go to the beat scheduler; the rest are queued now.
        for task in app.inference_tasks:
            if 'schedule' in task:
                self.celery.add_periodic_task(crontab(**task['schedule']),
                                              task['find_instances'].s(),
                                              name=task['name'])
            else:
                task['find_instances'].delay()

        @self.celery.task()
        def update(nanopub_uri):
            """Gets called whenever there is a change in the knowledge graph.

            Performs a breadth-first knowledge expansion of the current
            change: inferencers keyed on ``updateChangeQuery`` are queued for
            this nanopublication when they match it, and every inferencer
            keyed on ``globalChangeQuery`` is queued unconditionally.
            """
            nanopub = app.nanopub_manager.get(nanopub_uri)
            nanopub_graph = ConjunctiveGraph(nanopub.store)
            if 'inferencers' in self.config:
                for name, service in list(self.config['inferencers'].items()):
                    service.app = self
                    if service.query_predicate == self.NS.whyis.updateChangeQuery:
                        if service.getInstances(nanopub_graph):
                            print("invoking", name, nanopub_uri)
                            process_nanopub.apply_async(kwargs={
                                'nanopub_uri': nanopub_uri,
                                'service_name': name
                            }, priority=1)
                for name, service in list(self.config['inferencers'].items()):
                    service.app = self
                    if service.query_predicate == self.NS.whyis.globalChangeQuery:
                        process_resource.apply_async(
                            kwargs={'service_name': name}, priority=5)

        def run_update(nanopub_uri):
            update.apply_async(args=[nanopub_uri], priority=9)

        self.nanopub_update_listener = run_update

        app = self

        # NOTE: with bind=True the first parameter of run_importer is the
        # Celery task instance and shadows the outer ``self`` inside the
        # function; the application is reached through ``app`` there.
        @self.celery.task(base=self.celery.QueueOnce,
                          once={'graceful': True},
                          retry_backoff=True,
                          retry_jitter=True,
                          autoretry_for=(Exception, ),
                          max_retries=4,
                          bind=True)
        def run_importer(self, entity_name):
            """Import ``entity_name`` when the remote copy is newer than ours."""
            entity_name = URIRef(entity_name)
            print('importing', entity_name)
            importer = app.find_importer(entity_name)
            if importer is None:
                return
            importer.app = app
            modified = importer.last_modified(entity_name, app.db,
                                              app.nanopub_manager)
            updated = importer.modified(entity_name)
            if updated is None:
                updated = datetime.now(pytz.utc)
            print("Remote modified:", updated, type(updated),
                  "Local modified:", modified, type(modified))
            if modified is None or (
                    updated - modified).total_seconds() > importer.min_modified:
                importer.load(entity_name, app.db, app.nanopub_manager)

        self.run_importer = run_importer

        self.template_imports = {}
        if 'template_imports' in self.config:
            for name, imp in list(self.config['template_imports'].items()):
                try:
                    m = importlib.import_module(imp)
                    self.template_imports[name] = m
                except Exception:
                    print(
                        "Error importing module %s into template variable %s."
                        % (imp, name))
                    raise

        self.nanopub_manager = NanopublicationManager(
            self.db.store,
            Namespace('%s/pub/' % (self.config['lod_prefix'])),
            self,
            update_listener=self.nanopub_update_listener)

        if 'CACHE_TYPE' in self.config:
            from flask_caching import Cache
            self.cache = Cache(self)
        else:
            self.cache = None

    # Lazily resolved depot handles; see the properties below.
    _file_depot = None

    @property
    def file_depot(self):
        """Return the 'files' depot, configuring it on first access."""
        if self._file_depot is None:
            if DepotManager.get('files') is None:
                DepotManager.configure('files', self.config['file_archive'])
            self._file_depot = DepotManager.get('files')
        return self._file_depot

    _nanopub_depot = None

    @property
    def nanopub_depot(self):
        """Return the 'nanopublications' depot, or None if not configured."""
        if self._nanopub_depot is None and 'nanopub_archive' in self.config:
            if DepotManager.get('nanopublications') is None:
                DepotManager.configure('nanopublications',
                                       self.config['nanopub_archive'])
            self._nanopub_depot = DepotManager.get('nanopublications')
        return self._nanopub_depot

    def configure_database(self):
        """Database configuration should be set here."""
        self.NS = NS
        self.NS.local = rdflib.Namespace(self.config['lod_prefix'] + '/')

        self.admin_db = database.engine_from_config(self.config, "admin_")
        self.db = database.engine_from_config(self.config, "knowledge_")
        self.db.app = self

        # The default and custom vocabularies share one store, so queries
        # against self.vocab see both.
        self.vocab = ConjunctiveGraph()
        default_vocab = Graph(store=self.vocab.store)
        default_vocab.parse(
            source=os.path.abspath(
                os.path.join(os.path.dirname(__file__), "default_vocab.ttl")),
            format="turtle",
            publicID=str(self.NS.local))
        custom_vocab = Graph(store=self.vocab.store)
        custom_vocab.parse(self.config['vocab_file'],
                           format="turtle",
                           publicID=str(self.NS.local))

        self.datastore = WhyisUserDatastore(self.admin_db, {},
                                            self.config['lod_prefix'])
        self.security = Security(self,
                                 self.datastore,
                                 register_form=ExtendedRegisterForm)

    def __weighted_route(self, *args, **kwargs):
        """
        Override the match_compare_key function of the Rule created by
        invoking Flask.route. This can only be done on the app, not in a
        blueprint, because blueprints lazily add Rule's when they are
        registered on an app.
        """

        def decorator(view_func):
            compare_key = kwargs.pop('compare_key', None)
            # register view_func with route
            self.route(*args, **kwargs)(view_func)

            if compare_key is not None:
                # The rule just registered is the last one in the map.
                rule = self.url_map._rules[-1]
                rule.match_compare_key = lambda: compare_key

            return view_func

        return decorator

    def map_entity(self, name):
        """Return ``(mapped_name, importer)`` for the first matching namespace.

        Returns ``(None, None)`` when no configured importer matches.
        """
        for importer in self.config['namespaces']:
            if importer.matches(name):
                return importer.map(name), importer
        return None, None

    def find_importer(self, name):
        """Return the first importer whose resources match ``name``, or None."""
        for importer in self.config['namespaces']:
            if importer.resource_matches(name):
                return importer
        return None

    class Entity(rdflib.resource.Resource):
        """Resource with lazily computed, cached ``this`` and ``description``."""

        _this = None

        def this(self):
            """Return the entity fetched through the owning app (cached)."""
            if self._this is None:
                self._this = self._graph.app.get_entity(self.identifier)
            return self._this

        _description = None

        def description(self):
            """Return a resource over this entity's asserted triples (cached).

            The graph holds the entity's statements from assertion graphs
            plus labels/titles/names of the objects it points at.
            """
            if self._description is None:
                result = Graph()
                query = (
                    ' construct { ?e ?p ?o. ?o rdfs:label ?label. '
                    '?o skos:prefLabel ?prefLabel. ?o dc:title ?title. '
                    '?o foaf:name ?name. ?o ?pattr ?oattr. '
                    '?oattr rdfs:label ?oattrlabel } '
                    'where { graph ?g { ?e ?p ?o. } ?g a np:Assertion. '
                    'optional { ?e sio:hasAttribute|sio:hasPart ?o. '
                    '?o ?pattr ?oattr. '
                    'optional { ?oattr rdfs:label ?oattrlabel. } } '
                    'optional { ?o rdfs:label ?label. } '
                    'optional { ?o skos:prefLabel ?prefLabel. } '
                    'optional { ?o dc:title ?title. } '
                    'optional { ?o foaf:name ?name. } }'
                )
                for quad in self._graph.query(
                        query,
                        initNs=NS.prefixes,
                        initBindings={'e': self.identifier}):
                    if len(quad) == 3:
                        s, p, o = quad
                    else:
                        # The fourth term (graph) is not needed.
                        s, p, o, _ = quad
                    result.add((s, p, o))
                self._description = result.resource(self.identifier)
            return self._description

    def get_resource(self, entity, async_=True, retrieve=True):
        """Return an ``Entity`` for ``entity``, importing it when applicable.

        :param entity: URI of the resource; replaced by its mapped name when
            a namespace maps it.
        :param async_: When False, or when nothing has been imported yet,
            the import runs inline; otherwise it is queued (unless the
            importer is flagged ``import_once``).
        :param retrieve: When False, no mapping or import is attempted.
        """
        if retrieve:
            mapped_name, importer = self.map_entity(entity)

            if mapped_name is not None:
                entity = mapped_name

            if importer is None:
                importer = self.find_importer(entity)
            print(entity, importer)

            if importer is not None:
                modified = importer.last_modified(entity, self.db,
                                                  self.nanopub_manager)
                if modified is None or async_ is False:
                    self.run_importer(entity)
                elif not importer.import_once:
                    print("Type of modified is", type(modified))
                    self.run_importer.delay(entity)

        return self.Entity(self.db, entity)

    def configure_template_filters(self):
        """Register the built-in filters and any from ``config['filters']``."""
        filters.configure(self)
        if 'filters' in self.config:
            for name, fn in self.config['filters'].items():
                self.template_filter(name)(fn)

    def add_file(self, f, entity, nanopub):
        """Store upload ``f`` and describe it as ``entity`` in ``nanopub``.

        :returns: list of ``(nanopub_uri, assertion_uri)`` pairs that already
            attach a file to ``entity``.
        :raises Unauthorized: if the current user may not edit one of those
            existing nanopublications.
        """
        entity = rdflib.URIRef(entity)
        old_nanopubs = []
        query = (
            'select distinct ?np ?assertion where { '
            'hint:Query hint:optimizer "Runtime" . '
            'graph ?assertion {?e whyis:hasFileID ?fileid} '
            '?np np:hasAssertion ?assertion. }'
        )
        for np_uri, np_assertion, in self.db.query(
                query,
                initNs=NS.prefixes,
                initBindings=dict(e=rdflib.URIRef(entity))):
            if not self._can_edit(np_uri):
                raise Unauthorized()
            old_nanopubs.append((np_uri, np_assertion))

        fileid = self.file_depot.create(f.stream, f.filename, f.mimetype)
        has_identity = (current_user._get_current_object() is not None
                        and hasattr(current_user, 'identifier'))
        media_type = NS.mediaTypes[f.mimetype]
        # Top-level media type, e.g. the part of the mimetype before '/'.
        media_family = NS.mediaTypes[f.mimetype.split('/')[0]]

        nanopub.add((nanopub.identifier, NS.sio.isAbout, entity))
        nanopub.assertion.add((entity, NS.whyis.hasFileID, Literal(fileid)))
        if has_identity:
            nanopub.assertion.add(
                (entity, NS.dc.contributor, current_user.identifier))
        nanopub.assertion.add(
            (entity, NS.dc.created, Literal(datetime.utcnow())))
        nanopub.assertion.add(
            (entity, NS.ov.hasContentType, Literal(f.mimetype)))
        nanopub.assertion.add((entity, NS.RDF.type, media_type))
        nanopub.assertion.add((media_type, NS.RDF.type, NS.dc.FileFormat))
        nanopub.assertion.add((entity, NS.RDF.type, media_family))
        nanopub.assertion.add((media_family, NS.RDF.type, NS.dc.FileFormat))
        nanopub.assertion.add((entity, NS.RDF.type, NS.pv.File))

        if has_identity:
            nanopub.pubinfo.add((nanopub.assertion.identifier,
                                 NS.dc.contributor, current_user.identifier))
        nanopub.pubinfo.add((nanopub.assertion.identifier, NS.dc.created,
                             Literal(datetime.utcnow())))

        return old_nanopubs

    def delete_file(self, entity):
        """Retire every nanopublication that attaches a file to ``entity``.

        :raises Unauthorized: if the current user may not edit one of them.
        """
        query = (
            'select distinct ?np where { '
            'hint:Query hint:optimizer "Runtime" . '
            'graph ?np_assertion {?e whyis:hasFileID ?fileid} '
            '?np np:hasAssertion ?np_assertion. }'
        )
        for np_uri, in self.db.query(query,
                                     initNs=NS.prefixes,
                                     initBindings=dict(e=entity)):
            if not self._can_edit(np_uri):
                raise Unauthorized()
            self.nanopub_manager.retire(np_uri)

    def add_files(self, uri, files, upload_type=NS.pv.File):
        """Attach uploaded ``files`` to ``uri`` in one new nanopublication.

        For a dcmitype Collection or a dcat Dataset each named file becomes
        ``uri/<secure filename>``; for any other ``upload_type`` only the
        first named file is stored, directly as ``uri``. Nothing is published
        when no file was added. Superseded nanopublications are retired.
        """
        nanopub = self.nanopub_manager.new()

        added_files = False
        old_nanopubs = []

        nanopub.assertion.add((uri, self.NS.RDF.type, upload_type))
        if upload_type == URIRef("http://purl.org/dc/dcmitype/Collection"):
            for f in files:
                filename = secure_filename(f.filename)
                if filename != '':
                    file_uri = URIRef(uri + "/" + filename)
                    old_nanopubs.extend(self.add_file(f, file_uri, nanopub))
                    nanopub.assertion.add((uri, NS.dc.hasPart, file_uri))
                    added_files = True
        elif upload_type == NS.dcat.Dataset:
            for f in files:
                filename = secure_filename(f.filename)
                if filename != '':
                    file_uri = URIRef(uri + "/" + filename)
                    old_nanopubs.extend(self.add_file(f, file_uri, nanopub))
                    nanopub.assertion.add(
                        (uri, NS.dcat.distribution, file_uri))
                    nanopub.assertion.add(
                        (file_uri, NS.RDF.type, NS.dcat.Distribution))
                    nanopub.assertion.add(
                        (file_uri, NS.dcat.downloadURL, file_uri))
                    added_files = True
        else:
            for f in files:
                if f.filename != '':
                    old_nanopubs.extend(self.add_file(f, uri, nanopub))
                    nanopub.assertion.add((uri, NS.RDF.type, NS.pv.File))
                    added_files = True
                    # Only one file can be stored under a single URI.
                    break

        if added_files:
            for old_np, old_np_assertion in old_nanopubs:
                nanopub.pubinfo.add((nanopub.assertion.identifier,
                                     NS.prov.wasRevisionOf, old_np_assertion))
                self.nanopub_manager.retire(old_np)

            for n in self.nanopub_manager.prepare(nanopub):
                self.nanopub_manager.publish(n)

    def _can_edit(self, uri):
        """Return True when the current user may edit nanopublication ``uri``."""
        if self.managed:
            return True
        if current_user._get_current_object() is None:
            # This isn't null even when not authenticated, unless we are an
            # autonomic agent.
            return True
        if not hasattr(current_user, 'identifier'):
            # This is an anonymous user.
            return False
        if (current_user.has_role('Publisher')
                or current_user.has_role('Editor')
                or current_user.has_role('Admin')):
            return True
        # Otherwise only a user recorded as contributor of the assertion
        # may edit it.
        is_owner = self.db.query(
            'ask { ?nanopub np:hasAssertion ?assertion; '
            'np:hasPublicationInfo ?info. '
            'graph ?info { ?assertion dc:contributor ?user. } }',
            initBindings=dict(nanopub=uri, user=current_user.identifier),
            initNs=dict(np=self.NS.np, dc=self.NS.dc))
        if is_owner:
            return True
        return False

    def configure_views(self):
        """Install request hooks, label/summary helpers and view rendering."""

        def sort_by(resources, property):
            return sorted(resources, key=lambda x: x.value(property))

        def camel_case_split(identifier):
            """Split a camelCase identifier into its words."""
            matches = finditer(
                '.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)',
                identifier)
            return [m.group(0) for m in matches]

        # Tried in order; the first property with a usable label wins.
        label_properties = [
            self.NS.skos.prefLabel, self.NS.RDFS.label, self.NS.schema.name,
            self.NS.dc.title, self.NS.foaf.name, self.NS.skos.notation
        ]

        # Given/family name pairs used when no label property matches.
        name_properties = [
            (self.NS.foaf.givenName, self.NS.foaf.familyName),
            (self.NS.schema.givenName, self.NS.schema.familyName),
        ]

        @lru_cache(maxsize=1000)
        def get_remote_label(uri):
            """Look up a label for ``uri`` in the knowledge and admin stores.

            Falls back to a given/family name (foaf first, then schema.org),
            then to a title-cased version of the URI's qname local part, and
            finally to the URI itself.
            """
            for db in [self.db, self.admin_db]:
                triples = Graph()
                try:
                    db.nsBindings = {}
                    triples += db.query(
                        'select ?s ?p ?o where { '
                        'hint:Query hint:optimizer "Runtime" . '
                        '?s ?p ?o.}',
                        initNs=self.NS.prefixes,
                        initBindings=dict(s=uri))
                except Exception as e:
                    # Best effort: a failing store is treated as having no
                    # data for this URI, but the failure is now logged.
                    print(str(e), uri)
                finally:
                    db.nsBindings = {}
                resource_entity = triples.resource(uri)
                if len(resource_entity.graph) == 0:
                    continue
                for property in label_properties:
                    labels = self.lang_filter(resource_entity[property])
                    if len(labels) > 0:
                        return labels[0]

                # Previously the foaf name was always overwritten by the
                # schema.org lookup; now schema.org is only the fallback.
                for given, family in name_properties:
                    name = [
                        x.value for x in [
                            resource_entity.value(given),
                            resource_entity.value(family)
                        ] if x is not None
                    ]
                    if len(name) > 0:
                        return ' '.join(name)
            try:
                label = self.db.qname(uri).split(":")[1].replace("_", " ")
                return ' '.join(camel_case_split(label)).title()
            except Exception as e:
                print(str(e), uri)
                return str(uri)

        def get_label(resource):
            """Return a label from ``resource`` itself, else a remote one."""
            for property in label_properties:
                labels = self.lang_filter(resource[property])
                if len(labels) > 0:
                    return labels[0]
            return get_remote_label(resource.identifier)

        self.get_label = get_label

        def initialize_g():
            """Populate flask ``g`` with template helpers once per context."""
            if not hasattr(g, "initialized"):
                g.initialized = True
                g.ns = self.NS
                g.get_summary = get_summary
                g.get_label = get_label
                g.labelize = self.labelize
                g.get_resource = self.get_resource
                g.get_entity = self.get_entity
                g.rdflib = rdflib
                g.isinstance = isinstance
                g.current_user = current_user
                g.slugify = slugify
                g.db = self.db

        self.initialize_g = initialize_g

        @self.before_request
        def load_forms():
            if 'authenticators' in self.config:
                for authenticator in self.config['authenticators']:
                    user = authenticator.authenticate(request, self.datastore,
                                                      self.config)
                    if user is not None:
                        # Stop at the first authenticator that recognises
                        # the request.
                        break
            initialize_g()

        @self.login_manager.user_loader
        def load_user(user_id):
            if user_id is None:
                return None
            return self.datastore.find_user(id=user_id)

        def get_entity_sparql(entity):
            """Return a resource over every nanopub graph concerning ``entity``."""
            try:
                statements = self.db.query(
                    'select distinct ?s ?p ?o ?g where { '
                    'hint:Query hint:optimizer "Runtime" . '
                    '?np np:hasAssertion?|np:hasProvenance?'
                    '|np:hasPublicationInfo? ?g; '
                    'np:hasPublicationInfo ?pubinfo; '
                    'np:hasAssertion ?assertion; '
                    '{graph ?np { ?np sio:isAbout ?e.}} '
                    'UNION {graph ?assertion { ?e ?p ?o.}} '
                    'graph ?g { ?s ?p ?o } }',
                    initBindings={'e': entity},
                    initNs=self.NS.prefixes)
                result = ConjunctiveGraph()
                result.addN(statements)
            except Exception as e:
                print(str(e), entity)
                raise
            return result.resource(entity)

        get_entity = get_entity_sparql

        self.get_entity = get_entity

        def get_summary(resource):
            """Yield ``(property, term)`` for each summary-like value found."""
            summary_properties = [
                self.NS.skos.definition, self.NS.schema.description,
                self.NS.dc.abstract, self.NS.dc.description,
                self.NS.dc.summary, self.NS.RDFS.comment,
                self.NS.dcelements.description,
                URIRef("http://purl.obolibrary.org/obo/IAO_0000115"),
                self.NS.prov.value, self.NS.sio.hasValue
            ]
            if 'summary_properties' in self.config:
                summary_properties.extend(self.config['summary_properties'])
            for property in summary_properties:
                terms = self.lang_filter(resource[property])
                for term in terms:
                    yield (property, term)

        self.get_summary = get_summary

        if self.config.get('WHYIS_CDN_DIR') is not None:

            @self.route('/cdn/<path:filename>')
            def cdn(filename):
                return send_from_directory(self.config['WHYIS_CDN_DIR'],
                                           filename)

        def render_view(resource, view=None, args=None, use_cache=True):
            """Render ``resource`` with the best-ranked template for its types.

            :param view: view identifier; defaults to the ``view`` request
                argument, then to ``'view'``.
            :param args: template ``args``; defaults to ``request.args``.
            :param use_cache: consult ``self.cache`` before rendering.
            :returns: ``(body, 200, headers)``; aborts with 404 when no view
                is defined for any of the resource's types.
            """
            self.initialize_g()
            if view is None and 'view' in request.args:
                view = request.args['view']

            if view is None:
                view = 'view'

            if use_cache and self.cache is not None:
                # NOTE(review): the cache is only read here; nothing in this
                # function stores a rendered result under this key.
                key = str((str(resource.identifier), view))
                result = self.cache.get(key)
                if result is not None:
                    r, headers = result
                    return r, 200, headers

            template_args = dict()
            template_args.update(self.template_imports)
            template_args.update(
                dict(ns=self.NS,
                     this=resource,
                     g=g,
                     current_user=current_user,
                     isinstance=isinstance,
                     args=request.args if args is None else args,
                     url_for=url_for,
                     app=self,
                     view=view,
                     get_entity=get_entity,
                     get_summary=get_summary,
                     search=search,
                     rdflib=rdflib,
                     config=self.config,
                     hasattr=hasattr,
                     set=set))

            # (type, priority) pairs; a lower priority ranks a view earlier.
            types = []
            if 'as' in request.args:
                # Must be a list of one pair: the old flat
                # [URIRef(...), 0] broke the ``for x, i in types`` unpacking
                # below whenever ?as= was supplied.
                types = [(URIRef(request.args['as']), 0)]

            types.extend(
                (x, 1) for x in self.vocab[resource.identifier:NS.RDF.type])
            if len(types) == 0:
                # KG types cannot override vocab types. This should keep
                # views stable where critical.
                types.extend([(x.identifier, 1) for x in resource[NS.RDF.type]
                              if isinstance(x.identifier, rdflib.URIRef)])
            types.append([self.NS.RDFS.Resource, 100])

            # NOTE(review): ``view`` and the ``as`` URI come from the request
            # and are interpolated into the query text below -- confirm they
            # cannot break out of the VALUES clause.
            type_string = ' '.join(
                ["(%s %d '%s')" % (x.n3(), i, view) for x, i in types])
            view_query = (
                'select ?id ?view (count(?mid)+?priority as ?rank) '
                '?class ?c ?content_type where { '
                'values (?c ?priority ?id) { %s } '
                '?c rdfs:subClassOf* ?mid. '
                '?mid rdfs:subClassOf* ?class. '
                '?class ?viewProperty ?view. '
                '?viewProperty rdfs:subPropertyOf* whyis:hasView. '
                '?viewProperty dc:identifier ?id. '
                'optional { ?viewProperty dc:format ?content_type } '
                '} group by ?c ?class ?content_type order by ?rank '
            ) % type_string

            views = list(
                self.vocab.query(view_query,
                                 initNs=dict(whyis=self.NS.whyis,
                                             dc=self.NS.dc)))
            if len(views) == 0:
                abort(404)

            best = views[0]
            headers = {'Content-Type': "text/html"}
            extension = best['view'].value.split(".")[-1]
            if extension in DATA_EXTENSIONS:
                headers['Content-Type'] = DATA_EXTENSIONS[extension]
            print(best['view'], best['content_type'])
            # An explicit dc:format on the view overrides the extension.
            if best['content_type'] is not None:
                headers['Content-Type'] = best['content_type']

            return render_template(best['view'].value,
                                   **template_args), 200, headers

        self.render_view = render_view

        # Register blueprints
        self.register_blueprint(nanopub_blueprint)
        self.register_blueprint(sparql_blueprint)
        self.register_blueprint(entity_blueprint)
        self.register_blueprint(tableview_blueprint)

    def get_entity_uri(self, name, format):
        """Resolve a URL name/extension pair to ``(entity, content_type)``.

        A ``format`` found in ``DATA_EXTENSIONS`` selects the content type;
        any other ``format`` is treated as part of the name. Without a name
        the ``uri`` request argument is used, else the local ``Home``.
        """
        content_type = None
        if format is not None:
            if format in DATA_EXTENSIONS:
                content_type = DATA_EXTENSIONS[format]
            else:
                name = '.'.join([name, format])
        if name is not None:
            entity = self.NS.local[name]
        elif 'uri' in request.args:
            entity = URIRef(request.args['uri'])
        else:
            entity = self.NS.local.Home
        return entity, content_type

    def get_send_file_max_age(self, filename):
        """Disable static-file caching in debug mode; else defer to ``Empty``."""
        if self.debug:
            return 0
        return Empty.get_send_file_max_age(self, filename)
class App(Empty):
    """Flask application wiring Celery tasks, RDF stores and nanopublication views.

    The ``configure_*`` methods are hooks invoked by the ``Empty`` base class
    (defined outside this block); they attach the Celery tasks, databases and
    routes as attributes and closures on the application instance.
    """

    # When True, _can_edit() allows every edit without consulting the user.
    managed = False

    def configure_extensions(self):
        """Create the Celery app and register inference, update and import tasks."""
        Empty.configure_extensions(self)
        self.celery = Celery(self.name, broker=self.config['CELERY_BROKER_URL'], beat=True)
        self.celery.conf.update(self.config)
        app = self

        self.redis = self.celery.broker_connection().default_channel.client

        if 'root_path' in self.config:
            self.root_path = self.config['root_path']

        if 'WHYIS_TEMPLATE_DIR' in self.config and app.config['WHYIS_TEMPLATE_DIR'] is not None:
            # Project template directories take precedence over the built-in loader.
            my_loader = jinja2.ChoiceLoader(
                [jinja2.FileSystemLoader(p) for p in self.config['WHYIS_TEMPLATE_DIR']]
                + [app.jinja_loader]
            )
            app.jinja_loader = my_loader

        def setup_task(service):
            """Bind ``service`` to the app and return the task that processes it."""
            service.app = app
            print(service)
            result = None
            if service.query_predicate == self.NS.whyis.globalChangeQuery:
                result = process_resource
            else:
                result = process_nanopub
            result.service = lambda: service
            return result

        @self.celery.task
        def process_resource(service_name, taskid=None):
            """Run a global-change inferencer over the whole knowledge graph."""
            service = self.config['inferencers'][service_name]
            if is_waiting(service_name):
                print("Deferring to a later invocation.", service_name)
                return
            print(service_name)
            service.process_graph(app.db)

        @self.celery.task
        def process_nanopub(nanopub_uri, service_name, taskid=None):
            """Run an inferencer over one nanopublication, unless it has been retired."""
            service = self.config['inferencers'][service_name]
            print(service, nanopub_uri)
            if app.nanopub_manager.is_current(nanopub_uri):
                nanopub = app.nanopub_manager.get(nanopub_uri)
                service.process_graph(nanopub)
            else:
                print("Skipping retired nanopub", nanopub_uri)

        def setup_periodic_task(task):
            """Attach ``find_instances``/``do`` Celery tasks to an inference task dict."""

            @self.celery.task
            def find_instances():
                print("Triggered task", task['name'])
                for x, in task['service'].getInstances(app.db):
                    task['do'](x)

            @self.celery.task
            def do_task(uri):
                print("Running task", task['name'], 'on', uri)
                resource = app.get_resource(uri)
                # result never used
                task['service'].process_graph(resource.graph)

            task['service'].app = app
            task['find_instances'] = find_instances
            task['do'] = do_task
            return task

        app.inference_tasks = []
        if 'inference_tasks' in self.config:
            app.inference_tasks = [
                setup_periodic_task(task) for task in self.config['inference_tasks']
            ]

        # update() already treats 'inferencers' as optional; do the same here.
        for name, task in list(self.config.get('inferencers', {}).items()):
            task.app = app

        for task in app.inference_tasks:
            if 'schedule' in task:
                self.celery.add_periodic_task(
                    crontab(**task['schedule']),
                    task['find_instances'].s(),
                    name=task['name']
                )
            else:
                task['find_instances'].delay()

        def _task_kwargs(task):
            """Return the keyword arguments of an inspected task as a dict.

            Entries from ``inspect().scheduled()`` nest the task description
            under ``'request'``; entries from ``active()`` are flat. ``kwargs``
            may be a dict or its ``repr``, depending on the Celery version.
            """
            import ast  # local import: the file-level import block is not in view

            request_info = task.get('request', task)
            raw = request_info.get('kwargs')
            if isinstance(raw, dict):
                return raw
            if isinstance(raw, str):
                # literal_eval instead of eval: never execute text that came
                # back from the broker.
                try:
                    parsed = ast.literal_eval(raw)
                except (ValueError, SyntaxError):
                    return {}
                if isinstance(parsed, dict):
                    return parsed
            return {}

        def _mentions_service(tasks_by_worker, service_name):
            """Return True if any worker lists a task for ``service_name``."""
            # inspect() calls return None when no worker replies.
            for worker_tasks in (tasks_by_worker or {}).values():
                for task in worker_tasks:
                    if _task_kwargs(task).get('service_name') == service_name:
                        return True
            return False

        def is_waiting(service_name):
            """
            Check if a task is waiting.
            """
            return _mentions_service(inspect().scheduled(), service_name)

        def is_running_waiting(service_name):
            """
            Check if a task is running or waiting.
            """
            if is_waiting(service_name):
                return True
            return _mentions_service(inspect().active(), service_name)

        @self.celery.task()
        def update(nanopub_uri):
            '''Gets called whenever there is a change in the knowledge graph.

            Performs a breadth-first knowledge expansion of the current change.'''
            if not app.nanopub_manager.is_current(nanopub_uri):
                print("Skipping retired nanopub", nanopub_uri)
                return
            nanopub = app.nanopub_manager.get(nanopub_uri)
            nanopub_graph = ConjunctiveGraph(nanopub.store)
            if 'inferencers' in self.config:
                inferencers = list(self.config['inferencers'].items())
                for name, service in inferencers:
                    service.app = self
                    if service.query_predicate == self.NS.whyis.updateChangeQuery:
                        if service.getInstances(nanopub_graph):
                            print("invoking", name, nanopub_uri)
                            process_nanopub.apply_async(
                                kwargs={'nanopub_uri': nanopub_uri, 'service_name': name},
                                priority=1
                            )
                for name, service in inferencers:
                    service.app = self
                    if (service.query_predicate == self.NS.whyis.globalChangeQuery
                            and not is_running_waiting(name)):
                        process_resource.apply_async(kwargs={'service_name': name}, priority=5)

        def run_update(nanopub_uri):
            update.apply_async(args=[nanopub_uri], priority=9)
        self.nanopub_update_listener = run_update

        def is_waiting_importer(entity_name, exclude=None):
            """
            Check if an import of ``entity_name`` is scheduled on any worker.
            """
            for worker_tasks in (inspect().scheduled() or {}).values():
                for task in worker_tasks:
                    request_info = task.get('request', task)
                    if entity_name in (request_info.get('args') or ()):
                        return True
            return False

        @self.celery.task(retry_backoff=True, retry_jitter=True, autoretry_for=(Exception,),
                          max_retries=4, bind=True)
        def run_importer(task, entity_name):
            """Import ``entity_name`` if its remote copy is newer than the local one.

            A redis counter keeps concurrent imports of the same entity from
            overlapping: only the caller that raises it to 1 does the work.
            """
            entity_name = URIRef(entity_name)
            # Plain string key: recent redis-py releases reject tuple keys.
            lock_key = 'import:%s' % entity_name
            counter = app.redis.incr(lock_key)
            if counter > 1:
                return
            try:
                print('importing', entity_name)
                importer = app.find_importer(entity_name)
                if importer is None:
                    return
                importer.app = app
                modified = importer.last_modified(entity_name, app.db, app.nanopub_manager)
                updated = importer.modified(entity_name)
                if updated is None:
                    updated = datetime.now(pytz.utc)
                print("Remote modified:", updated, type(updated),
                      "Local modified:", modified, type(modified))
                if modified is None or (updated - modified).total_seconds() > importer.min_modified:
                    importer.load(entity_name, app.db, app.nanopub_manager)
            finally:
                # Always release the guard, otherwise one failed import would
                # block every later import of the same entity.
                app.redis.set(lock_key, 0)
        self.run_importer = run_importer

        self.template_imports = {}
        if 'template_imports' in self.config:
            for name, imp in list(self.config['template_imports'].items()):
                try:
                    m = importlib.import_module(imp)
                    self.template_imports[name] = m
                except Exception:
                    print("Error importing module %s into template variable %s." % (imp, name))
                    raise

    def configure_database(self):
        """
        Database configuration should be set here
        """
        self.NS = NS
        self.NS.local = rdflib.Namespace(self.config['lod_prefix'] + '/')

        self.admin_db = database.engine_from_config(self.config, "admin_")
        self.db = database.engine_from_config(self.config, "knowledge_")
        self.db.app = self

        # Default and custom vocabularies share one store.
        self.vocab = ConjunctiveGraph()
        default_vocab = Graph(store=self.vocab.store)
        default_vocab.parse("default_vocab.ttl", format="turtle", publicID=str(self.NS.local))
        custom_vocab = Graph(store=self.vocab.store)
        custom_vocab.parse(self.config['vocab_file'], format="turtle", publicID=str(self.NS.local))

        self.datastore = WhyisUserDatastore(self.admin_db, {}, self.config['lod_prefix'])
        self.security = Security(self, self.datastore, register_form=ExtendedRegisterForm)

        self.file_depot = DepotManager.get('files')
        if self.file_depot is None:
            DepotManager.configure('files', self.config['file_archive'])
            self.file_depot = DepotManager.get('files')
        if DepotManager.get('nanopublications') is None:
            DepotManager.configure('nanopublications', self.config['nanopub_archive'])

    def weighted_route(self, *args, **kwargs):
        """Like ``route``, but accepts ``compare_key`` to override the rule's match ordering."""
        def decorator(view_func):
            compare_key = kwargs.pop('compare_key', None)
            # register view_func with route
            self.route(*args, **kwargs)(view_func)

            if compare_key is not None:
                # The rule just registered is the last one in the map.
                rule = self.url_map._rules[-1]
                rule.match_compare_key = lambda: compare_key

            return view_func
        return decorator

    def map_entity(self, name):
        """Return ``(mapped_name, importer)`` for the first namespace matching ``name``."""
        for importer in self.config['namespaces']:
            if importer.matches(name):
                new_name = importer.map(name)
                return new_name, importer
        return None, None

    def find_importer(self, name):
        """Return the first configured importer whose resources match ``name``, or None."""
        for importer in self.config['namespaces']:
            if importer.resource_matches(name):
                return importer
        return None

    class Entity (rdflib.resource.Resource):
        """Resource with lazily loaded, cached ``this`` and ``description`` views."""

        _this = None

        def this(self):
            """Return the full entity graph resource, fetching it on first use."""
            if self._this is None:
                self._this = self._graph.app.get_entity(self.identifier)
            return self._this

        _description = None

        def description(self):
            """Return a resource holding this entity's assertions plus labels of its objects."""
            if self._description is None:
                result = Graph()
                # '?oattr' in the template was misspelled '?oatter', so attribute
                # triples were never constructed.
                query = (
                    ' construct { '
                    '?e ?p ?o. '
                    '?o rdfs:label ?label. '
                    '?o skos:prefLabel ?prefLabel. '
                    '?o dc:title ?title. '
                    '?o foaf:name ?name. '
                    '?o ?pattr ?oattr. '
                    '?oattr rdfs:label ?oattrlabel '
                    '} where { '
                    'graph ?g { ?e ?p ?o. } '
                    '?g a np:Assertion. '
                    'optional { '
                    '?e sio:hasAttribute|sio:hasPart ?o. '
                    '?o ?pattr ?oattr. '
                    'optional { ?oattr rdfs:label ?oattrlabel. } '
                    '} '
                    'optional { ?o rdfs:label ?label. } '
                    'optional { ?o skos:prefLabel ?prefLabel. } '
                    'optional { ?o dc:title ?title. } '
                    'optional { ?o foaf:name ?name. } '
                    '}'
                )
                for quad in self._graph.query(query, initNs=NS.prefixes,
                                              initBindings={'e': self.identifier}):
                    if len(quad) == 3:
                        s, p, o = quad
                    else:
                        # Last term is never used
                        s, p, o, _ = quad
                    result.add((s, p, o))
                self._description = result.resource(self.identifier)
            return self._description

    def get_resource(self, entity, async_=True, retrieve=True):
        """Return an ``Entity`` for ``entity``, triggering an import first when applicable.

        :param entity: URI of the entity; replaced by its mapped name if a namespace maps it
        :param async_: when False, the importer always runs synchronously
        :param retrieve: when False, no importer lookup is performed
        """
        if retrieve:
            mapped_name, importer = self.map_entity(entity)

            if mapped_name is not None:
                entity = mapped_name

            if importer is None:
                importer = self.find_importer(entity)
            print(entity, importer)

            if importer is not None:
                modified = importer.last_modified(entity, self.db, self.nanopub_manager)
                if modified is None or async_ is False:
                    # Never imported (or caller insists): run in-process.
                    self.run_importer(entity)
                elif not importer.import_once:
                    print("Type of modified is", type(modified))
                    self.run_importer.delay(entity)

        return self.Entity(self.db, entity)

    def configure_template_filters(self):
        """Register the built-in filters plus any listed under ``config['filters']``."""
        filters.configure(self)
        if 'filters' in self.config:
            for name, fn in self.config['filters'].items():
                self.template_filter(name)(fn)

    def add_file(self, f, entity, nanopub):
        """Store uploaded file ``f`` and describe it as ``entity`` inside ``nanopub``.

        :returns: list of ``(nanopub_uri, assertion_uri)`` pairs that previously
            held a file for ``entity``
        :raises Unauthorized: if the current user may not edit one of those nanopubs
        """
        entity = rdflib.URIRef(entity)
        old_nanopubs = []
        query = (
            'select distinct ?np ?assertion where { '
            'hint:Query hint:optimizer "Runtime" . '
            'graph ?assertion {?e whyis:hasFileID ?fileid} '
            '?np np:hasAssertion ?assertion. '
            '}'
        )
        for np_uri, np_assertion, in self.db.query(query, initNs=NS.prefixes,
                                                   initBindings=dict(e=rdflib.URIRef(entity))):
            if not self._can_edit(np_uri):
                raise Unauthorized()
            old_nanopubs.append((np_uri, np_assertion))
        fileid = self.file_depot.create(f.stream, f.filename, f.mimetype)
        media_type = NS.mediaTypes[f.mimetype]
        media_family = NS.mediaTypes[f.mimetype.split('/')[0]]
        has_identity = (current_user._get_current_object() is not None
                        and hasattr(current_user, 'identifier'))

        nanopub.add((nanopub.identifier, NS.sio.isAbout, entity))
        nanopub.assertion.add((entity, NS.whyis.hasFileID, Literal(fileid)))
        if has_identity:
            nanopub.assertion.add((entity, NS.dc.contributor, current_user.identifier))
        nanopub.assertion.add((entity, NS.dc.created, Literal(datetime.utcnow())))
        nanopub.assertion.add((entity, NS.ov.hasContentType, Literal(f.mimetype)))
        nanopub.assertion.add((entity, NS.RDF.type, media_type))
        nanopub.assertion.add((media_type, NS.RDF.type, NS.dc.FileFormat))
        nanopub.assertion.add((entity, NS.RDF.type, media_family))
        nanopub.assertion.add((media_family, NS.RDF.type, NS.dc.FileFormat))
        nanopub.assertion.add((entity, NS.RDF.type, NS.pv.File))

        if has_identity:
            nanopub.pubinfo.add((nanopub.assertion.identifier, NS.dc.contributor,
                                 current_user.identifier))
        nanopub.pubinfo.add((nanopub.assertion.identifier, NS.dc.created,
                             Literal(datetime.utcnow())))

        return old_nanopubs

    def delete_file(self, entity):
        """Retire every nanopublication that asserts a file ID for ``entity``.

        :raises Unauthorized: if the current user may not edit one of them
        """
        query = (
            'select distinct ?np where { '
            'hint:Query hint:optimizer "Runtime" . '
            'graph ?np_assertion {?e whyis:hasFileID ?fileid} '
            '?np np:hasAssertion ?np_assertion. '
            '}'
        )
        for np_uri, in self.db.query(query, initNs=NS.prefixes, initBindings=dict(e=entity)):
            if not self._can_edit(np_uri):
                raise Unauthorized()
            self.nanopub_manager.retire(np_uri)

    def add_files(self, uri, files, upload_type=NS.pv.File):
        """Publish a nanopublication describing ``files`` uploaded for ``uri``.

        Collections and datasets get one file entity per upload
        (``<uri>/<filename>``); any other ``upload_type`` stores only the first
        named file directly under ``uri``. Nanopublications that previously held
        these files are retired and linked via ``prov:wasRevisionOf``.
        """
        nanopub = self.nanopub_manager.new()

        added_files = False

        old_nanopubs = []

        nanopub.assertion.add((uri, self.NS.RDF.type, upload_type))
        if upload_type == URIRef("http://purl.org/dc/dcmitype/Collection"):
            for f in files:
                filename = secure_filename(f.filename)
                if filename != '':
                    file_uri = URIRef(uri + "/" + filename)
                    old_nanopubs.extend(self.add_file(f, file_uri, nanopub))
                    nanopub.assertion.add((uri, NS.dc.hasPart, file_uri))
                    added_files = True
        elif upload_type == NS.dcat.Dataset:
            for f in files:
                filename = secure_filename(f.filename)
                if filename != '':
                    file_uri = URIRef(uri + "/" + filename)
                    old_nanopubs.extend(self.add_file(f, file_uri, nanopub))
                    nanopub.assertion.add((uri, NS.dcat.distribution, file_uri))
                    nanopub.assertion.add((file_uri, NS.RDF.type, NS.dcat.Distribution))
                    nanopub.assertion.add((file_uri, NS.dcat.downloadURL, file_uri))
                    added_files = True
        else:
            for f in files:
                if f.filename != '':
                    old_nanopubs.extend(self.add_file(f, uri, nanopub))
                    nanopub.assertion.add((uri, NS.RDF.type, NS.pv.File))
                    added_files = True
                    break

        if added_files:
            for old_np, old_np_assertion in old_nanopubs:
                nanopub.pubinfo.add((nanopub.assertion.identifier, NS.prov.wasRevisionOf,
                                     old_np_assertion))
                self.nanopub_manager.retire(old_np)

            for n in self.nanopub_manager.prepare(nanopub):
                self.nanopub_manager.publish(n)

    def _can_edit(self, uri):
        """Return True if the current user may modify the nanopublication ``uri``."""
        if self.managed:
            return True
        if current_user._get_current_object() is None:
            # This isn't null even when not authenticated, unless we are an autonomic agent.
            return True
        if not hasattr(current_user, 'identifier'):  # This is an anonymous user.
            return False
        if (current_user.has_role('Publisher') or current_user.has_role('Editor')
                or current_user.has_role('Admin')):
            return True
        # Otherwise only the recorded contributor may edit.
        query = (
            'ask { '
            '?nanopub np:hasAssertion ?assertion; np:hasPublicationInfo ?info. '
            'graph ?info { ?assertion dc:contributor ?user. } '
            '}'
        )
        if self.db.query(query,
                         initBindings=dict(nanopub=uri, user=current_user.identifier),
                         initNs=dict(np=self.NS.np, dc=self.NS.dc)):
            return True
        return False

    def configure_views(self):
        """Register request hooks, template helpers and all HTTP routes."""

        def sort_by(resources, property):
            return sorted(resources, key=lambda x: x.value(property))

        def camel_case_split(identifier):
            matches = finditer('.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)', identifier)
            return [m.group(0) for m in matches]

        # Tried in order; the first property yielding a label wins.
        label_properties = [self.NS.skos.prefLabel, self.NS.RDFS.label, self.NS.schema.name,
                            self.NS.dc.title, self.NS.foaf.name]

        @lru_cache(maxsize=1000)
        def get_remote_label(uri):
            """Look up a label for ``uri`` in the knowledge and admin stores.

            Falls back to a person's given/family name, then to a title-cased
            form of the URI's qname, then to the URI itself.
            """
            for db in [self.db, self.admin_db]:
                graph = Graph()
                try:
                    db.nsBindings = {}
                    graph += db.query(
                        'select ?s ?p ?o where { hint:Query hint:optimizer "Runtime" . ?s ?p ?o.}',
                        initNs=self.NS.prefixes, initBindings=dict(s=uri))
                    db.nsBindings = {}
                except Exception:
                    # Best effort: an unreachable store just yields no triples.
                    pass
                resource_entity = graph.resource(uri)
                if len(resource_entity.graph) == 0:
                    continue
                for property in label_properties:
                    labels = self.lang_filter(resource_entity[property])
                    if len(labels) > 0:
                        return labels[0]

                name = [x.value for x in [resource_entity.value(self.NS.foaf.givenName),
                                          resource_entity.value(self.NS.foaf.familyName)]
                        if x is not None]
                if len(name) > 0:
                    return ' '.join(name)
            try:
                label = self.db.qname(uri).split(":")[1].replace("_", " ")
                return ' '.join(camel_case_split(label)).title()
            except Exception as e:
                print(str(e), uri)
                return str(uri)

        def get_label(resource):
            """Return a label from the in-memory resource, else look it up remotely."""
            for property in label_properties:
                labels = self.lang_filter(resource[property])
                if len(labels) > 0:
                    return labels[0]
            return get_remote_label(resource.identifier)

        @self.before_request
        def load_forms():
            """Run configured authenticators and expose helpers on ``flask.g``."""
            if 'authenticators' in self.config:
                for authenticator in self.config['authenticators']:
                    user = authenticator.authenticate(request, self.datastore, self.config)
                    if user is not None:
                        # login_user(user)
                        break

            g.ns = self.NS
            g.get_summary = get_summary
            g.get_label = get_label
            g.labelize = self.labelize
            g.get_resource = self.get_resource
            g.get_entity = self.get_entity
            g.rdflib = rdflib
            g.isinstance = isinstance
            g.current_user = current_user
            g.slugify = slugify
            g.db = self.db

        @self.login_manager.user_loader
        def load_user(user_id):
            if user_id is not None:
                return self.datastore.find_user(id=user_id)
            return None

        # URL extension -> MIME type.
        extensions = {
            "rdf": "application/rdf+xml",
            "jsonld": "application/ld+json",
            "json": "application/json",
            "ttl": "text/turtle",
            "trig": "application/trig",
            "turtle": "text/turtle",
            "owl": "application/rdf+xml",
            "nq": "application/n-quads",
            "nt": "application/n-triples",
            "html": "text/html"
        }

        # MIME type -> rdflib serializer/parser name (None for HTML types).
        dataFormats = {
            "application/rdf+xml": "xml",
            "application/ld+json": 'json-ld',
            "application/json": 'json-ld',
            "text/turtle": "turtle",
            "application/trig": "trig",
            "application/n-quads": "nquads",
            "application/n-triples": "nt",
            "application/rdf+json": "json",
            "text/html": None,
            "application/xhtml+xml": None,
            "application/xhtml": None,
            None: "json-ld"
        }

        htmls = set(['application/xhtml', 'text/html', 'application/xhtml+xml'])

        def get_graphs(graphs):
            """Return a Dataset holding every quad of the given named graphs."""
            query = ('select ?s ?p ?o ?g where { '
                     'hint:Query hint:optimizer "Runtime" . '
                     'graph ?g {?s ?p ?o} '
                     '} values ?g { %s }')
            query = query % ' '.join([graph.n3() for graph in graphs])
            quads = self.db.store.query(query, initNs=self.NS.prefixes)
            result = rdflib.Dataset()
            result.addN(quads)
            return result

        def explain(graph):
            """Collect the nanopublications whose assertions contain triples of ``graph``.

            NOTE(review): the previous body referenced an undefined ``entity``
            on return and therefore always raised NameError; returning the
            collected graph is the assumed intent - confirm.
            """
            values = ')\n ('.join([' '.join([x.n3() for x in triple])
                                   for triple in graph.triples((None, None, None))])
            values = 'VALUES (?s ?p ?o)\n{\n(' + values + ')\n}'

            try:
                nanopubs = self.db.query(
                    'select distinct ?np where { '
                    'hint:Query hint:optimizer "Runtime" . '
                    '?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g; '
                    'np:hasPublicationInfo ?pubinfo; '
                    'np:hasAssertion ?assertion; '
                    'graph ?assertion { ?s ?p ?o.} '
                    '}' + values, initNs=self.NS.prefixes)
                result = ConjunctiveGraph()
                for nanopub_uri, in nanopubs:
                    self.nanopub_manager.get(nanopub_uri, result)
            except Exception as e:
                print(str(e))
                raise e
            return result

        def get_entity_sparql(entity):
            """Build the entity's graph from all nanopub graphs about it, via one query."""
            try:
                statements = self.db.query(
                    'select distinct ?s ?p ?o ?g where { '
                    'hint:Query hint:optimizer "Runtime" . '
                    '?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g; '
                    'np:hasPublicationInfo ?pubinfo; '
                    'np:hasAssertion ?assertion; '
                    '{graph ?np { ?np sio:isAbout ?e.}} '
                    'UNION '
                    '{graph ?assertion { ?e ?p ?o.}} '
                    'graph ?g { ?s ?p ?o } '
                    '}', initBindings={'e': entity}, initNs=self.NS.prefixes)
                result = ConjunctiveGraph()
                result.addN(statements)
            except Exception as e:
                print(str(e), entity)
                raise e
            return result.resource(entity)

        def get_entity_disk(entity):
            """Build the entity's graph by loading each relevant nanopub through the manager."""
            try:
                nanopubs = self.db.query(
                    'select distinct ?np where { '
                    'hint:Query hint:optimizer "Runtime" . '
                    '?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g; '
                    'np:hasPublicationInfo ?pubinfo; '
                    'np:hasAssertion ?assertion; '
                    '{graph ?np { ?np sio:isAbout ?e.}} '
                    'UNION '
                    '{graph ?assertion { ?e ?p ?o.}} '
                    '}', initBindings={'e': entity}, initNs=self.NS.prefixes)
                result = ConjunctiveGraph()
                for nanopub_uri, in nanopubs:
                    self.nanopub_manager.get(nanopub_uri, result)
            except Exception as e:
                print(str(e), entity)
                raise e
            return result.resource(entity)

        get_entity = get_entity_sparql

        self.get_entity = get_entity

        def get_summary(resource):
            """Yield ``(property, term)`` pairs for the resource's descriptive properties."""
            summary_properties = [
                self.NS.skos.definition,
                self.NS.schema.description,
                self.NS.dc.abstract,
                self.NS.dc.description,
                self.NS.dc.summary,
                self.NS.RDFS.comment,
                self.NS.dcelements.description,
                URIRef("http://purl.obolibrary.org/obo/IAO_0000115"),
                self.NS.prov.value,
                self.NS.sio.hasValue
            ]
            if 'summary_properties' in self.config:
                summary_properties.extend(self.config['summary_properties'])
            for property in summary_properties:
                terms = self.lang_filter(resource[property])
                for term in terms:
                    yield (property, term)

        self.get_summary = get_summary

        @self.route('/sparql', methods=['GET', 'POST'])
        @conditional_login_required
        def sparql_view():
            """Proxy read-only SPARQL requests to the knowledge store's query endpoint."""
            has_query = False
            for arg in list(request.args.keys()):
                if arg.lower() == "update":
                    return "Update not allowed.", 403
                if arg.lower() == 'query':
                    has_query = True
            if request.method == 'GET' and not has_query:
                return redirect(url_for('sparql_form'))
            if request.method == 'GET':
                headers = {}
                headers.update(request.headers)
                if 'Content-Length' in headers:
                    del headers['Content-Length']
                req = requests.get(self.db.store.query_endpoint,
                                   headers=headers, params=request.args)
            elif request.method == 'POST':
                # .get(): a POST without a Content-Type must not raise KeyError.
                if 'application/sparql-update' in request.headers.get('content-type', ''):
                    return "Update not allowed.", 403
                req = requests.post(self.db.store.query_endpoint, data=request.get_data(),
                                    headers=request.headers, params=request.args)
            response = Response(req.content, content_type=req.headers['content-type'])
            return response, req.status_code

        @self.route('/sparql.html')
        @conditional_login_required
        def sparql_form():
            template_args = dict(ns=self.NS,
                                 g=g,
                                 config=self.config,
                                 current_user=current_user,
                                 isinstance=isinstance,
                                 rdflib=rdflib,
                                 hasattr=hasattr,
                                 set=set)

            return render_template('sparql.html', endpoint="/sparql", **template_args)

        if 'WHYIS_CDN_DIR' in self.config and self.config['WHYIS_CDN_DIR'] is not None:
            @self.route('/cdn/<path:filename>')
            def cdn(filename):
                return send_from_directory(self.config['WHYIS_CDN_DIR'], filename)

        @self.route('/about.<format>', methods=['GET', 'POST', 'DELETE'])
        @self.weighted_route('/<path:name>', compare_key=bottom_compare_key,
                             methods=['GET', 'POST', 'DELETE'])
        @self.weighted_route('/<path:name>.<format>', compare_key=bottom_compare_key,
                             methods=['GET', 'POST', 'DELETE'])
        @self.route('/', methods=['GET', 'POST', 'DELETE'])
        @self.route('/home', methods=['GET', 'POST', 'DELETE'])
        @self.route('/about', methods=['GET', 'POST', 'DELETE'])
        @conditional_login_required
        def view(name=None, format=None, view=None):
            """Serve an entity: upload (POST), delete (DELETE), or render/serialize (GET)."""
            self.db.store.nsBindings = {}
            content_type = None
            if format is not None:
                if format in extensions:
                    content_type = extensions[format]
                else:
                    # Unknown extension: treat it as part of the entity name.
                    name = '.'.join([name, format])
            if name is not None:
                entity = self.NS.local[name]
            elif 'uri' in request.args:
                entity = URIRef(request.args['uri'])
            else:
                entity = self.NS.local.Home

            if request.method == 'POST':
                print("uploading file", entity)
                if len(request.files) == 0:
                    flash('No file uploaded')
                    return redirect(request.url)
                upload_type = rdflib.URIRef(request.form['upload_type'])
                self.add_files(entity, [y for x, y in request.files.items(multi=True)],
                               upload_type=upload_type)
                url = "/about?%s" % urlencode(dict(uri=str(entity), view="view"))
                print("redirecting to", url)
                return redirect(url)
            elif request.method == 'DELETE':
                self.delete_file(entity)
                return '', 204
            elif request.method == 'GET':
                resource = self.get_resource(entity)

                # 'view' is the default view
                fileid = resource.value(self.NS.whyis.hasFileID)
                if fileid is not None and 'view' not in request.args:
                    print(resource.identifier, fileid)
                    f = self.file_depot.get(fileid)
                    fsa = FileServeApp(f, self.config["file_archive"].get("cache_max_age",
                                                                          3600 * 24 * 7))
                    return fsa

                if content_type is None:
                    content_type = (request.headers['Accept'] if 'Accept' in request.headers
                                    else 'text/turtle')
                fmt = sadi.mimeparse.best_match(
                    [mt for mt in list(dataFormats.keys()) if mt is not None], content_type)
                if 'view' in request.args or fmt in htmls:
                    return render_view(resource)
                elif fmt in dataFormats:
                    output_graph = ConjunctiveGraph()
                    result, status, headers = render_view(resource, view='describe')
                    output_graph.parse(data=result, format="json-ld")
                    # Answer with the negotiated type, not the raw Accept header.
                    return (output_graph.serialize(format=dataFormats[fmt]), 200,
                            {'Content-Type': fmt})
                else:
                    return render_view(resource)

        def render_view(resource, view=None, args=None):
            """Render ``resource`` with the template the vocabulary assigns to its types.

            :param view: view identifier; defaults to the ``view`` request arg, then ``'view'``
            :param args: values exposed to the template as ``args`` (default: request args)
            :returns: ``(body, 200, headers)``; aborts with 404 when no view matches
            """
            template_args = dict()
            template_args.update(self.template_imports)
            template_args.update(dict(
                ns=self.NS,
                this=resource, g=g,
                current_user=current_user,
                isinstance=isinstance,
                args=request.args if args is None else args,
                url_for=url_for,
                get_entity=get_entity,
                get_summary=get_summary,
                search=search,
                rdflib=rdflib,
                config=self.config,
                hasattr=hasattr,
                set=set))
            if view is None and 'view' in request.args:
                view = request.args['view']

            if view is None:
                view = 'view'

            types = []
            if 'as' in request.args:
                # Must be a (type, priority) pair like every other entry.
                types = [(URIRef(request.args['as']), 0)]

            types.extend((x, 1) for x in self.vocab[resource.identifier: NS.RDF.type])
            if not types:
                # KG types cannot override vocab types. This should keep views stable where critical.
                # Blank-node types are skipped: they cannot appear in a VALUES clause.
                types.extend([(x.identifier, 1) for x in resource[NS.RDF.type]
                              if isinstance(x.identifier, rdflib.URIRef)])
            # The view name may come from the request, so let rdflib quote it.
            view_literal = Literal(view).n3()
            type_string = ' '.join(["(%s %d %s)" % (x.n3(), i, view_literal) for x, i in types])
            view_query = (
                'select ?id ?view (count(?mid)+?priority as ?rank) ?class ?c where { '
                'values (?c ?priority ?id) { %s } '
                '?c rdfs:subClassOf* ?mid. '
                '?mid rdfs:subClassOf* ?class. '
                '?class ?viewProperty ?view. '
                '?viewProperty rdfs:subPropertyOf* whyis:hasView. '
                '?viewProperty dc:identifier ?id. '
                '} group by ?c ?class order by ?rank '
            ) % type_string

            views = list(self.vocab.query(view_query,
                                          initNs=dict(whyis=self.NS.whyis, dc=self.NS.dc)))
            if len(views) == 0:
                abort(404)

            headers = {'Content-Type': "text/html"}
            extension = views[0]['view'].value.split(".")[-1]
            if extension in extensions:
                headers['Content-Type'] = extensions[extension]

            # default view (list of nanopubs)
            # if available, replace with class view
            # if available, replace with instance view
            return render_template(views[0]['view'].value, **template_args), 200, headers
        self.render_view = render_view

        def render_nanopub(data, code, headers=None):
            """Render a nanopublication through ``render_view`` (404 page if ``data`` is None)."""
            if data is None:
                return make_response("<h1>Not Found</h1>", 404)

            entity = app.Entity(ConjunctiveGraph(data.store), data.identifier)
            entity.nanopub = data
            data, code, headers = render_view(entity)
            resp = make_response(data, code)
            resp.headers.extend(headers or {})
            return resp

        app = self

        self.nanopub_manager = NanopublicationManager(
            app.db.store,
            Namespace('%s/pub/' % (app.config['lod_prefix'])),
            self,
            update_listener=self.nanopub_update_listener)

        def _get_graph():
            """Parse the request body into a graph according to its Content-Type."""
            inputGraph = ConjunctiveGraph()
            contentType = request.headers['Content-Type']
            encoding = 'utf8' if not request.content_encoding else request.content_encoding
            content = str(request.data, encoding)
            fmt = sadi.mimeparse.best_match(
                [mt for mt in list(dataFormats.keys()) if mt is not None], contentType)
            if fmt in dataFormats:
                inputGraph.parse(data=content, format=dataFormats[fmt])
            return inputGraph

        def _get_uri(ident):
            return URIRef('%s/pub/%s' % (app.config['lod_prefix'], ident))

        @self.route('/pub/<ident>', methods=['GET'])
        @self.route('/pub/<ident>.<format>', methods=['GET'])
        @conditional_login_required
        def get_nanopub(ident, format=None):
            """Return a nanopublication as HTML or in the negotiated RDF serialization."""
            ident = ident.split("_")[0]
            uri = _get_uri(ident)
            result = app.nanopub_manager.get(uri)
            if result is None:
                abort(404)

            content_type = None

            if format is not None and format in extensions:
                content_type = extensions[format]
            if content_type is None:
                content_type = (request.headers['Accept'] if 'Accept' in request.headers
                                else 'application/ld+json')
            fmt = sadi.mimeparse.best_match(
                [mt for mt in list(dataFormats.keys()) if mt is not None], content_type)
            if 'view' in request.args or fmt in htmls:
                return render_nanopub(result, 200)
            elif fmt in dataFormats:
                response = Response(result.serialize(format=dataFormats[fmt]), content_type=fmt)
                return response, 200
            # No acceptable representation; previously this fell through and
            # returned None, which Flask rejects.
            abort(406)

        @self.route('/pub/<ident>', methods=['DELETE'])
        @login_required
        def delete_nanopub(ident):
            """Retire a nanopublication if the current user may edit it."""
            ident = ident.split("_")[0]
            uri = _get_uri(ident)
            if not app._can_edit(uri):
                return '<h1>Not Authorized</h1>', 401
            app.nanopub_manager.retire(uri)
            return '', 204

        @self.route('/pub/<ident>', methods=['PUT'])
        @login_required
        def put_nanopub(ident):
            """Replace a nanopublication with the revision in the request body.

            NOTE(review): the previous body called ``_prep_nanopub`` with two
            arguments (it takes one) and could never succeed. This version
            assumes ``nanopub_manager.get`` returns the existing nanopublication
            (or None), as ``get_nanopub`` relies on - confirm.
            """
            nanopub_uri = _get_uri(ident)
            old_nanopub = app.nanopub_manager.get(nanopub_uri)
            if old_nanopub is None:
                abort(404)
            # Same authorization rule as delete_nanopub, since this retires too.
            if not app._can_edit(nanopub_uri):
                return '<h1>Not Authorized</h1>', 401
            inputGraph = _get_graph()
            headers = {}
            retired = False
            for nanopub in app.nanopub_manager.prepare(inputGraph):
                _prep_nanopub(nanopub)
                nanopub.pubinfo.set((nanopub.assertion.identifier, app.NS.prov.wasRevisionOf,
                                     old_nanopub.assertion.identifier))
                if not retired:
                    # Retire the old version once, and only if there is a replacement.
                    app.nanopub_manager.retire(nanopub_uri)
                    retired = True
                headers['Location'] = nanopub.identifier
                app.nanopub_manager.publish(nanopub)
            return '', 201, headers

        def _prep_nanopub(nanopub):
            """Expand embedded content in the nanopub's graphs and credit the current user."""
            about = nanopub.nanopub_resource.value(app.NS.sio.isAbout)
            _prep_graph(nanopub.assertion_resource,
                        about.identifier if about is not None else None)
            _prep_graph(nanopub.provenance_resource, nanopub.assertion_resource.identifier)
            nanopub.pubinfo.add((nanopub.assertion.identifier, app.NS.dc.contributor,
                                 current_user.identifier))
            return nanopub

        @self.route('/pub/<ident>', methods=['POST'])
        @self.route('/pub', methods=['POST'])
        @login_required
        def post_nanopub(ident=None):
            """Publish the nanopublications in the request body; with an ident, act as PUT."""
            if ident is not None:
                # Was self.put(ident), which does not exist on the app.
                return put_nanopub(ident)
            inputGraph = _get_graph()
            headers = {}
            for nanopub in app.nanopub_manager.prepare(inputGraph):
                _prep_nanopub(nanopub)
                headers['Location'] = nanopub.identifier
                app.nanopub_manager.publish(nanopub)

            return '', 201, headers

        def _prep_graph(resource, about=None):
            """Expand a graph's ``prov:value`` payload into triples of that graph.

            HTML (or markdown converted to HTML) is stored as ``sioc:content``
            and parsed as RDFa; other known content types are parsed with the
            matching rdflib parser.
            """
            content_type = resource.value(app.NS.ov.hasContentType)
            if content_type is not None:
                content_type = content_type.value
            graph = Graph(store=resource.graph.store, identifier=resource.identifier)
            text = resource.value(app.NS.prov.value)
            if content_type is not None and text is not None:
                html = None
                if content_type in ["text/html", "application/xhtml+xml"]:
                    html = Literal(text.value, datatype=NS.RDF.HTML)
                if content_type == 'text/markdown':
                    html = markdown.markdown(text.value)
                    attributes = ['vocab="%s"' % app.NS.local,
                                  'base="%s"' % app.NS.local,
                                  'prefix="%s"' % ' '.join(
                                      ['%s: %s' % x for x in list(app.NS.prefixes.items())])]
                    if about is not None:
                        attributes.append('resource="%s"' % about)
                    html = '<div %s>%s</div>' % (' '.join(attributes), html)
                    html = Literal(html, datatype=NS.RDF.HTML)
                    text = html
                    content_type = "text/html"
                if html is not None:
                    resource.set(app.NS.sioc.content, html)
                    try:
                        graph.remove((None, None, None))
                        graph.parse(data=text, format='rdfa', publicID=app.NS.local)
                    except Exception:
                        # Best effort: content without usable RDFa is kept as plain HTML.
                        pass
                else:
                    if content_type in dataFormats:
                        graph.parse(data=text, format=dataFormats[content_type],
                                    publicID=app.NS.local)

    def get_send_file_max_age(self, filename):
        """Disable static-file caching in debug mode; otherwise defer to the base class."""
        if self.debug:
            return 0
        else:
            return Empty.get_send_file_max_age(self, filename)
if not instance: instance = MastodonInstance(instance=acct.mastodon_instance, popularity=10) db.session.add(instance) amount = 0.01 if acct.policy_enabled: amount = 0.5 for _ in acct.sessions: amount += 0.1 instance.bump(amount / max(1, instance.popularity)) # normalise scores so the top is 20 top_pop = (db.session.query(db.func.max( MastodonInstance.popularity)).scalar()) MastodonInstance.query.update({ MastodonInstance.popularity: MastodonInstance.popularity * 20 / top_pop }) db.session.commit() app.add_periodic_task(40, queue_fetch_for_most_stale_accounts) app.add_periodic_task(9, queue_deletes) app.add_periodic_task(6, refresh_account_with_oldest_post) app.add_periodic_task(50, refresh_account_with_longest_time_since_refresh) app.add_periodic_task(300, periodic_cleanup) app.add_periodic_task(300, update_mastodon_instances_popularity) if __name__ == '__main__': app.worker_main()
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register the recurring jobs on ``sender`` and queue the start-up catch-up runs.

    :param sender: Celery application the periodic tasks are added to
    :param kwargs: extra arguments supplied by the caller; unused
    """
    minute = 60
    hour = 60 * minute
    day = 24 * hour
    week = 7 * day

    # (interval in seconds, task) - registered in this order.
    interval_jobs = (
        (hour, periodic_curse_login),
        (15 * minute, periodic_remove_expired_caches),
        (15 * minute, periodic_fill_missing_addons),
        # todo: replacement for periodic feeds
        (day, periodic_find_hidden_addons),    # daily
        (week, periodic_request_all_files),    # weekly
        (week, periodic_request_all_addons),   # weekly
    )
    for interval, job in interval_jobs:
        sender.add_periodic_task(interval, job.s())

    # every hour at XX:00
    sender.add_periodic_task(crontab(minute='0', hour='*'), periodic_keep_history.s())

    periodic_fill_missing_addons.apply_async(countdown=30)

    # Mainly for staging, so we don't redo a full dl every time the env restart
    # if it's been less than a day. The hourly & daily's will get it.
    # (redis key of the last run, task, delay in seconds before the catch-up run)
    catch_up_jobs = (
        ('periodic-find_hidden_addons-last', periodic_find_hidden_addons, hour),
        ('periodic-request_all_files-last', periodic_request_all_files, 4 * hour),
        ('periodic-request_all_addons-last', periodic_request_all_addons, 4 * hour),
    )
    for key, job, delay in catch_up_jobs:
        stamp = redis_store.get(key)
        overdue = (
            stamp is None
            or datetime.now() - datetime.fromtimestamp(int(stamp)) > timedelta(days=1)
        )
        if overdue:
            job.apply_async(countdown=delay)
import os

from celery import Celery

# NOTE(review): this project import runs before DJANGO_SETTINGS_MODULE is set
# below; confirm that `offers.tasks` does not need Django settings at import
# time.
from offers.tasks import OfferRefresher

# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mieszkania_wwa.settings')

# Celery application for the project, using a Redis broker on localhost.
app = Celery('mieszkania_wwa', broker='redis://localhost')


@app.task
def refresh_offers():
    """Celery task: call ``refresh()`` on the ``OfferRefresher`` instance."""
    OfferRefresher.instance().refresh()


# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
#   should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Load task modules from all registered Django app configs.
app.autodiscover_tasks()

# Schedule `refresh_offers` with a 30.0 interval (seconds, per Celery's
# add_periodic_task), registered under the entry name 'refresh 30'.
app.add_periodic_task(30.0, refresh_offers, name='refresh 30')

app.conf.timezone = 'UTC'
class App(Empty): def configure_extensions(self): Empty.configure_extensions(self) self.celery = Celery(self.name, broker=self.config['CELERY_BROKER_URL'], beat=True) self.celery.conf.update(self.config) app = self if 'root_path' in self.config: self.root_path = self.config['root_path'] if 'SATORU_TEMPLATE_DIR' in self.config and app.config[ 'SATORU_TEMPLATE_DIR'] is not None: my_loader = jinja2.ChoiceLoader([ jinja2.FileSystemLoader(p) for p in self.config['SATORU_TEMPLATE_DIR'] ] + [app.jinja_loader]) app.jinja_loader = my_loader def setup_task(service): service.app = app print service result = None if service.query_predicate == self.NS.graphene.globalChangeQuery: result = process_resource else: result = process_nanopub result.service = lambda: service return result @self.celery.task def process_resource(service_name): service = self.config['inferencers'][service_name] print service service.process_graph(app.db) @self.celery.task def process_nanopub(nanopub_uri, service_name): service = self.config['inferencers'][service_name] print service, nanopub_uri nanopub = app.nanopub_manager.get(nanopub_uri) service.process_graph(nanopub) def setup_periodic_task(task): @self.celery.task def find_instances(): print "Triggered task", task['name'] for x, in app.db.query(task['service'].get_query()): task['do'](x) @self.celery.task def do_task(uri): print "Running task", task['name'], 'on', uri resource = app.get_resource(uri) result = task['service'].process_graph(resource.graph) task['service'].app = app task['find_instances'] = find_instances task['do'] = do_task return task app.inference_tasks = [] if 'inference_tasks' in self.config: app.inference_tasks = [ setup_periodic_task(task) for task in self.config['inference_tasks'] ] for task in app.inference_tasks: if 'schedule' in task: #print "Scheduling task", task['name'], task['schedule'] self.celery.add_periodic_task(crontab(**task['schedule']), task['find_instances'].s(), name=task['name']) else: 
task['find_instances'].delay() @self.celery.task() def update(nanopub_uri): '''gets called whenever there is a change in the knowledge graph. Performs a breadth-first knowledge expansion of the current change.''' #print "Updating on", nanopub_uri nanopub = app.nanopub_manager.get(nanopub_uri) nanopub_graph = ConjunctiveGraph(nanopub.store) if 'inferencers' in self.config: for name, service in self.config['inferencers'].items(): service.app = self if service.query_predicate == self.NS.graphene.globalChangeQuery: #print "checking", name, service.get_query() process_resource(name) if service.query_predicate == self.NS.graphene.updateChangeQuery: #print "checking", name, nanopub_uri, service.get_query() if len(list(nanopub_graph.query( service.get_query()))) > 0: print "invoking", name, nanopub_uri process_nanopub(nanopub_uri, name) def run_update(nanopub_uri): update.delay(nanopub_uri) self.nanopub_update_listener = run_update @self.celery.task(retry_backoff=True, retry_jitter=True, autoretry_for=(Exception, ), max_retries=4) def run_importer(entity_name): importer = self.find_importer(entity_name) modified = importer.last_modified(entity_name, self.db, self.nanopub_manager) updated = importer.modified(entity_name) if updated is None: updated = datetime.now(pytz.utc) print "Remote modified:", updated, type( updated), "Local modified:", modified, type(modified) if modified is None or (updated - modified ).total_seconds() > importer.min_modified: importer.load(entity_name, self.db, self.nanopub_manager) self.run_importer = run_importer def configure_database(self): """ Database configuration should be set here """ self.NS = NS self.NS.local = rdflib.Namespace(self.config['lod_prefix'] + '/') self.admin_db = database.engine_from_config(self.config, "admin_") self.db = database.engine_from_config(self.config, "knowledge_") self.db.app = self load_namespaces(self.db, locals()) Resource.db = self.admin_db self.vocab = Graph() #print URIRef(self.config['vocab_file']) 
self.vocab.load(open("default_vocab.ttl"), format="turtle") self.vocab.load(open(self.config['vocab_file']), format="turtle") self.role_api = ld.LocalResource(self.NS.prov.Role, "role", self.admin_db.store, self.vocab, self.config['lod_prefix'], RoleMixin) self.Role = self.role_api.alchemy self.user_api = ld.LocalResource(self.NS.prov.Agent, "user", self.admin_db.store, self.vocab, self.config['lod_prefix'], UserMixin) self.User = self.user_api.alchemy self.nanopub_api = ld.LocalResource(self.NS.np.Nanopublication, "pub", self.db.store, self.vocab, self.config['lod_prefix'], name="Graph") self.Nanopub = self.nanopub_api.alchemy self.classes = mapper(self.Role, self.User) self.datastore = RDFAlchemyUserDatastore(self.admin_db, self.classes, self.User, self.Role) self.security = Security(self, self.datastore, register_form=ExtendedRegisterForm) #self.mail = Mail(self) DepotManager.configure('nanopublications', self.config['nanopub_archive']) DepotManager.configure('files', self.config['file_archive']) self.file_depot = DepotManager.get('files') def weighted_route(self, *args, **kwargs): def decorator(view_func): compare_key = kwargs.pop('compare_key', None) # register view_func with route self.route(*args, **kwargs)(view_func) if compare_key is not None: rule = self.url_map._rules[-1] rule.match_compare_key = lambda: compare_key return view_func return decorator def map_entity(self, name): for importer in self.config['namespaces']: if importer.matches(name): new_name = importer.map(name) #print 'Found mapped URI', new_name return new_name, importer return None, None def find_importer(self, name): for importer in self.config['namespaces']: if importer.resource_matches(name): return importer return None class Entity(rdflib.resource.Resource): _this = None def this(self): if self._this is None: self._this = self._graph.app.get_entity(self.identifier) return self._this _description = None def description(self): if self._description is None: # try: result = Graph() # try: 
for s, p, o, c in self._graph.query( ''' construct { ?e ?p ?o. ?o rdfs:label ?label. ?o skos:prefLabel ?prefLabel. ?o dc:title ?title. ?o foaf:name ?name. ?o ?pattr ?oatter. ?oattr rdfs:label ?oattrlabel } where { graph ?g { ?e ?p ?o. } ?g a np:Assertion. optional { ?e sio:hasAttribute|sio:hasPart ?o. ?o ?pattr ?oattr. optional { ?oattr rdfs:label ?oattrlabel. } } optional { ?o rdfs:label ?label. } optional { ?o skos:prefLabel ?prefLabel. } optional { ?o dc:title ?title. } optional { ?o foaf:name ?name. } }''', initNs=NS.prefixes, initBindings={'e': self.identifier}): result.add((s, p, o)) # except: # pass self._description = result.resource(self.identifier) # except Exception as e: # print str(e), self.identifier # raise e return self._description def get_resource(self, entity): mapped_name, importer = self.map_entity(entity) if mapped_name is not None: entity = mapped_name if importer is None: importer = self.find_importer(entity) if importer is not None: modified = importer.last_modified(entity, self.db, self.nanopub_manager) if modified is None: self.run_importer(entity) else: print "Type of modified is", type(modified) self.run_importer.delay(entity) return self.Entity(self.db, entity) def configure_template_filters(self): import urllib from markupsafe import Markup @self.template_filter('urlencode') def urlencode_filter(s): if type(s) == 'Markup': s = s.unescape() s = s.encode('utf8') s = urllib.quote_plus(s) return Markup(s) @self.template_filter('lang') def lang_filter(terms): terms = list(terms) if terms is None or len(terms) == 0: return [] resources = [x for x in terms if not isinstance(x, rdflib.Literal)] literals = [x for x in terms if isinstance(x, rdflib.Literal)] languages = set( [x.language for x in literals if x.language is not None]) best_lang = request.accept_languages.best_match(list(languages)) best_terms = [x for x in literals if x.language == best_lang] if len(best_terms) == 0: best_terms = [ x for x in literals if x.language == 
self.config['default_language'] ] if len(best_terms) > 0: return resources + best_terms return resources self.lang_filter = lang_filter def add_file(self, f, entity, nanopub): old_nanopubs = [] for np_uri, np_assertion, in self.db.query( '''select distinct ?np ?assertion where { graph ?assertion {?e graphene:hasFileID ?fileid} ?np np:hasAssertion ?assertion. }''', initNs=NS.prefixes, initBindings=dict(e=entity)): if not self._can_edit(np_uri): raise Unauthorized() old_nanopubs.append((np_uri, np_assertion)) fileid = self.file_depot.create(f.stream, f.filename, f.mimetype) nanopub.assertion.add((entity, NS.graphene.hasFileID, Literal(fileid))) nanopub.assertion.add((entity, NS.dc.contributor, current_user.resUri)) nanopub.assertion.add( (entity, NS.dc.created, Literal(datetime.utcnow()))) nanopub.assertion.add( (entity, NS.ov.hasContentType, Literal(f.mimetype))) nanopub.assertion.add((entity, NS.RDF.type, NS.mediaTypes[f.mimetype])) nanopub.assertion.add( (NS.mediaTypes[f.mimetype], NS.RDF.type, NS.dc.FileFormat)) nanopub.assertion.add( (entity, NS.RDF.type, NS.mediaTypes[f.mimetype.split('/')[0]])) nanopub.assertion.add((NS.mediaTypes[f.mimetype.split('/')[0]], NS.RDF.type, NS.dc.FileFormat)) nanopub.assertion.add((entity, NS.RDF.type, NS.pv.File)) nanopub.pubinfo.add((nanopub.assertion.identifier, NS.dc.contributor, current_user.resUri)) nanopub.pubinfo.add((nanopub.assertion.identifier, NS.dc.created, Literal(datetime.utcnow()))) return old_nanopubs def delete_file(self, entity): for np_uri, in self.db.query('''select distinct ?np where { graph ?np_assertion {?e graphene:hasFileID ?fileid} ?np np:hasAssertion ?np_assertion. 
}''', initNs=NS.prefixes, initBindings=dict(e=entity)): if not self._can_edit(np_uri): raise Unauthorized() self.nanopub_manager.retire(np_uri) def add_files(self, uri, files, upload_type=NS.pv.File): nanopub = self.nanopub_manager.new() added_files = False old_nanopubs = [] nanopub.assertion.add((uri, self.NS.RDF.type, upload_type)) if upload_type == URIRef("http://purl.org/dc/dcmitype/Collection"): for f in files: filename = secure_filename(f.filename) if filename != '': file_uri = URIRef(uri + "/" + filename) old_nanopubs.extend(self.add_file(f, file_uri, nanopub)) nanopub.assertion.add((uri, NS.dc.hasPart, file_uri)) added_files = True elif upload_type == NS.dcat.Dataset: for f in files: filename = secure_filename(f.filename) if filename != '': file_uri = URIRef(uri + "/" + filename) old_nanopubs.extend(self.add_file(f, file_uri, nanopub)) nanopub.assertion.add( (uri, NS.dcat.distribution, file_uri)) nanopub.assertion.add( (file_uri, NS.RDF.type, NS.dcat.Distribution)) nanopub.assertion.add( (file_uri, NS.dcat.downloadURL, file_uri)) added_files = True else: for f in files: if f.filename != '': old_nanopubs.extend(self.add_file(f, uri, nanopub)) nanopub.assertion.add((uri, ns.RDF.type, NS.pv.File)) added_files = True break if added_files: for old_np, old_np_assertion in old_nanopubs: nanopub.pubinfo.add((nanopub.assertion.identifier, NS.prov.wasRevisionOf, old_np_assertion)) self.nanopub_manager.retire(old_np) for n in self.nanopub_manager.prepare(nanopub): self.nanopub_manager.publish(n) def _can_edit(self, uri): if current_user.has_role('Publisher') or current_user.has_role( 'Editor') or current_user.has_role('Admin'): return True if self.db.query('''ask { ?nanopub np:hasAssertion ?assertion; np:hasPublicationInfo ?info. graph ?info { ?assertion dc:contributor ?user. } }''', initBindings=dict(nanopub=uri, user=current_user.resUri), initNs=dict(np=self.NS.np, dc=self.NS.dc)): #print "Is owner." 
return True return False def configure_views(self): def sort_by(resources, property): return sorted(resources, key=lambda x: x.value(property)) class InvitedAnonymousUser(AnonymousUserMixin): '''A user that has been referred via kikm references but does not have a user account.''' def __init__(self): self.roles = ImmutableList() def has_role(self, *args): """Returns `False`""" return False def is_active(self): return True @property def is_authenticated(self): return True def camel_case_split(identifier): matches = finditer( '.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)', identifier) return [m.group(0) for m in matches] label_properties = [ self.NS.skos.prefLabel, self.NS.RDFS.label, self.NS.dc.title, self.NS.foaf.name ] @lru def get_remote_label(uri): for db in [self.db, self.admin_db]: g = Graph() try: g += db.query('''select ?s ?p ?o where { ?s ?p ?o.}''', initNs=self.NS.prefixes, initBindings=dict(s=uri)) except: pass resource_entity = g.resource(uri) if len(resource_entity.graph) == 0: #print "skipping", db continue for property in label_properties: labels = self.lang_filter(resource_entity[property]) if len(labels) > 0: return labels[0] if len(labels) == 0: name = [ x.value for x in [ resource_entity.value(self.NS.foaf.givenName), resource_entity.value(self.NS.foaf.familyName) ] if x is not None ] if len(name) > 0: label = ' '.join(name) return label try: label = self.db.qname(uri).split(":")[1].replace("_", " ") return ' '.join(camel_case_split(label)).title() except Exception as e: print str(e), uri return str(uri) def get_label(resource): for property in label_properties: labels = self.lang_filter(resource[property]) #print "mem", property, label if len(labels) > 0: return labels[0] return get_remote_label(resource.identifier) @self.before_request def load_forms(): if 'API_KEY' in self.config: if 'API_KEY' in request.args and request.args[ 'API_KEY'] == self.config['API_KEY']: print 'logging in invited user' login_user(InvitedAnonymousUser()) 
#g.search_form = SearchForm() g.ns = self.NS g.get_summary = get_summary g.get_label = get_label g.get_entity = self.get_entity g.rdflib = rdflib g.isinstance = isinstance g.db = self.db @self.login_manager.user_loader def load_user(user_id): if user_id != None: #try: return self.datastore.find_user(id=user_id) #except: # return None else: return None extensions = { "rdf": "application/rdf+xml", "jsonld": "application/ld+json", "json": "application/json", "ttl": "text/turtle", "trig": "application/trig", "turtle": "text/turtle", "owl": "application/rdf+xml", "nq": "application/n-quads", "nt": "application/n-triples", "html": "text/html" } dataFormats = { "application/rdf+xml": "xml", "application/ld+json": 'json-ld', "text/turtle": "turtle", "application/trig": "trig", "application/n-quads": "nquads", "application/n-triples": "nt", "application/rdf+json": "json", None: "json-ld" } def get_graphs(graphs): query = 'select ?s ?p ?o ?g where {graph ?g {?s ?p ?o} } values ?g { %s }' query = query % ' '.join([graph.n3() for graph in graphs]) #print query quads = self.db.store.query(query) result = Dataset() result.addN(quads) return result def explain(graph): values = ')\n ('.join([ ' '.join([x.n3() for x in triple]) for triple in graph.triples((None, None, None)) ]) values = 'VALUES (?s ?p ?o)\n{\n(' + values + ')\n}' try: nanopubs = self.db.query('''select distinct ?np where { ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? 
?g; np:hasPublicationInfo ?pubinfo; np:hasAssertion ?assertion; graph ?assertion { ?s ?p ?o.} }''' + values, initNs={ 'np': self.NS.np, 'sio': self.NS.sio, 'dc': self.NS.dc, 'foaf': self.NS.foaf }) result = ConjunctiveGraph() for nanopub_uri, in nanopubs: self.nanopub_manager.get(nanopub_uri, result) except Exception as e: print str(e), entity raise e return result.resource(entity) def get_entity_sparql(entity): try: statements = self.db.query( '''select distinct ?s ?p ?o ?g where { ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? ?g; np:hasPublicationInfo ?pubinfo; np:hasAssertion ?assertion; {graph ?np { ?np sio:isAbout ?e.}} UNION {graph ?assertion { ?e ?p ?o.}} graph ?g { ?s ?p ?o } }''', initBindings={'e': entity}, initNs={ 'np': self.NS.np, 'sio': self.NS.sio, 'dc': self.NS.dc, 'foaf': self.NS.foaf }) result = ConjunctiveGraph() result.addN(statements) except Exception as e: print str(e), entity raise e #print result.serialize(format="trig") return result.resource(entity) def get_entity_disk(entity): try: nanopubs = self.db.query('''select distinct ?np where { ?np np:hasAssertion?|np:hasProvenance?|np:hasPublicationInfo? 
?g; np:hasPublicationInfo ?pubinfo; np:hasAssertion ?assertion; {graph ?np { ?np sio:isAbout ?e.}} UNION {graph ?assertion { ?e ?p ?o.}} }''', initBindings={'e': entity}, initNs={ 'np': self.NS.np, 'sio': self.NS.sio, 'dc': self.NS.dc, 'foaf': self.NS.foaf }) result = ConjunctiveGraph() for nanopub_uri, in nanopubs: self.nanopub_manager.get(nanopub_uri, result) # result.addN(nanopubs) except Exception as e: print str(e), entity raise e #print result.serialize(format="trig") return result.resource(entity) get_entity = get_entity_sparql self.get_entity = get_entity def get_summary(resource): summary_properties = [ self.NS.skos.definition, self.NS.dc.abstract, self.NS.dc.description, self.NS.dc.summary, self.NS.RDFS.comment, self.NS.dcelements.description ] for property in summary_properties: terms = self.lang_filter(resource[property]) for term in terms: yield (property, term) self.get_summary = get_summary @self.route('/sparql', methods=['GET', 'POST']) @login_required def sparql_view(): has_query = False for arg in request.args.keys(): if arg.lower() == "update": return "Update not allowed.", 403 if arg.lower() == 'query': has_query = True if request.method == 'GET' and not has_query: return redirect(url_for('sparql_form')) #print self.db.store.query_endpoint if request.method == 'GET': headers = {} headers.update(request.headers) if 'Content-Length' in headers: del headers['Content-Length'] req = requests.get(self.db.store.query_endpoint, headers=headers, params=request.args) elif request.method == 'POST': if 'application/sparql-update' in request.headers[ 'content-type']: return "Update not allowed.", 403 req = requests.post(self.db.store.query_endpoint, data=request.get_data(), headers=request.headers, params=request.args) #print self.db.store.query_endpoint #print req.status_code response = Response(req.content, content_type=req.headers['content-type']) #response.headers[con(req.headers) return response, req.status_code @self.route('/sparql.html') 
@login_required def sparql_form(): template_args = dict(ns=self.NS, g=g, current_user=current_user, isinstance=isinstance, rdflib=rdflib, hasattr=hasattr, set=set) return render_template('sparql.html', endpoint="/sparql", **template_args) if 'SATORU_CDN_DIR' in self.config and self.config[ 'SATORU_CDN_DIR'] is not None: @self.route('/cdn/<path:filename>') def cdn(filename): return send_from_directory( self.config['SATORU_CDN_DIR'], werkzeug.utils.secure_filename(filename)) @self.route('/about.<format>', methods=['GET', 'POST', 'DELETE']) @self.route('/about', methods=['GET', 'POST', 'DELETE']) @self.weighted_route('/<path:name>.<format>', compare_key=bottom_compare_key, methods=['GET', 'POST', 'DELETE']) @self.weighted_route('/<path:name>', compare_key=bottom_compare_key, methods=['GET', 'POST', 'DELETE']) @self.route('/', methods=['GET', 'POST', 'DELETE']) @login_required def view(name=None, format=None, view=None): if format is not None: if format in extensions: content_type = extensions[format] else: name = '.'.join([name, format]) if name is not None: entity = self.NS.local[name] elif 'uri' in request.args: entity = URIRef(request.args['uri']) else: entity = self.NS.local.Home if request.method == 'POST': if len(request.files) == 0: flash('No file uploaded') return redirect(request.url) upload_type = rdflib.URIRef(request.form['upload_type']) self.add_files( entity, [y for x, y in request.files.iteritems(multi=True)], upload_type=upload_type) url = "/about?%s" % urlencode( dict(uri=unicode(entity), view="view")) return redirect(url) elif request.method == 'DELETE': self.delete_file(entity) return '', 204 elif request.method == 'GET': resource = self.get_resource(entity) content_type = request.headers[ 'Accept'] if 'Accept' in request.headers else '*/*' #print entity htmls = set(['application/xhtml', 'text/html']) if 'view' in request.args or sadi.mimeparse.best_match( htmls, content_type) in htmls: return render_view(resource) else: fmt = 
dataFormats[sadi.mimeparse.best_match( [mt for mt in dataFormats.keys() if mt is not None], content_type)] return resource.this().graph.serialize(format=fmt) views = {} def render_view(resource): template_args = dict(ns=self.NS, this=resource, g=g, current_user=current_user, isinstance=isinstance, get_entity=get_entity, get_summary=get_summary, rdflib=rdflib, hasattr=hasattr, set=set) view = None if 'view' in request.args: view = request.args['view'] # 'view' is the default view fileid = resource.value(self.NS.graphene.hasFileID) if fileid is not None and view is None: f = self.file_depot.get(fileid) fsa = FileServeApp( f, self.config["file_archive"].get("cache_max_age", 3600 * 24 * 7)) return fsa if view is None: view = 'view' if 'as' in request.args: types = [URIRef(request.args['as']), 0] else: types = list([(x.identifier, 0) for x in resource[RDF.type]]) #print types #if len(types) == 0: types.append([self.NS.RDFS.Resource, 100]) #print view, resource.identifier, types type_string = ' '.join( ["(%s %d '%s')" % (x.n3(), i, view) for x, i in types]) view_query = '''select ?id ?view (count(?mid)+?priority as ?rank) ?class ?c where { values (?c ?priority ?id) { %s } ?c rdfs:subClassOf* ?mid. ?mid rdfs:subClassOf* ?class. ?class ?viewProperty ?view. ?viewProperty rdfs:subPropertyOf* graphene:hasView. ?viewProperty dc:identifier ?id. 
} group by ?c ?class order by ?rank ''' % type_string #print view_query views = list( self.vocab.query(view_query, initNs=dict(graphene=self.NS.graphene, dc=self.NS.dc))) #print '\n'.join([str(x.asdict()) for x in views]) if len(views) == 0: abort(404) headers = {'Content-Type': "text/html"} extension = views[0]['view'].value.split(".")[-1] if extension in extensions: headers['Content-Type'] = extensions[extension] # default view (list of nanopubs) # if available, replace with class view # if available, replace with instance view return render_template(views[0]['view'].value, **template_args), 200, headers def render_nanopub(data, code, headers=None): entity = app.Entity(ConjunctiveGraph(data.store), data.identifier) entity.nanopub = data data, code, headers = render_view(entity) resp = make_response(data, code) resp.headers.extend(headers or {}) return resp self.api = ld.LinkedDataApi(self, "", self.db.store, "") self.api.representations['text/html'] = render_nanopub #self.admin = Admin(self, name="graphene", template_mode='bootstrap3') #self.admin.add_view(ld.ModelView(self.nanopub_api, default_sort=RDFS.label)) #self.admin.add_view(ld.ModelView(self.role_api, default_sort=RDFS.label)) #self.admin.add_view(ld.ModelView(self.user_api, default_sort=foaf.familyName)) app = self self.nanopub_manager = NanopublicationManager( app.db.store, Namespace('%s/pub/' % (app.config['lod_prefix'])), update_listener=self.nanopub_update_listener) class NanopublicationResource(ld.LinkedDataResource): decorators = [login_required] def __init__(self): self.local_resource = app.nanopub_api def _get_uri(self, ident): return URIRef('%s/pub/%s' % (app.config['lod_prefix'], ident)) def get(self, ident): ident = ident.split("_")[0] uri = self._get_uri(ident) try: result = app.nanopub_manager.get(uri) except IOError: return 'Resource not found', 404 return result def delete(self, ident): uri = self._get_uri(ident) if not app._can_edit(uri): return '<h1>Not Authorized</h1>', 401 
app.nanopub_manager.retire(uri) #self.local_resource.delete(uri) return '', 204 def _get_graph(self): inputGraph = ConjunctiveGraph() contentType = request.headers['Content-Type'] sadi.deserialize(inputGraph, request.data, contentType) return inputGraph def put(self, ident): nanopub_uri = self._get_uri(ident) inputGraph = self._get_graph() old_nanopub = self._prep_nanopub(nanopub_uri, inputGraph) for nanopub in app.nanopub_manager.prepare(inputGraph): modified = Literal(datetime.utcnow()) nanopub.pubinfo.add((nanopub.assertion.identifier, app.NS.prov.wasRevisionOf, old_nanopub.assertion.identifier)) nanopub.pubinfo.add( (old_nanopub.assertion.identifier, app.NS.prov.invalidatedAtTime, modified)) nanopub.pubinfo.add((nanopub.assertion.identifier, app.NS.dc.modified, modified)) app.nanopub_manager.retire(nanopub_uri) app.nanopub_manager.publish(nanopub) def _prep_nanopub(self, nanopub_uri, graph): nanopub = Nanopublication(store=graph.store, identifier=nanopub_uri) about = nanopub.nanopub_resource.value(app.NS.sio.isAbout) #print nanopub.assertion_resource.identifier, about self._prep_graph(nanopub.assertion_resource, about.identifier) self._prep_graph(nanopub.pubinfo_resource, nanopub.assertion_resource.identifier) self._prep_graph(nanopub.provenance_resource, nanopub.assertion_resource.identifier) nanopub.pubinfo.add( (nanopub.assertion.identifier, app.NS.dc.contributor, current_user.resUri)) return nanopub def post(self, ident=None): if ident is not None: return self.put(ident) inputGraph = self._get_graph() for nanopub_uri in inputGraph.subjects( rdflib.RDF.type, app.NS.np.Nanopublication): nanopub = self._prep_nanopub(nanopub_uri, inputGraph) nanopub.pubinfo.add( (nanopub.assertion.identifier, app.NS.dc.created, Literal(datetime.utcnow()))) for nanopub in app.nanopub_manager.prepare(inputGraph): app.nanopub_manager.publish(nanopub) return '', 201 def _prep_graph(self, resource, about=None): #print '_prep_graph', resource.identifier, about content_type = 
resource.value(app.NS.ov.hasContentType) if content_type is not None: content_type = content_type.value #print 'graph content type', resource.identifier, content_type #print resource.graph.serialize(format="nquads") g = Graph(store=resource.graph.store, identifier=resource.identifier) text = resource.value(app.NS.prov.value) if content_type is not None and text is not None: #print 'Content type:', content_type, resource.identifier html = None if content_type in ["text/html", "application/xhtml+xml"]: html = Literal(text.value, datatype=RDF.HTML) if content_type == 'text/markdown': #print "Aha, markdown!" #print text.value html = markdown.markdown(text.value, extensions=['rdfa']) attributes = [ 'vocab="%s"' % app.NS.local, 'base="%s"' % app.NS.local, 'prefix="%s"' % ' '.join([ '%s: %s' % x for x in app.NS.prefixes.items() ]) ] if about is not None: attributes.append('resource="%s"' % about) html = '<div %s>%s</div>' % (' '.join(attributes), html) html = Literal(html, datatype=RDF.HTML) text = html content_type = "text/html" #print resource.identifier, content_type if html is not None: resource.add(app.NS.sioc.content, html) try: g.parse(data=text, format='rdfa') except: pass else: #print "Deserializing", g.identifier, 'as', content_type #print dataFormats if content_type in dataFormats: g.parse(data=text, format=dataFormats[content_type]) #print len(g) else: print "not attempting to deserialize." # try: # sadi.deserialize(g, text, content_type) # except: # pass #print Graph(store=resource.graph.store).serialize(format="trig") self.api.add_resource(NanopublicationResource, '/pub', '/pub/<ident>')
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register every recurring job on ``sender``.

    :param sender: Celery application the periodic tasks are added to.
    :param kwargs: Extra signal arguments; accepted and ignored.
    """
    quarter_hour = 15 * 60
    timetable = (
        (quarter_hour, p_remove_expired_caches),
        (quarter_hour, p_curse_checklogin),
        (quarter_hour, p_update_all_addons),
        (45 * 60, p_fill_incomplete_addons),
        (6 * 60 * 60, p_update_all_files),
        # Disabled: (7 * 24 * 60 * 60, p_find_hidden_addons),
        (crontab(minute='0'), p_keep_history),
    )
    for when, job in timetable:
        sender.add_periodic_task(when, job.s())
def setup_periodic_tasks(sender: Celery, **kwargs):
    """Register all recurring jobs on ``sender``.

    :param sender: Celery application the periodic tasks are added to.
    :param kwargs: Extra signal arguments; accepted and ignored.
    """
    # The one-second queue probe is skipped when DEBUG is on.
    if not settings.DEBUG:
        sender.add_periodic_task(1.0, redis_celery_queue_depth.s(), name="1 sec queue probe", priority=0)

    # Heartbeat every 10sec to make sure the worker is alive
    sender.add_periodic_task(10.0, redis_heartbeat.s(), name="10 sec heartbeat", priority=0)

    # Update events table partitions twice a week
    twice_a_week = crontab(day_of_week="mon,fri", hour=0, minute=0)
    sender.add_periodic_task(twice_a_week, update_event_partitions.s())

    if getattr(settings, "MULTI_TENANCY", False):
        # Cloud (posthog-cloud) cron jobs: every day midnight UTC
        sender.add_periodic_task(crontab(hour=0, minute=0), calculate_billing_daily_usage.s())
    else:
        # Send weekly status report on self-hosted instances
        sender.add_periodic_task(crontab(day_of_week="mon", hour=0, minute=0), status_report.s())

    sender.add_periodic_task(crontab(day_of_week="fri", hour=0, minute=0), clean_stale_partials.s())

    # delete old plugin logs every 4 hours
    sender.add_periodic_task(crontab(minute=0, hour="*/4"), delete_old_plugin_logs.s())

    # sync all Organization.available_features every hour
    sender.add_periodic_task(crontab(minute=30, hour="*"), sync_all_organization_available_features.s())

    sender.add_periodic_task(
        UPDATE_CACHED_DASHBOARD_ITEMS_INTERVAL_SECONDS,
        check_cached_items.s(),
        name="check dashboard items",
    )

    sender.add_periodic_task(crontab(minute="*/15"), check_async_migration_health.s())

    # every day at a random minute past midnight. Sends data from the
    # preceding whole day.
    usage_report_time = crontab(hour=0, minute=randrange(0, 40))
    sender.add_periodic_task(usage_report_time, send_org_usage_report.s(), name="send event usage report")

    # ClickHouse probes, each scheduled with an interval of 120.
    clickhouse_probes = (
        (clickhouse_lag, "clickhouse table lag"),
        (clickhouse_row_count, "clickhouse events table row count"),
        (clickhouse_part_count, "clickhouse table parts count"),
        (clickhouse_mutation_count, "clickhouse table mutations count"),
    )
    for probe, label in clickhouse_probes:
        sender.add_periodic_task(120, probe.s(), name=label)

    sender.add_periodic_task(crontab(minute=0, hour="*"), calculate_cohort_ids_in_feature_flags_task.s())

    # every day at a random minute past midnight. Randomize to avoid
    # overloading license.posthog.com
    license_report_time = crontab(hour=0, minute=randrange(0, 40))
    sender.add_periodic_task(license_report_time, clickhouse_send_license_usage.s())

    try:
        from ee.settings import MATERIALIZE_COLUMNS_SCHEDULE_CRON

        # The setting is a five-field cron expression separated by single
        # spaces; any other field count raises and is reported below.
        cron_fields = MATERIALIZE_COLUMNS_SCHEDULE_CRON.strip().split(" ")
        minute, hour, day_of_month, month_of_year, day_of_week = cron_fields
        materialize_schedule = crontab(
            minute=minute,
            hour=hour,
            day_of_month=day_of_month,
            month_of_year=month_of_year,
            day_of_week=day_of_week,
        )
        sender.add_periodic_task(
            materialize_schedule,
            clickhouse_materialize_columns.s(),
            name="clickhouse materialize columns",
        )
        sender.add_periodic_task(
            crontab(hour="*/4", minute=0),
            clickhouse_mark_all_materialized.s(),
            name="clickhouse mark all columns as materialized",
        )
    except Exception as err:
        # Any failure here (including the import) is reported and scheduling
        # of the remaining tasks continues.
        capture_exception(err)
        print(f"Scheduling materialized column task failed: {err}")

    sender.add_periodic_task(120, calculate_cohort.s(), name="recalculate cohorts")

    if settings.ASYNC_EVENT_PROPERTY_USAGE:
        sender.add_periodic_task(
            EVENT_PROPERTY_USAGE_INTERVAL_SECONDS,
            calculate_event_property_usage.s(),
            name="calculate event property usage",
        )
# -*- coding:utf-8 -*- import os from celery import Celery from df_celery.tasks import periodic_task os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'daily_fresh.settings') app = Celery('daily_fresh') app.config_from_object('django.conf:settings', namespace='CELERY') app.autodiscover_tasks() app.add_periodic_task(5, periodic_task)