Esempio n. 1
0
    def configure_extensions(self):
        """Set up Celery, the Redis client, template loading and background tasks.

        Extends ``Empty.configure_extensions`` and then, in order:

        * creates ``self.celery`` from ``CELERY_BROKER_URL`` and copies the
          application configuration into the Celery configuration;
        * stores the broker channel's client as ``self.redis``;
        * applies the optional ``root_path`` and ``WHYIS_TEMPLATE_DIR`` settings;
        * registers the inferencer tasks (``process_resource``,
          ``process_nanopub``, ``update``) and the periodic tasks listed under
          ``inference_tasks``;
        * exposes ``self.nanopub_update_listener`` and ``self.run_importer``;
        * imports the modules listed under ``template_imports`` into
          ``self.template_imports``.

        Raises:
            KeyError: if ``CELERY_BROKER_URL`` is missing from the configuration.
            Exception: whatever ``importlib.import_module`` raises for a
                ``template_imports`` entry; it is reported and re-raised.
        """
        # Imported locally because only the inspect() helpers below need it.
        import ast

        Empty.configure_extensions(self)
        self.celery = Celery(self.name, broker=self.config['CELERY_BROKER_URL'], beat=True)
        self.celery.conf.update(self.config)

        # The nested functions refer to the application as ``app``; this matters
        # inside ``run_importer``, whose own ``self`` parameter shadows this one.
        app = self

        # Used further down with incr()/set(), i.e. as a Redis client.
        # NOTE(review): this presumably requires a Redis broker -- confirm.
        self.redis = self.celery.broker_connection().default_channel.client

        if 'root_path' in self.config:
            self.root_path = self.config['root_path']

        if 'WHYIS_TEMPLATE_DIR' in self.config and app.config['WHYIS_TEMPLATE_DIR'] is not None:
            # Configured directories are consulted before the default loader.
            app.jinja_loader = jinja2.ChoiceLoader(
                [jinja2.FileSystemLoader(p) for p in self.config['WHYIS_TEMPLATE_DIR']]
                + [app.jinja_loader]
            )

        def setup_task(service):
            """Return the Celery task matching ``service``'s query predicate.

            The service is also attached to the returned task as a zero-argument
            ``service`` callable.
            """
            service.app = app
            print(service)
            if service.query_predicate == self.NS.whyis.globalChangeQuery:
                result = process_resource
            else:
                result = process_nanopub
            result.service = lambda: service
            return result

        @self.celery.task
        def process_resource(service_name, taskid=None):
            """Run the named inferencer over the whole database graph."""
            service = self.config['inferencers'][service_name]
            if is_waiting(service_name):
                print("Deferring to a later invocation.", service_name)
                return
            print(service_name)
            service.process_graph(app.db)

        @self.celery.task
        def process_nanopub(nanopub_uri, service_name, taskid=None):
            """Run the named inferencer over one nanopublication, if still current."""
            service = self.config['inferencers'][service_name]
            print(service, nanopub_uri)
            if app.nanopub_manager.is_current(nanopub_uri):
                nanopub = app.nanopub_manager.get(nanopub_uri)
                service.process_graph(nanopub)
            else:
                print("Skipping retired nanopub", nanopub_uri)

        def setup_periodic_task(task):
            """Add ``find_instances`` and ``do`` Celery tasks to a task description.

            ``task`` is a mapping with at least ``name`` and ``service`` keys; it
            is modified in place and returned.
            """
            # NOTE(review): every call defines functions with the same names, so
            # the automatically generated Celery task names may collide when more
            # than one inference task is configured -- confirm against the Celery
            # version in use before relying on several periodic tasks.
            @self.celery.task
            def find_instances():
                print("Triggered task", task['name'])
                for x, in task['service'].getInstances(app.db):
                    task['do'](x)

            @self.celery.task
            def do_task(uri):
                print("Running task", task['name'], 'on', uri)
                resource = app.get_resource(uri)
                # The return value of process_graph is deliberately ignored.
                task['service'].process_graph(resource.graph)

            task['service'].app = app
            task['find_instances'] = find_instances
            task['do'] = do_task

            return task

        app.inference_tasks = []
        if 'inference_tasks' in self.config:
            app.inference_tasks = [setup_periodic_task(task) for task in self.config['inference_tasks']]

        # Guarded like the lookup in ``update`` so that a configuration without
        # inferencers does not fail here with a KeyError.
        if 'inferencers' in self.config:
            for service in self.config['inferencers'].values():
                service.app = app

        for task in app.inference_tasks:
            if 'schedule' in task:
                self.celery.add_periodic_task(
                    crontab(**task['schedule']),
                    task['find_instances'].s(),
                    name=task['name']
                )
            else:
                # Unscheduled tasks are queued once, at configuration time.
                task['find_instances'].delay()

        def _inspect_entries(reply):
            """Flatten an inspect() reply (``{worker: [entries]}``) into one list.

            ``reply`` may be ``None`` or empty when no worker answered.
            """
            if not reply:
                return []
            return [entry for entries in reply.values() for entry in (entries or [])]

        def _request_of(entry):
            """Return the request description held by an inspect() entry."""
            # scheduled() nests the request under 'request'; active() entries
            # are the request description itself.
            return entry.get('request', entry)

        def _kwargs_of(entry):
            """Return the keyword arguments of an inspect() entry as a dict.

            Depending on the Celery version the value is a dict or its textual
            representation; text is parsed with ``ast.literal_eval`` instead of
            ``eval`` so nothing coming from the broker is ever executed.
            Unparseable values yield an empty dict.
            """
            raw = _request_of(entry).get('kwargs')
            if isinstance(raw, dict):
                return raw
            if isinstance(raw, str):
                try:
                    parsed = ast.literal_eval(raw)
                except (ValueError, SyntaxError):
                    return {}
                if isinstance(parsed, dict):
                    return parsed
            return {}

        def _mentions_service(reply, service_name):
            """Tell whether any entry in ``reply`` was invoked for ``service_name``."""
            return any(
                _kwargs_of(entry).get('service_name') == service_name
                for entry in _inspect_entries(reply)
            )

        def is_waiting(service_name):
            """
            Check if a task is waiting.
            """
            return _mentions_service(inspect().scheduled(), service_name)

        def is_running_waiting(service_name):
            """
            Check if a task is running or waiting.
            """
            if is_waiting(service_name):
                return True
            return _mentions_service(inspect().active(), service_name)

        @self.celery.task()
        def update(nanopub_uri):
            '''gets called whenever there is a change in the knowledge graph.
            Performs a breadth-first knowledge expansion of the current change.'''
            if not app.nanopub_manager.is_current(nanopub_uri):
                print("Skipping retired nanopub", nanopub_uri)
                return
            nanopub = app.nanopub_manager.get(nanopub_uri)
            nanopub_graph = ConjunctiveGraph(nanopub.store)
            if 'inferencers' not in self.config:
                return
            inferencers = list(self.config['inferencers'].items())
            # First pass: inferencers triggered by this particular nanopublication.
            for name, service in inferencers:
                service.app = self
                if service.query_predicate == self.NS.whyis.updateChangeQuery:
                    if service.getInstances(nanopub_graph):
                        print("invoking", name, nanopub_uri)
                        process_nanopub.apply_async(kwargs={'nanopub_uri': nanopub_uri, 'service_name':name}, priority=1 )
            # Second pass: graph-wide inferencers, skipped while one is already
            # running or waiting for the same service.
            for name, service in inferencers:
                service.app = self
                if service.query_predicate == self.NS.whyis.globalChangeQuery and not is_running_waiting(name):
                    process_resource.apply_async(kwargs={'service_name':name}, priority=5)

        def run_update(nanopub_uri):
            """Queue ``update`` for ``nanopub_uri``."""
            update.apply_async(args=[nanopub_uri],priority=9)
        self.nanopub_update_listener = run_update

        def is_waiting_importer(entity_name, exclude=None):
            """
            Check if a scheduled task mentions ``entity_name`` in its arguments.

            ``exclude`` is accepted for backward compatibility and is not used.
            """
            for entry in _inspect_entries(inspect().scheduled()):
                args = _request_of(entry).get('args')
                if args and entity_name in args:
                    return True
            return False

        @self.celery.task(retry_backoff=True, retry_jitter=True,autoretry_for=(Exception,),max_retries=4, bind=True)
        def run_importer(self, entity_name):
            """Import ``entity_name`` unless another import of it is in progress.

            A Redis counter acts as the guard: only the caller that raises it
            to 1 proceeds. The counter is reset in ``finally`` so that a missing
            importer or a failed import does not block later runs and retries.
            """
            entity_name = URIRef(entity_name)
            # NOTE(review): the key is a tuple; confirm that the installed Redis
            # client accepts non-string keys.
            guard_key = ("import",entity_name)
            if app.redis.incr(guard_key) > 1:
                # Somebody else holds the guard and will reset it.
                return
            try:
                print('importing', entity_name)
                importer = app.find_importer(entity_name)
                if importer is None:
                    return
                importer.app = app
                modified = importer.last_modified(entity_name, app.db, app.nanopub_manager)
                updated = importer.modified(entity_name)
                if updated is None:
                    # No remote timestamp available: treat the entity as just changed.
                    updated = datetime.now(pytz.utc)
                print("Remote modified:", updated, type(updated), "Local modified:", modified, type(modified))
                if modified is None or (updated - modified).total_seconds() > importer.min_modified:
                    importer.load(entity_name, app.db, app.nanopub_manager)
            finally:
                app.redis.set(guard_key,0)
        self.run_importer = run_importer

        self.template_imports = {}
        if 'template_imports' in self.config:
            for name, imp in list(self.config['template_imports'].items()):
                try:
                    self.template_imports[name] = importlib.import_module(imp)
                except Exception:
                    print("Error importing module %s into template variable %s." % (imp, name))
                    raise
Esempio n. 2
0
    def configure_extensions(self):
        """Set up Celery, template loading and the background tasks.

        Extends ``Empty.configure_extensions`` and then, in order:

        * creates ``self.celery`` from ``CELERY_BROKER_URL`` and copies the
          application configuration into the Celery configuration;
        * applies the optional ``root_path`` and ``SATORU_TEMPLATE_DIR`` settings;
        * registers the inferencer tasks (``process_resource``,
          ``process_nanopub``, ``update``) and the periodic tasks listed under
          ``inference_tasks``;
        * exposes ``self.nanopub_update_listener`` and ``self.run_importer``.

        All diagnostics use single-argument ``print(...)`` calls so the method
        parses and prints the same text under both Python 2 and Python 3.

        Raises:
            KeyError: if ``CELERY_BROKER_URL`` is missing from the configuration.
        """
        Empty.configure_extensions(self)
        self.celery = Celery(self.name,
                             broker=self.config['CELERY_BROKER_URL'],
                             beat=True)
        self.celery.conf.update(self.config)

        # Alias used by the nested task functions.
        app = self

        if 'root_path' in self.config:
            self.root_path = self.config['root_path']

        if 'SATORU_TEMPLATE_DIR' in self.config and app.config[
                'SATORU_TEMPLATE_DIR'] is not None:
            # Configured directories are consulted before the default loader.
            app.jinja_loader = jinja2.ChoiceLoader([
                jinja2.FileSystemLoader(p)
                for p in self.config['SATORU_TEMPLATE_DIR']
            ] + [app.jinja_loader])

        def setup_task(service):
            """Return the Celery task matching ``service``'s query predicate.

            The service is also attached to the returned task as a zero-argument
            ``service`` callable.
            """
            service.app = app
            print(service)
            if service.query_predicate == self.NS.graphene.globalChangeQuery:
                result = process_resource
            else:
                result = process_nanopub
            result.service = lambda: service
            return result

        @self.celery.task
        def process_resource(service_name):
            """Run the named inferencer over the whole database graph."""
            service = self.config['inferencers'][service_name]
            print(service)
            service.process_graph(app.db)

        @self.celery.task
        def process_nanopub(nanopub_uri, service_name):
            """Run the named inferencer over one nanopublication."""
            service = self.config['inferencers'][service_name]
            print("%s %s" % (service, nanopub_uri))
            nanopub = app.nanopub_manager.get(nanopub_uri)
            service.process_graph(nanopub)

        def setup_periodic_task(task):
            """Add ``find_instances`` and ``do`` Celery tasks to a task description.

            ``task`` is a mapping with at least ``name`` and ``service`` keys; it
            is modified in place and returned.
            """
            # NOTE(review): every call defines functions with the same names, so
            # the automatically generated Celery task names may collide when more
            # than one inference task is configured -- confirm against the Celery
            # version in use before relying on several periodic tasks.
            @self.celery.task
            def find_instances():
                print("Triggered task %s" % (task['name'],))
                for x, in app.db.query(task['service'].get_query()):
                    task['do'](x)

            @self.celery.task
            def do_task(uri):
                print("Running task %s on %s" % (task['name'], uri))
                resource = app.get_resource(uri)
                # The return value of process_graph is not needed here.
                task['service'].process_graph(resource.graph)

            task['service'].app = app
            task['find_instances'] = find_instances
            task['do'] = do_task

            return task

        app.inference_tasks = []
        if 'inference_tasks' in self.config:
            app.inference_tasks = [
                setup_periodic_task(task)
                for task in self.config['inference_tasks']
            ]

        for task in app.inference_tasks:
            if 'schedule' in task:
                self.celery.add_periodic_task(crontab(**task['schedule']),
                                              task['find_instances'].s(),
                                              name=task['name'])
            else:
                # Unscheduled tasks are queued once, at configuration time.
                task['find_instances'].delay()

        @self.celery.task()
        def update(nanopub_uri):
            '''gets called whenever there is a change in the knowledge graph.
            Performs a breadth-first knowledge expansion of the current change.'''
            nanopub = app.nanopub_manager.get(nanopub_uri)
            nanopub_graph = ConjunctiveGraph(nanopub.store)
            if 'inferencers' not in self.config:
                return
            for name, service in self.config['inferencers'].items():
                service.app = self
                if service.query_predicate == self.NS.graphene.globalChangeQuery:
                    # Called directly rather than via delay()/apply_async().
                    process_resource(name)
                if service.query_predicate == self.NS.graphene.updateChangeQuery:
                    # Only invoke the inferencer when its query matches
                    # something in this nanopublication.
                    if list(nanopub_graph.query(service.get_query())):
                        print("invoking %s %s" % (name, nanopub_uri))
                        process_nanopub(nanopub_uri, name)

        def run_update(nanopub_uri):
            """Queue ``update`` for ``nanopub_uri``."""
            update.delay(nanopub_uri)

        self.nanopub_update_listener = run_update

        @self.celery.task(retry_backoff=True,
                          retry_jitter=True,
                          autoretry_for=(Exception, ),
                          max_retries=4)
        def run_importer(entity_name):
            """Load ``entity_name`` when the remote copy is newer than the local one.

            Does nothing when no importer handles the entity; previously that
            case raised ``AttributeError`` and was retried to no effect.
            """
            importer = self.find_importer(entity_name)
            if importer is None:
                return
            modified = importer.last_modified(entity_name, self.db,
                                              self.nanopub_manager)
            updated = importer.modified(entity_name)
            if updated is None:
                # No remote timestamp available: treat the entity as just changed.
                updated = datetime.now(pytz.utc)
            print("Remote modified: %s %s Local modified: %s %s" %
                  (updated, type(updated), modified, type(modified)))
            if modified is None or (updated - modified
                                    ).total_seconds() > importer.min_modified:
                importer.load(entity_name, self.db, self.nanopub_manager)

        self.run_importer = run_importer