def _test_graph(self, pgCreator, socketListeners=1):
    """
    Execute a graph through a Luigi worker and check all its drops afterwards.

    If ``pgCreator`` is a string it is prefixed with
    ``test.graphsRepository.`` before being handed to
    ``FinishGraphExecution``. ``socketListeners`` is the number of writer
    threads started, one per consecutive localhost port beginning at 1111.
    """
    if isinstance(pgCreator, six.string_types):
        pgCreator = "test.graphsRepository.%s" % (pgCreator)

    graph_task = FinishGraphExecution(pgCreator=pgCreator)
    luigi_worker = worker.Worker(scheduler=scheduler.CentralPlannerScheduler())
    luigi_worker.add(graph_task)

    def _run_listener(app):
        app.execute()

    # SocketListenerApps are executed in background threads so that they
    # open their ports before anything is written to them
    for node, _ in droputils.breadFirstTraverse(graph_task.roots):
        if not isinstance(node, SocketListenerApp):
            continue
        listener_thread = threading.Thread(target=_run_listener, args=(node, ))
        listener_thread.start()

    # Writing to the initial nodes of the graph triggers its execution
    for offset in range(socketListeners):
        writer_thread = threading.Thread(
            target=utils.write_to,
            name='socketWriter',
            args=("localhost", 1111 + offset, test_data, 2))
        writer_thread.start()

    # Luigi will either monitor or execute the DROPs
    luigi_worker.run()
    luigi_worker.stop()

    # Once the worker is done every node of the graph must be completed
    # and must exist
    for node, _ in droputils.breadFirstTraverse(graph_task.roots):
        finished_and_present = node.isCompleted() and node.exists()
        self.assertTrue(
            finished_and_present,
            "%s is not COMPLETED or doesn't exist" % (node.uid))
def _create_scheduler():
    """
    Build a CentralPlannerScheduler from the ``scheduler`` config section.

    The retry, remove and worker-disconnect delays are read as floats (with
    fallbacks of 900.0, 600.0 and 60.0 respectively) and passed positionally,
    in that order, to the scheduler constructor.
    """
    cfg = interface.get_config()
    # (option name, fallback value), in constructor argument order
    delay_options = (
        ('retry-delay', 900.0),
        ('remove-delay', 600.0),
        ('worker-disconnect-delay', 60.0),
    )
    delays = [
        cfg.getfloat('scheduler', option, fallback)
        for option, fallback in delay_options
    ]
    return scheduler.CentralPlannerScheduler(*delays)
def create_local_scheduler(self):
    """Return a new CentralPlannerScheduler built with ``prune_on_get_work=True``."""
    local_scheduler = scheduler.CentralPlannerScheduler(prune_on_get_work=True)
    return local_scheduler
def create_local_scheduler(self):
    """
    Return a new CentralPlannerScheduler built with ``prune_on_get_work=True``
    and ``record_task_history=False``.
    """
    local_scheduler = scheduler.CentralPlannerScheduler(
        prune_on_get_work=True,
        record_task_history=False)
    return local_scheduler
def create_local_scheduler(self):
    """Return a new CentralPlannerScheduler built with its default arguments."""
    local_scheduler = scheduler.CentralPlannerScheduler()
    return local_scheduler
def deploy(self, completedDrops=None, foreach=None):
    """
    Creates the DROPs represented by all the graph specs contained in this
    session, effectively deploying them.

    The session must be in the BUILDING state. It is moved to DEPLOYING
    while the drops are created and wired up, and to RUNNING right before
    this method returns.

    :param completedDrops: handed to ``self.trigger_drops`` once all
        listeners are in place. Defaults to an empty list.
    :param foreach: optional callable; if given it is invoked once with each
        drop of the graph.
    :raises InvalidSessionState: if the session is not in the BUILDING state.
    """

    # Use a fresh list per call; a mutable default value would be shared
    # between all invocations of this method
    if completedDrops is None:
        completedDrops = []

    status = self.status
    if status != SessionStates.BUILDING:
        raise InvalidSessionState("Can't deploy this session in its current status: %d" % (status))

    self.status = SessionStates.DEPLOYING

    # Create the real DROPs from the graph specs
    logger.info("Creating DROPs for session %s", self._sessionId)
    self._roots = graph_loader.createGraphFromDropSpecList(self._graph.values())
    logger.info("%d drops successfully created", len(self._graph))

    for drop, _ in droputils.breadFirstTraverse(self._roots):

        # Register them
        self._drops[drop.uid] = drop

        # Register them with the error handler
        if self._error_status_listener:
            drop.subscribe(self._error_status_listener, eventType='status')
    logger.info("Stored all drops, proceeding with further customization")

    # Start the luigi task that will make sure the graph is executed.
    # If we're not using luigi we still subscribe a completion listener
    # to the leaves of the graph.
    if self._enable_luigi:
        logger.debug("Starting Luigi FinishGraphExecution task for session %s", self._sessionId)
        task = luigi_int.FinishGraphExecution(self._sessionId, self._roots)
        sch = scheduler.CentralPlannerScheduler()
        w = worker.Worker(scheduler=sch)
        w.add(task)
        # The worker runs in a daemon thread so it doesn't block this call
        workerT = threading.Thread(None, self._run, args=[w])
        workerT.daemon = True
        workerT.start()
    else:
        leaves = droputils.getLeafNodes(self._roots)
        logger.info("Adding completion listener to leaf drops")
        listener = LeavesCompletionListener(leaves, self)
        for leaf in leaves:
            # Application drops and the rest are listened to through
            # different event types
            if isinstance(leaf, AppDROP):
                leaf.subscribe(listener, 'producerFinished')
            else:
                leaf.subscribe(listener, 'dropCompleted')
        logger.info("Listener added to leaf drops")

    # Trigger the drops we were requested to trigger.
    #
    # This is done at the very end of the drop setup, after the listeners
    # above have been subscribed, to make sure all event listeners are ready
    self.trigger_drops(completedDrops)

    # Foreach
    if foreach:
        logger.info("Invoking 'foreach' on each drop")
        for drop, _ in droputils.breadFirstTraverse(self._roots):
            foreach(drop)
        logger.info("'foreach' invoked for each drop")

    # Append proxies: each one is attached to a local drop through the
    # relationship method named by ``relname``
    logger.info("Creating %d drop proxies", len(self._proxyinfo))
    for nm, host, port, local_uid, relname, remote_uid in self._proxyinfo:
        proxy = DropProxy(nm, host, port, self._sessionId, remote_uid)
        method = getattr(self._drops[local_uid], relname)
        method(proxy, False)

    self.status = SessionStates.RUNNING
    logger.info("Session %s is now RUNNING", self._sessionId)
def run(self):
    """Create this task's output path by shelling out to ``touch``."""
    target_path = self.output().path
    os.system('touch "%s"' % target_path)


class DepTask(DependencyTriggeredTask):
    """Task requiring ``SimpleTask`` whose output is ``/tmp/b-created``."""

    def requires(self):
        """Return the list of required tasks: a single ``SimpleTask``."""
        upstream = SimpleTask()
        return [upstream]

    def output(self):
        """Return the local target for ``/tmp/b-created``."""
        return luigi.LocalTarget('/tmp/b-created')

    def run(self):
        """Create this task's output path by shelling out to ``touch``."""
        target_path = self.output().path
        os.system('touch "%s"' % target_path)


if __name__ == '__main__':
    # Start without any of the output files in place
    os.system('rm -f /tmp/a-created /tmp/b-created')
    interface.setup_interface_logging()

    w = worker.Worker(scheduler=scheduler.CentralPlannerScheduler())

    # First SimpleTask on its own, then DepTask; each task is only
    # instantiated right before it is added to the worker
    for task_class in (SimpleTask, DepTask):
        w.add(task_class())
        w.run()

    # Re-add DepTask after removing both outputs, then again after removing
    # only SimpleTask's output
    for cleanup_command in ('rm -f /tmp/a-created /tmp/b-created',
                            'rm -f /tmp/a-created'):
        os.system(cleanup_command)
        w.add(DepTask())
        w.run()
def main():
    """
    Check the existing update markers and process all outstanding exports.

    For every date in the update markers, all bulk tasks (minus
    ``MongoDeleteTask`` on the configured full export date) must be present.
    After that, a ``MongoDeleteTask`` is scheduled on a ``BulkWorker`` for
    each export file date that has no update marker yet.

    :raises AssertionError: if any bulk task is missing for a marker date.
    """

    update_markers = mongo_get_update_markers()

    # Make sure the updates have all mongo classes
    bulk_tasks = [
        MongoCollectionIndexTask,
        MongoCollectionEventTask,
        MongoCatalogueTask,
        MongoTaxonomyTask,
        # MongoMultimediaTask,
        MongoSiteTask,
        UnpublishTask,
        MongoDeleteTask
    ]

    def _get_task_names(tasks):
        """
        We need to initiate and get the family name, not just the class name
        MongoDeleteTask => DeleteTask
        @param tasks: task classes, each instantiated with date=0
        @return: list of unicode task family names
        """
        return [unicode(task(date=0).task_family) for task in tasks]

    full_export_date = int(config.get('keemu', 'full_export_date'))

    for date, update_marker in update_markers.iteritems():

        # If this is the full export date, MongoDeleteTask is not required
        if full_export_date and date == full_export_date:
            bulk_task_copy = list(bulk_tasks)
            bulk_task_copy.remove(MongoDeleteTask)
            bulk_task_names = _get_task_names(bulk_task_copy)
        else:
            bulk_task_names = _get_task_names(bulk_tasks)

        # Check that for every date we have all the bulk tasks. This is an
        # explicit raise rather than an assert statement so the check is
        # not stripped when running under -O
        missing_tasks = list(set(bulk_task_names) - set(update_marker))
        if missing_tasks:
            raise AssertionError(
                'There are missing mongo tasks for date %s: %s' % (
                    date, missing_tasks))

    # Get a list of all export files to process. Membership is tested
    # against the dict itself instead of a keys() list built per element
    export_dates = [
        d for d in get_export_file_dates() if d not in update_markers
    ]

    # Run setup_interface_logging to ensure luigi commands
    setup_interface_logging()

    sch = scheduler.CentralPlannerScheduler()

    w = BulkWorker(scheduler=sch)

    # NOTE(review): dates are assumed to be run one at a time, with the
    # worker stopped once after the last one - confirm against the
    # original layout of this loop
    for export_date in export_dates:
        log.info('Processing date %s', export_date)
        # We only need to call the mongo delete task, as all other tasks are a requirement
        # NB: This doesn't delete anything from CKAN - if that's needed change this to DeleteTask
        w.add(MongoDeleteTask(date=export_date, force=True))
        w.run()

    w.stop()