def _test_graph(self, pgCreator, socketListeners=1):
    """Execute a test graph through a Luigi worker and verify its outcome.

    :param pgCreator: what is handed to ``FinishGraphExecution``. When it is
        a string it is first prefixed with ``test.graphsRepository.``.
    :param socketListeners: how many writer threads to start; writer ``i``
        targets ``localhost`` on port ``1111 + i``.

    After the worker has run, every drop reachable from the task's roots
    must report both ``isCompleted()`` and ``exists()``.
    """
    # Plain names are looked up inside the test graph repository
    if isinstance(pgCreator, six.string_types):
        pgCreator = "test.graphsRepository.%s" % (pgCreator)

    task = FinishGraphExecution(pgCreator=pgCreator)
    luigi_worker = worker.Worker(scheduler=scheduler.CentralPlannerScheduler())
    luigi_worker.add(task)

    # SocketListenerApps are executed up front, each in its own thread, so
    # that they open their ports before anything is written to them
    for candidate, _ in droputils.breadFirstTraverse(task.roots):
        if not isinstance(candidate, SocketListenerApp):
            continue
        listener_thread = threading.Thread(target=candidate.execute)
        listener_thread.start()

    # Feeding the initial nodes of the graph is what triggers its execution
    for offset in range(socketListeners):
        writer_thread = threading.Thread(
            target=utils.write_to,
            name='socketWriter',
            args=("localhost", 1111 + offset, test_data, 2),
        )
        writer_thread.start()

    # Run the graph! Luigi will either monitor or execute the DROPs
    luigi_worker.run()
    luigi_worker.stop()

    # ... and once it is done, all nodes must be completed and must exist
    for visited, _ in droputils.breadFirstTraverse(task.roots):
        finished_and_present = visited.isCompleted() and visited.exists()
        self.assertTrue(
            finished_and_present,
            "%s is not COMPLETED or doesn't exist" % (visited.uid))
def create_worker(scheduler, worker_processes, assistant=False):
    """Build a worker attached to the given scheduler.

    :param scheduler: forwarded to the worker as ``scheduler``
    :param worker_processes: forwarded to the worker as ``worker_processes``
    :param assistant: forwarded to the worker as ``assistant``
    :returns: The newly created worker
    :rtype: :class:`worker.Worker`
    """
    worker_options = dict(
        scheduler=scheduler,
        worker_processes=worker_processes,
        assistant=assistant,
    )
    return worker.Worker(**worker_options)
def run_task(task):
    """Schedule ``task`` on a freshly created worker and run it.

    The worker uses the module-level ``scheduler``, does not install a
    shutdown handler, and gets ``int(util.luigi_workers)`` worker processes.
    """
    process_count = int(util.luigi_workers)
    luigi_worker = worker.Worker(
        scheduler=scheduler,
        no_install_shutdown_handler=True,
        worker_processes=process_count,
    )
    # NOTE: multiprocess/processes are fixed values here; they are not
    # derived from process_count.
    luigi_worker.add(task, multiprocess=True, processes=2)
    luigi_worker.run()
def trigger_pipeline():
    """Run the pipeline and return the generated URNs as a plain-text response.

    While the documents are pulled and the Luigi workflow runs, ``sys.stdout``
    and ``sys.stderr`` are replaced by in-memory buffers. The original streams
    are always put back, even if the pipeline raises, so a failed run cannot
    leave the process with redirected output.

    :returns: a ``Response`` (mimetype ``text/plain``) streaming the stripped
        content of every ``bcube_demo/triples/*.txt`` file, one per line.
    """
    doc_dir = 'bcube_demo/docs'

    # Capture the main luigi output. The captured text is currently
    # discarded; only the redirection itself is relied upon.
    saved_stderr = sys.stderr
    sys.stderr = StringIO.StringIO()
    saved_stdout = sys.stdout
    sys.stdout = StringIO.StringIO()
    try:
        pull_from_solr(doc_dir)

        task = MainWorkflow(doc_dir=doc_dir, yaml_file='configs/bcube_demo.yaml')
        luigi.interface.setup_interface_logging()
        sch = luigi.scheduler.CentralPlannerScheduler()
        w = worker.Worker(scheduler=sch)
        w.add(task)
        w.run()
    finally:
        # Restore the real streams no matter how the run ended
        sys.stderr = saved_stderr
        sys.stdout = saved_stdout

    def generate_urn():
        # Lazily yield the content of each triples file as one line
        for urn in glob.glob('bcube_demo/triples/*.txt'):
            with open(urn, 'r') as f:
                u = f.read().strip()
            yield u + '\n'

    return Response(generate_urn(), mimetype='text/plain')
def create_worker(self, scheduler, worker_processes, assistant=False):
    """Return a ``worker.Worker`` configured with the given arguments.

    ``scheduler``, ``worker_processes`` and ``assistant`` are forwarded
    unchanged as keyword arguments.
    """
    new_worker = worker.Worker(
        scheduler=scheduler,
        worker_processes=worker_processes,
        assistant=assistant,
    )
    return new_worker
def deploy(self, completedDrops=None, foreach=None):
    """
    Creates the DROPs represented by all the graph specs contained in
    this session, effectively deploying them.

    The session must be in the BUILDING state; it goes through DEPLOYING
    and is left in RUNNING once this method returns.

    :param completedDrops: passed on to ``self.trigger_drops`` after all
        drops have been created and listeners registered. Defaults to an
        empty list.
    :param foreach: optional callable invoked once with each created drop.
    :raises InvalidSessionState: if the session is not in the BUILDING state.

    NOTE(review): earlier documentation stated that a Pyro Daemon is up and
    running once this method finishes; nothing in this method starts one --
    confirm where (or whether) that still happens.
    """
    # Use a fresh list per call rather than a mutable default argument
    # shared between invocations
    if completedDrops is None:
        completedDrops = []

    status = self.status
    if status != SessionStates.BUILDING:
        raise InvalidSessionState("Can't deploy this session in its current status: %d" % (status))

    self.status = SessionStates.DEPLOYING

    # Create the real DROPs from the graph specs
    logger.info("Creating DROPs for session %s", self._sessionId)

    self._roots = graph_loader.createGraphFromDropSpecList(self._graph.values())
    logger.info("%d drops successfully created", len(self._graph))

    for drop,_ in droputils.breadFirstTraverse(self._roots):

        # Register them
        self._drops[drop.uid] = drop

        # Register them with the error handler
        if self._error_status_listener:
            drop.subscribe(self._error_status_listener, eventType='status')
    logger.info("Stored all drops, proceeding with further customization")

    # Start the luigi task that will make sure the graph is executed.
    # If we're not using luigi we instead subscribe a completion listener
    # to the leaf drops of the graph.
    if self._enable_luigi:
        logger.debug("Starting Luigi FinishGraphExecution task for session %s", self._sessionId)
        task = luigi_int.FinishGraphExecution(self._sessionId, self._roots)
        sch = scheduler.CentralPlannerScheduler()
        w = worker.Worker(scheduler=sch)
        w.add(task)
        # The worker runs in a daemon thread so that deploy() can return
        workerT = threading.Thread(None, self._run, args=[w])
        workerT.daemon = True
        workerT.start()
    else:
        leaves = droputils.getLeafNodes(self._roots)
        logger.info("Adding completion listener to leaf drops")
        listener = LeavesCompletionListener(leaves, self)
        for leaf in leaves:
            # Application drops and other drops are subscribed under
            # different event names
            if isinstance(leaf, AppDROP):
                leaf.subscribe(listener, 'producerFinished')
            else:
                leaf.subscribe(listener, 'dropCompleted')
        logger.info("Listener added to leaf drops")

    # We move to COMPLETED the DROPs that we were requested to.
    # InputFiredAppDROPs are here considered as having to be executed and
    # not directly moved to COMPLETED.
    #
    # This is done in a separate step at the very end, after all drops
    # have been created, to make sure all event listeners are ready
    self.trigger_drops(completedDrops)

    # Foreach
    if foreach:
        logger.info("Invoking 'foreach' on each drop")
        for drop,_ in droputils.breadFirstTraverse(self._roots):
            foreach(drop)
        logger.info("'foreach' invoked for each drop")

    # Append proxies: each entry names a relationship method on a local
    # drop, which is called with a proxy standing in for the remote drop
    logger.info("Creating %d drop proxies", len(self._proxyinfo))
    for nm, host, port, local_uid, relname, remote_uid in self._proxyinfo:
        proxy = DropProxy(nm, host, port, self._sessionId, remote_uid)
        method = getattr(self._drops[local_uid], relname)
        method(proxy, False)

    self.status = SessionStates.RUNNING
    logger.info("Session %s is now RUNNING", self._sessionId)
def run(self):
    # Final method of the preceding task class (its header is outside this
    # excerpt): creates the task's output file by touching it.
    target_path = self.output().path
    os.system('touch "%s"' % target_path)


class DepTask(DependencyTriggeredTask):
    """Task that requires ``SimpleTask`` and produces ``/tmp/b-created``."""

    def requires(self):
        """Return the tasks this one depends on."""
        return [SimpleTask()]

    def output(self):
        """Return the local target marking this task as done."""
        return luigi.LocalTarget('/tmp/b-created')

    def run(self):
        """Create the output file by touching it."""
        target_path = self.output().path
        os.system('touch "%s"' % target_path)


if __name__ == '__main__':
    # Start from a clean slate: neither output file exists
    os.system('rm -f /tmp/a-created /tmp/b-created')
    interface.setup_interface_logging()
    runner = worker.Worker(scheduler=scheduler.CentralPlannerScheduler())

    # First pass: run the dependency, then the dependent task
    runner.add(SimpleTask())
    runner.run()
    runner.add(DepTask())
    runner.run()

    # Second pass: both outputs removed before re-adding the dependent task
    os.system('rm -f /tmp/a-created /tmp/b-created')
    runner.add(DepTask())
    runner.run()

    # Third pass: only the dependency's output removed
    os.system('rm -f /tmp/a-created')
    runner.add(DepTask())
    runner.run()
def create_worker(self, scheduler, worker_processes):
    """Return a ``worker.Worker`` for the given scheduler and process count.

    Both arguments are forwarded unchanged as keyword arguments.
    """
    new_worker = worker.Worker(
        scheduler=scheduler,
        worker_processes=worker_processes,
    )
    return new_worker