def main():
    basicConfig(level=DEBUG)

    w = Workflow()
    m1 = w.add(ConstantList)
    m2 = w.add(ConstantList)
    m3 = w.add(Zip2)
    m4 = w.add(StandardOutput)
    w.set_value(m1, 'in_values', [-i for i in range(23)], depth=1)
    w.set_value(m2, 'in_values', [i for i in range(29)], depth=1)
    w.connect(m1, 'out_values', m3, 'left')
    w.connect(m2, 'out_values', m3, 'right')
    w.connect(m3, 'zip', m4, 'data')
    #w.print_dot()

    interpreter = Interpreter(w)
    # note: make_fake_pipeline() replaces the workflow built above with the
    # interpreter's hard-coded test pipeline
    interpreter.make_fake_pipeline()
    interpreter.execute_pipeline()

class Interpreter(object):
    def __init__(self, workflow):
        # workflow reference
        self._workflow = workflow
        self.ready_tasks = list()

    def make_fake_pipeline(self):
        import basic_modules as basic

        self._workflow = Workflow()
        m0 = self._workflow.add(basic.Constant)
        m1 = self._workflow.add(basic.ReadFile)
        self._workflow.set_value(m0, "in_value", "/etc/passwd", depth=0)
        self._workflow.connect(m0, 'out_value', m1, 'path')
        m2 = self._workflow.add(basic.Count)
        self._workflow.connect(m1, 'line', m2, 'data')
        m3 = self._workflow.add(basic.RandomNumbers)
        m4 = self._workflow.add(basic.Zip3)
        self._workflow.connect(m1, 'line', m4, 'left')
        self._workflow.connect(m3, 'number', m4, 'right')
        m5 = self._workflow.add(basic.StandardOutput)
        self._workflow.connect(m2, 'length', m5, 'data')
        m6 = self._workflow.add(basic.StandardOutput)
        self._workflow.connect(m4, 'zip', m6, 'data')
        logger.debug("Fake pipeline created")

    def create_dependant_tasks(self, module):
        """Create the tasks a module needs next, depending on its state.

        The created tasks are appended to ``self.ready_tasks``.

        :param module: the module to create tasks for
        """
        # TODO: specify an automaton for module states to avoid too many flags
        if not module.starting:
            self.ready_tasks.append(StartTask(module))
            module.starting = True

        if module.output:
            # spawn input tasks for all downstream modules
            for port_name in module.expected_output:
                # get all downstream ports for broadcast messages
                for downstream_input in self._workflow.get_next(module.down[port_name]):
                    downstream_module = self._workflow.get_next(downstream_input)[0]
                    #self.ready_tasks.append(InputTask(downstream_module, downstream_input.port.name))
                    self.create_dependant_tasks(downstream_module)
            # reset module dependency state
            # TODO: make it better
            module.output = False
            module.expected_output = list()

        if module.finished:
            # the module is finished: any data remaining in the buffer has been
            # flushed and the corresponding tasks produced, so there is no need
            # to check the other states
            return

        if module.producing and not module.finishing:
            # TODO: fix infinite loop for infinite modules
            # if the module is a producing module (step), append a new task so
            # that it eventually produces its next data
            self.ready_tasks.append(OutputTask(module))
            module.producing = False

        if not module.finishing and module.finish_reason is not None:
            # spawn finish task
            module.finishing = True
            self.ready_tasks.append(FinishTask(module))

        if module.wait_input and not module.finishing:
            # iterate over a copy: expected_input may be modified inside the loop
            for port_name in list(module.expected_input):
                # check if data is available on the port
                if module.up[port_name].size() > 0:
                    logger.debug("data available on module %r port '%s'" % (module, port_name))
                    self.ready_tasks.append(InputTask(module, port_name))
                    module.expected_input.remove(port_name)
                    module.wait_input = len(module.expected_input) != 0
                # check if the upstream stream is closed
                elif module.up[port_name].stream.closed:
                    logger.debug("stream closed on port '%s'" % port_name)
                    self.ready_tasks.append(InputEndTask(module, port_name))
                # if data is not available, execute tasks of the upstream module
                else:
                    logger.debug("data not available on module %r port '%s'" % (module, port_name))
                    up_module = self._workflow.get_upstream_module_from_port(module.up[port_name])
                    self.create_dependant_tasks(up_module)

    def execute_pipeline(self):
        # update the effective port depths of the workflow
        self._workflow.check_depth()
        # task counter
        executed_tasks = 0
        # list of started modules
        started_modules = set()
        sinks = self._workflow.get_sinks()
        # seed the scheduler with start tasks for the sink modules
        self.ready_tasks = [StartTask(mod) for mod in sinks]
        # set the sink modules as starting
        for mod in sinks:
            mod.starting = True
        # main loop of the scheduler
        while self.ready_tasks:
logger.debug("========================================") logger.debug(self.ready_tasks) logger.debug("========================================") task = self.ready_tasks.pop(0) # if task is input task and ready to be executed, execute, else reschedule later logger.debug("execute task %s" % task) task.execute() self.create_dependant_tasks(task.module) #time.sleep(0.5) executed_tasks += 1 logger.debug("#################### ENDING WORKFLOW ####################") # explicitely call finish on sinks if not already done for mod in sinks: mod.finish_reason = FinishReason.TERMINATE self.create_dependant_tasks(mod) logger.debug("========================================") logger.debug(self.ready_tasks) logger.debug("========================================") while self.ready_tasks: task = self.ready_tasks.pop(0) logger.debug("========================================") logger.debug("execute task %s" % task) task.execute() executed_tasks += 1 logger.debug("Executed %d tasks" % executed_tasks)