Example #1
import logging
from logging import basicConfig, DEBUG

# Assumed project-local imports: make_fake_pipeline below imports
# basic_modules the same way, and the remaining names (Workflow, the Task
# classes, FinishReason) must come from the surrounding project. The module
# paths used here are guesses.
from workflow import Workflow
from basic_modules import ConstantList, Zip2, StandardOutput
from tasks import (StartTask, InputTask, InputEndTask, OutputTask, FinishTask,
                   FinishReason)

logger = logging.getLogger(__name__)


def main():

    basicConfig(level=DEBUG)

    w = Workflow()

    m1 = w.add(ConstantList)
    m2 = w.add(ConstantList)
    m3 = w.add(Zip2)
    m4 = w.add(StandardOutput)

    w.set_value(m1, 'in_values', [-i for i in range(23)], depth=1)
    w.set_value(m2, 'in_values', list(range(29)), depth=1)

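    # wire the dataflow graph: the two constant lists feed Zip2, whose zipped
    # pairs go to StandardOutput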
    w.connect(m1, 'out_values', m3, 'left')
    w.connect(m2, 'out_values', m3, 'right')
    w.connect(m3, 'zip', m4, 'data')

    # w.print_dot()

    interpreter = Interpreter(w)
    # NOTE: make_fake_pipeline discards the workflow built above and replaces
    # it with its own hard-coded pipeline
    interpreter.make_fake_pipeline()
    interpreter.execute_pipeline()


class Interpreter(object):

    def __init__(self, workflow):
        # workflow reference
        self._workflow = workflow
        # FIFO queue of tasks that are ready to run
        self.ready_tasks = list()

    def make_fake_pipeline(self):
        import basic_modules as basic

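        # The hard-coded pipeline wired below (note it has two sinks, m5 and
        # m6):
        #
        #   m0 Constant --> m1 ReadFile --> m2 Count --> m5 StandardOutput
        #                        |
        #                        +--> m4 Zip3 <-- m3 RandomNumbers
        #                               |
        #                               +--> m6 StandardOutput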
        self._workflow = Workflow()

        m0 = self._workflow.add(basic.Constant)
        m1 = self._workflow.add(basic.ReadFile)
        self._workflow.set_value(m0, "in_value", "/etc/passwd", depth=0)
        self._workflow.connect(m0, 'out_value', m1, 'path')

        m2 = self._workflow.add(basic.Count)
        self._workflow.connect(m1, 'line', m2, 'data')

        m3 = self._workflow.add(basic.RandomNumbers)
        m4 = self._workflow.add(basic.Zip3)
        self._workflow.connect(m1, 'line', m4, 'left')
        self._workflow.connect(m3, 'number', m4, 'right')

        m5 = self._workflow.add(basic.StandardOutput)
        self._workflow.connect(m2, 'length', m5, 'data')

        m6 = self._workflow.add(basic.StandardOutput)
        self._workflow.connect(m4, 'zip', m6, 'data')

        logger.debug("Fake pipeline created")

    def create_dependant_tasks(self, module):
        """
        Create the tasks corresponding to a module's current state.

        :param module: the module whose state flags are inspected
        :return: None; the tasks are appended to self.ready_tasks
        """

        # TODO: specify automaton for modules states to avoid too many flags
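
        # Flags inspected below (inferred from this function's own checks):
        #   starting   - a StartTask has already been scheduled
        #   output     - data is waiting on the module's output ports
        #   finished   - the module has completely terminated
        #   producing  - the module can still produce data (step modules)
        #   finishing  - a FinishTask has already been scheduled
        #   wait_input - the module is blocked on one of its input ports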

        if not module.starting:
            self.ready_tasks.append(StartTask(module))
            module.starting = True

        if module.output:
            # spawn input tasks for all downstream modules
            for port_name in module.expected_output:
                # broadcast: walk every downstream input port connected to
                # this output port
                for downstream_input in self._workflow.get_next(module.down[port_name]):
                    downstream_module = self._workflow.get_next(downstream_input)[0]
                    # self.ready_tasks.append(InputTask(downstream_module, downstream_input.port.name))
                    self.create_dependant_tasks(downstream_module)
            # reset the module's output state
            # TODO: make it better
            module.output = False
            module.expected_output = list()

        if module.finished:
            # the module has finished: any data remaining in its buffers has
            # been flushed and the corresponding tasks produced, so no other
            # state needs to be checked
            return

        if module.producing and not module.finishing:
            # TODO: fix infinite loop for infinite modules
            # if the module is a producing module (step), append a new task to
            # eventually produce the next piece of data
            self.ready_tasks.append(OutputTask(module))
            module.producing = False

        if not module.finishing and module.finish_reason is not None:
            # spawn finish task
            module.finishing = True
            self.ready_tasks.append(FinishTask(module))

        if module.wait_input and not module.finishing:
            # iterate over a copy: expected_input is mutated inside the loop
            for port_name in list(module.expected_input):
                # check if the data is available
                if module.up[port_name].size() > 0:
                    logger.debug("data available on module %r port '%s'" % (module, port_name))
                    self.ready_tasks.append(InputTask(module, port_name))
                    module.expected_input.remove(port_name)
                    module.wait_input = len(module.expected_input) != 0
                # check if the upstream stream is closed
                elif module.up[port_name].stream.closed:
                    logger.debug("stream closed on port '%s'" % port_name)
                    self.ready_tasks.append(InputEndTask(module, port_name))
                # if the data is not available, spawn tasks for the upstream
                # module instead
                else:
                    logger.debug("data not available on module %r port '%s'" % (module, port_name))
                    up_module = self._workflow.get_upstream_module_from_port(module.up[port_name])
                    self.create_dependant_tasks(up_module)

    def execute_pipeline(self):
        # update the effective depths of the workflow's ports
        self._workflow.check_depth()

        # task counter
        executed_tasks = 0

        # seed the scheduler: one StartTask per sink of the workflow
        sinks = self._workflow.get_sinks()
        self.ready_tasks = [StartTask(mod) for mod in sinks]
        # mark the sinks as starting
        for mod in sinks:
            mod.starting = True

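        # The scheduling is demand driven: execution starts from the sinks,
        # and create_dependant_tasks walks upstream whenever an input is not
        # yet available, so only modules that feed a sink ever run.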
        # main loop of the scheduler
        while self.ready_tasks:
            logger.debug("========================================")
            logger.debug(self.ready_tasks)
            logger.debug("========================================")
            task = self.ready_tasks.pop(0)
            # if the task is an input task and is ready to be executed,
            # execute it; otherwise it should be rescheduled for later

            logger.debug("execute task %s" % task)
            task.execute()
            self.create_dependant_tasks(task.module)
            # time.sleep(0.5)

            executed_tasks += 1

        logger.debug("#################### ENDING WORKFLOW ####################")

        # explicitly call finish on the sinks if not already done
        for mod in sinks:
            mod.finish_reason = FinishReason.TERMINATE
            self.create_dependant_tasks(mod)

        logger.debug("========================================")
        logger.debug(self.ready_tasks)
        logger.debug("========================================")

        while self.ready_tasks:
            task = self.ready_tasks.pop(0)
            logger.debug("========================================")
            logger.debug("execute task %s" % task)
            task.execute()

            executed_tasks += 1

        logger.debug("Executed %d tasks" % executed_tasks)