Example No. 1
    def _local_train_phase(self, data_iterable):
        """Perform a single training phase locally.

        The internal _train_callable_class is used for the training.
        """
        current_node = self.flow[self._i_train_node]
        task_callable = self._train_callable_class(self._flownode,
                                                   purge_nodes=False)
        empty_iterator = True
        for i_task, data in enumerate(data_iterable):
            empty_iterator = False
            # Note: if data contains additional args we assume that the
            # callable can handle them
            task_callable(data)
            if self.verbose:
                print("    finished nonparallel task no. %d" % (i_task + 1))
        if empty_iterator:
            if current_node.get_current_train_phase() == 1:
                err_str = ("The training data iteration for node "
                           "no. %d could not be repeated for the "
                           "second training phase, you probably "
                           "provided an iterator instead of an "
                           "iterable." % (self._i_train_node + 1))
                raise mdp.FlowException(err_str)
            else:
                err_str = ("The training data iterator for node "
                           "no. %d is empty." % (self._i_train_node + 1))
                raise mdp.FlowException(err_str)
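
The two error branches hinge on the iterator/iterable distinction: an iterator is exhausted after one pass, so a node with a second training phase sees no data, while an iterable hands out a fresh iterator on every iter() call. A minimal, self-contained illustration:

def make_chunks():
    for i in range(3):
        yield [i]

gen = make_chunks()        # iterator: one pass, then exhausted
print(list(gen))           # [[0], [1], [2]]
print(list(gen))           # [] -- a second training phase would see no data

chunks = [[0], [1], [2]]   # iterable: every iter() starts from the beginning
print(list(iter(chunks)))  # [[0], [1], [2]]
print(list(iter(chunks)))  # [[0], [1], [2]]
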
Example No. 2
    def execute(self, x, nodenr=None):
        # nodenr is accepted for Flow API compatibility but not used here
        if not isinstance(x, mdp.numx.ndarray):
            errstr = ("FreerunFlows can only be executed using numpy arrays "
                      "as input.")
            raise mdp.FlowException(errstr)

        if self.freerun_steps >= x.shape[0]:
            errstr = ("Number of freerun steps (%d) should be less than the number of timesteps in x (%d)" % (self.freerun_steps, x.shape[0]))
            raise mdp.FlowException(errstr)

        if self.external_input_range is None:
            external_input_range = []
        else:
            external_input_range = self.external_input_range

        # Run the flow over the warmup part of the input
        self._execute_seq(x[:-self.freerun_steps, :])
        freerun_range = mdp.numx.setdiff1d(range(x.shape[1]), external_input_range)
        self.fb_value = mdp.numx.atleast_2d(x[-self.freerun_steps, freerun_range])

        res = mdp.numx.zeros((self.freerun_steps, x.shape[1]))
        if self.external_input_range is None:
            for step in range(self.freerun_steps):
                res[step] = self.fb_value
                self.fb_value = self._execute_seq(mdp.numx.atleast_2d(self.fb_value))
        else:
            for step in range(self.freerun_steps):
                external_input = mdp.numx.atleast_2d(x[-self.freerun_steps + step, external_input_range])
                total_input = mdp.numx.atleast_2d(mdp.numx.concatenate((external_input, self.fb_value), 1))
                res[step] = total_input
                self.fb_value = self._execute_seq(total_input)
        return mdp.numx.concatenate((x[:-self.freerun_steps, :], res))
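
Stripped of the surrounding classes, execute() follows a two-stage pattern: a teacher-forced warmup over all but the last freerun_steps rows, then a loop that feeds the model's own output back as its input. A self-contained sketch of that pattern, with step() as a hypothetical stand-in for self._execute_seq:

import numpy as np

def step(v):
    return 0.9 * v                               # hypothetical one-step model

freerun_steps = 5
x = np.arange(12.0).reshape(-1, 1)               # 12 timesteps, 1 signal

for row in x[:-freerun_steps]:                   # warmup: drive with real data
    fb_value = step(row)

res = np.zeros((freerun_steps, x.shape[1]))
for i in range(freerun_steps):                   # free run: output becomes input
    res[i] = fb_value
    fb_value = step(fb_value)

out = np.concatenate((x[:-freerun_steps], res))  # mirrors execute()'s return value
print(out.ravel())
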
Example No. 3
    def __init__(self, flow, crash_recovery=False, verbose=False, freerun_steps=None, external_input_range=None):
        super(FreerunFlow, self).__init__(flow, crash_recovery, verbose)
        if freerun_steps is None:
            errstr = ("The FreerunFlow must be initialized with an explicit freerun horizon.")
            raise mdp.FlowException(errstr)
        self.freerun_steps = freerun_steps
        self.external_input_range = external_input_range
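
A hypothetical construction, assuming the Oger toolbox (where FreerunFlow and ReservoirNode are defined) is importable; the node choices and dimensions are illustrative only:

import mdp
import Oger

nodes = [Oger.nodes.ReservoirNode(input_dim=1, output_dim=100),
         mdp.nodes.LinearRegressionNode()]
flow = Oger.nodes.FreerunFlow(nodes, freerun_steps=50)

# Omitting the horizon trips the guard in __init__ above:
try:
    Oger.nodes.FreerunFlow(nodes)
except mdp.FlowException as exc:
    print(exc)
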
Example No. 4
    def setup_parallel_execution(self,
                                 iterable,
                                 msg_iterable=None,
                                 target_iterable=None,
                                 execute_callable_class=BiFlowExecuteCallable):
        """Prepare the flow for handing out tasks to do the execution.

        Instead of automatically executing the flow with the iterable, this
        method only prepares the tasks for the scheduler.

        iterable -- Single array or iterable.
        msg_iterable -- Single message or iterable.
        target_iterable -- Single target or iterable.
        execute_callable_class -- Class used to create execution callables for
            the scheduler. By specifying your own class you can implement data
            transformations before the data is actually fed into the flow
            (e.g. from 8 bit image to 64 bit double precision).
        """
        self._bi_reset()  # normally not required, just for safety
        if self.is_parallel_training:
            raise ParallelBiFlowException("Parallel training is underway.")
        self._execute_callable_class = execute_callable_class
        iterable, msg_iterable, target_iterable = self._sanitize_iterables(
            iterable, msg_iterable, target_iterable)
        self._exec_data_iterator = iter(iterable)
        self._exec_msg_iterator = iter(msg_iterable)
        self._exec_target_iterator = iter(target_iterable)
        first_task = self._create_execute_task()
        if first_task is None:
            err = "The execute data iterable is empty."
            raise ParallelBiFlowException(err)
        task_data_chunk = first_task[0]
        # Only first task contains the new callable (enable caching).
        # A fork is not required here, since the callable is always
        # forked in the scheduler.
        self._next_task = (task_data_chunk,
                           self._execute_callable_class(self._flownode,
                                                        purge_nodes=True))
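
The data-transformation hook that this docstring (and Example No. 5) mentions can be sketched as a small subclass. A minimal sketch, assuming mdp.parallel.FlowExecuteCallable (the plain-flow counterpart of BiFlowExecuteCallable) with a __call__(self, x) signature; the class name and the scaling are illustrative, not part of the library:

import mdp
import mdp.parallel

class Uint8ToFloatCallable(mdp.parallel.FlowExecuteCallable):
    """Hypothetical callable that upcasts 8-bit image chunks to
    float64 in [0, 1] before they enter the flow."""
    def __call__(self, x):
        scaled = x.astype(mdp.numx.float64) / 255.0
        return super(Uint8ToFloatCallable, self).__call__(scaled)

It would then be handed over as execute_callable_class=Uint8ToFloatCallable when setting up the parallel execution.
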
Example No. 5
    def setup_parallel_execution(self,
                                 iterable,
                                 nodenr=None,
                                 execute_callable_class=FlowExecuteCallable):
        """Prepare the flow for handing out tasks to do the execution.

        After calling setup_parallel_execution one has to pick up the
        tasks with get_task, run them and finally hand the results back via
        use_results. use_results will then return the result as if the flow
        had been executed in the normal way.

        iterable -- An iterable or iterator that returns data arrays that are
            used as input to the flow. Alternatively, one can specify one
            data array as input.
            If a custom execute_callable_class is used to preprocess the data
            then other data types can be used as well.
        nodenr -- Same as in normal flow, the flow is only executed up to the
            nodenr.
        execute_callable_class -- Class used to create execution callables for
            the scheduler. By specifying your own class you can implement data
            transformations before the data is actually fed into the flow
            (e.g. from 8 bit image to 64 bit double precision).
        """
        if self.is_parallel_training:
            raise ParallelFlowException("Parallel training is underway.")
        self._execute_callable_class = execute_callable_class
        if isinstance(iterable, n.ndarray):
            iterable = [iterable]
        self._exec_data_iterator = iter(iterable)
        first_task = self._create_execute_task()
        if first_task is None:
            errstr = ("The execute data iterator is empty.")
            raise mdp.FlowException(errstr)
        task_data_chunk = first_task[0]
        # Only first task contains the new callable (enable caching).
        # A fork is not required here, since the callable is always
        # forked in the scheduler.
        self._next_task = (task_data_chunk,
                           self._execute_callable_class(self._flownode,
                                                        purge_nodes=True))
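
Putting the docstring's protocol in one place: a minimal sketch of the manual task loop that a scheduler normally runs for you. get_task and use_results are named in the docstring above; task_available and the (data, callable) task layout are assumptions based on the surrounding code, and the direct call skips the fork a real scheduler performs:

import mdp
import mdp.parallel

flow = mdp.parallel.ParallelFlow([mdp.nodes.IdentityNode()])
chunks = [mdp.numx.random.random((10, 3)) for _ in range(4)]

flow.setup_parallel_execution(chunks)
results = []
task_callable = None
while flow.task_available():
    data, new_callable = flow.get_task()
    if new_callable is not None:       # only the first task carries the callable
        task_callable = new_callable
    results.append(task_callable(data))
y = flow.use_results(results)
print(y.shape)                         # (40, 3), as if the flow ran normally
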
Example No. 6
    def _next_train_phase(self):
        """Find the next phase or node for parallel training.

        When it is found the corresponding internal variables are set.
        Nodes which are not derived from ParallelNode are trained locally.
        If a fork() fails due to a NotForkableParallelException
        in a certain train phase, then the training is done locally as well
        (but fork() is tested again for the next phase).
        """
        # find next node that can be forked, if required do local training
        while self._i_train_node < len(self.flow):
            current_node = self.flow[self._i_train_node]
            if not current_node.is_training():
                self._i_train_node += 1
                continue
            data_iterable = self._train_data_iterables[self._i_train_node]
            try:
                self._flownode.fork()
                # fork successful, prepare parallel training
                if self.verbose:
                    print("start parallel training phase of " +
                          "node no. %d in parallel flow" %
                          (self._i_train_node + 1))
                self._train_data_iterator = iter(data_iterable)
                first_task = self._create_train_task()
                # make sure that the iterator is not empty
                if first_task is None:
                    if current_node.get_current_train_phase() == 1:
                        err_str = ("The training data iteration for node "
                                   "no. %d could not be repeated for the "
                                   "second training phase, you probably "
                                   "provided an iterator instead of an "
                                   "iterable." % (self._i_train_node + 1))
                        raise mdp.FlowException(err_str)
                    else:
                        err_str = ("The training data iterator for node "
                                   "no. %d is empty." %
                                   (self._i_train_node + 1))
                        raise mdp.FlowException(err_str)
                task_data_chunk = first_task[0]
                # Only first task contains the new callable (enable caching).
                # A fork is not required here, since the callable is always
                # forked in the scheduler.
                self._next_task = (task_data_chunk,
                                   self._train_callable_class(self._flownode))
                break
            except NotForkableParallelException as exception:
                if self.verbose:
                    print("could not fork node no. %d: %s" %
                          (self._i_train_node + 1, str(exception)))
                    print("start nonparallel training phase of " +
                          "node no. %d in parallel flow" %
                          (self._i_train_node + 1))
                self._local_train_phase(data_iterable)
                if self.verbose:
                    print("finished nonparallel training phase of " +
                          "node no. %d in parallel flow" %
                          (self._i_train_node + 1))
                self._stop_training_hook()
                self._flownode.stop_training()
                self._post_stop_training_hook()
                if not self.flow[self._i_train_node].is_training():
                    self._i_train_node += 1
        else:
            # training is finished
            self._i_train_node = None
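
The else at the end of this method belongs to the while loop, an easily misread Python idiom: its body runs only when the loop finishes without break, which is exactly how _i_train_node becomes None once no trainable node remains. A minimal, self-contained demonstration:

def next_trainable(nodes):
    i = 0
    while i < len(nodes):
        if nodes[i]:      # stand-in for current_node.is_training()
            break         # found one -> the else clause is skipped
        i += 1
    else:
        i = None          # loop exhausted -> training is finished
    return i

print(next_trainable([False, True, False]))  # 1
print(next_trainable([False, False]))        # None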