def _local_train_phase(self, data_iterable):
    """Perform a single training phase locally.

    The internal _train_callable_class is used for the training.
    """
    current_node = self.flow[self._i_train_node]
    task_callable = self._train_callable_class(self._flownode,
                                               purge_nodes=False)
    empty_iterator = True
    for i_task, data in enumerate(data_iterable):
        empty_iterator = False
        # Note: if data contains additional args we assume that the
        #       callable can handle this.
        task_callable(data)
        if self.verbose:
            print("    finished nonparallel task no. %d" % (i_task + 1))
    if empty_iterator:
        if current_node.get_current_train_phase() == 1:
            err_str = ("The training data iteration for node "
                       "no. %d could not be repeated for the "
                       "second training phase, you probably "
                       "provided an iterator instead of an "
                       "iterable." % (self._i_train_node + 1))
        else:
            err_str = ("The training data iterator for node "
                       "no. %d is empty." % (self._i_train_node + 1))
        raise mdp.FlowException(err_str)
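# Illustrative note (not part of the original module): the two error
# branches above matter because a generator is exhausted after one pass,
# so for a node with a second training phase the data must be re-iterable.
# `make_chunk` and `flow` below are hypothetical placeholders:
#
#     chunk_gen = (make_chunk(i) for i in range(5))   # iterator: fails on
#     flow.train([chunk_gen])                         # the second phase
#
#     chunks = [make_chunk(i) for i in range(5)]      # list: can be
#     flow.train([chunks])                            # replayed per phase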
def execute(self, x, nodenr=None):
    if not isinstance(x, mdp.numx.ndarray):
        errstr = ("FreerunFlows can only be executed using numpy "
                  "arrays as input.")
        raise mdp.FlowException(errstr)
    if self.freerun_steps >= x.shape[0]:
        errstr = ("Number of freerun steps (%d) should be less than "
                  "the number of timesteps in x (%d)."
                  % (self.freerun_steps, x.shape[0]))
        raise mdp.FlowException(errstr)

    if self.external_input_range is None:
        external_input_range = []
    else:
        external_input_range = self.external_input_range

    # Run the flow on the first part of x for warmup.
    self._execute_seq(x[:-self.freerun_steps, :])

    # Columns that are fed back from the flow's own output during freerun.
    freerun_range = mdp.numx.setdiff1d(range(x.shape[1]),
                                       external_input_range)
    self.fb_value = mdp.numx.atleast_2d(x[-self.freerun_steps,
                                          freerun_range])

    res = mdp.numx.zeros((self.freerun_steps, x.shape[1]))
    if self.external_input_range is None:
        for step in range(self.freerun_steps):
            res[step] = self.fb_value
            self.fb_value = self._execute_seq(
                mdp.numx.atleast_2d(self.fb_value))
    else:
        for step in range(self.freerun_steps):
            external_input = mdp.numx.atleast_2d(
                x[-self.freerun_steps + step, external_input_range])
            total_input = mdp.numx.atleast_2d(
                mdp.numx.concatenate((external_input, self.fb_value), 1))
            res[step] = total_input
            self.fb_value = self._execute_seq(total_input)
    return mdp.numx.concatenate((x[:-self.freerun_steps, :], res))
def __init__(self, flow, crash_recovery=False, verbose=False,
             freerun_steps=None, external_input_range=None):
    super(FreerunFlow, self).__init__(flow, crash_recovery, verbose)
    if freerun_steps is None:
        errstr = ("The FreerunFlow must be initialized with an "
                  "explicit freerun horizon.")
        raise mdp.FlowException(errstr)
    self.freerun_steps = freerun_steps
    self.external_input_range = external_input_range
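# Minimal usage sketch for the two methods above (hypothetical nodes; the
# train-call format is only an example of the usual Oger convention of
# one data entry per node, so adapt it to your flow):
#
#     flow = FreerunFlow([reservoir, readout], freerun_steps=100)
#     flow.train([[], [[x_train, y_train]]])  # e.g. train only the readout
#     y = flow.execute(x)   # warm up on x[:-100, :], then feed the output
#                           # back as input for the last 100 steps
#
# With external_input_range=[0], column 0 of x keeps being read from the
# input during freerun while the remaining columns are fed back.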
def setup_parallel_execution(self, iterable, msg_iterable=None,
                             target_iterable=None,
                             execute_callable_class=BiFlowExecuteCallable):
    """Prepare the flow for handing out tasks to do the execution.

    Instead of automatically executing the _flow with the iterable,
    it only prepares the tasks for the scheduler.

    iterable -- Single array or iterable.
    msg_iterable -- Single message or iterable.
    target_iterable -- Single target or iterable.
    execute_callable_class -- Class used to create execution callables
        for the scheduler. By specifying your own class you can
        implement data transformations before the data is actually fed
        into the flow (e.g. from 8 bit image to 64 bit double
        precision).
    """
    self._bi_reset()  # normally not required, just for safety
    if self.is_parallel_training:
        raise ParallelBiFlowException("Parallel training is underway.")
    self._execute_callable_class = execute_callable_class
    iterable, msg_iterable, target_iterable = self._sanitize_iterables(
        iterable, msg_iterable, target_iterable)
    self._exec_data_iterator = iter(iterable)
    self._exec_msg_iterator = iter(msg_iterable)
    self._exec_target_iterator = iter(target_iterable)
    first_task = self._create_execute_task()
    if first_task is None:
        err = "The execute data iterable is empty."
        raise ParallelBiFlowException(err)
    task_data_chunk = first_task[0]
    # Only the first task contains the new callable (enables caching).
    # A fork is not required here, since the callable is always
    # forked in the scheduler.
    self._next_task = (task_data_chunk,
                       self._execute_callable_class(self._flownode,
                                                    purge_nodes=True))
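# Rough usage sketch for the task protocol prepared above (hypothetical
# data chunks; assumes the get_task/use_results protocol and the
# task_available property of the parallel flows, together with
# mdp.parallel.ProcessScheduler):
#
#     biflow.setup_parallel_execution(x_chunks, msg_iterable=msg_chunks)
#     scheduler = mdp.parallel.ProcessScheduler()
#     while biflow.task_available:
#         scheduler.add_task(*biflow.get_task())
#     y = biflow.use_results(scheduler.get_results())
#     scheduler.shutdown()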
def setup_parallel_execution(self, iterable, nodenr=None,
                             execute_callable_class=FlowExecuteCallable):
    """Prepare the flow for handing out tasks to do the execution.

    After calling setup_parallel_execution one has to pick up the
    tasks with get_task, run them and finally return the results via
    use_results. use_results will then return the result as if the
    flow was executed in the normal way.

    iterable -- An iterable or iterator that returns data arrays that
        are used as input to the flow. Alternatively, one can specify
        one data array as input. If a custom execute_callable_class is
        used to preprocess the data then other data types can be used
        as well.
    nodenr -- Same as in a normal flow: the flow is only executed up
        to the node with this index.
    execute_callable_class -- Class used to create execution callables
        for the scheduler. By specifying your own class you can
        implement data transformations before the data is actually fed
        into the flow (e.g. from 8 bit image to 64 bit double
        precision).
    """
    if self.is_parallel_training:
        raise ParallelFlowException("Parallel training is underway.")
    self._execute_callable_class = execute_callable_class
    if isinstance(iterable, n.ndarray):
        iterable = [iterable]
    self._exec_data_iterator = iter(iterable)
    first_task = self._create_execute_task()
    if first_task is None:
        errstr = "The execute data iterator is empty."
        raise mdp.FlowException(errstr)
    task_data_chunk = first_task[0]
    # Only the first task contains the new callable (enables caching).
    # A fork is not required here, since the callable is always
    # forked in the scheduler.
    self._next_task = (task_data_chunk,
                       self._execute_callable_class(self._flownode,
                                                    purge_nodes=True))
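# Hedged example of the execute_callable_class hook described in the
# docstring above: a hypothetical subclass (not part of MDP) that casts
# raw 8-bit image chunks to double precision before they enter the flow.
class Uint8ExecuteCallable(FlowExecuteCallable):
    """Convert uint8 chunks to float64 in [0, 1] before execution."""

    def __call__(self, x):
        # rescale to [0, 1] doubles, then run the normal execution
        return super(Uint8ExecuteCallable, self).__call__(
            x.astype("float64") / 255.0)

# usage sketch:  flow.setup_parallel_execution(
#                    uint8_chunks,
#                    execute_callable_class=Uint8ExecuteCallable)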
def _next_train_phase(self):
    """Find the next phase or node for parallel training.

    When it is found the corresponding internal variables are set.
    Nodes which are not derived from ParallelNode are trained locally.
    If a fork() fails due to a NotForkableParallelException in a
    certain train phase, then the training is done locally as well
    (but fork() is tested again for the next phase).
    """
    # find the next node that can be forked, if required do local training
    while self._i_train_node < len(self.flow):
        current_node = self.flow[self._i_train_node]
        if not current_node.is_training():
            self._i_train_node += 1
            continue
        data_iterable = self._train_data_iterables[self._i_train_node]
        try:
            self._flownode.fork()
            # fork successful, prepare parallel training
            if self.verbose:
                print("start parallel training phase of " +
                      "node no. %d in parallel flow" %
                      (self._i_train_node + 1))
            self._train_data_iterator = iter(data_iterable)
            first_task = self._create_train_task()
            # make sure that the iterator is not empty
            if first_task is None:
                if current_node.get_current_train_phase() == 1:
                    err_str = ("The training data iteration for node "
                               "no. %d could not be repeated for the "
                               "second training phase, you probably "
                               "provided an iterator instead of an "
                               "iterable." % (self._i_train_node + 1))
                else:
                    err_str = ("The training data iterator for node "
                               "no. %d is empty."
                               % (self._i_train_node + 1))
                raise mdp.FlowException(err_str)
            task_data_chunk = first_task[0]
            # Only the first task contains the new callable (enables
            # caching). A fork is not required here, since the callable
            # is always forked in the scheduler.
            self._next_task = (task_data_chunk,
                               self._train_callable_class(self._flownode))
            break
        except NotForkableParallelException as exception:
            if self.verbose:
                print("could not fork node no. %d: %s" %
                      (self._i_train_node + 1, str(exception)))
                print("start nonparallel training phase of " +
                      "node no. %d in parallel flow" %
                      (self._i_train_node + 1))
            self._local_train_phase(data_iterable)
            if self.verbose:
                print("finished nonparallel training phase of " +
                      "node no. %d in parallel flow" %
                      (self._i_train_node + 1))
            self._stop_training_hook()
            self._flownode.stop_training()
            self._post_stop_training_hook()
            if not self.flow[self._i_train_node].is_training():
                self._i_train_node += 1
    else:
        # training is finished
        self._i_train_node = None
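# Sketch of the driver loop that _next_train_phase serves (roughly what
# ParallelFlow.train does when a scheduler is given; flow, data and
# scheduler are hypothetical, and the task_available/get_task/use_results
# protocol is the one described in setup_parallel_execution above):
#
#     flow.setup_parallel_training(data_iterables)
#     scheduler = mdp.parallel.ProcessScheduler()
#     while flow.is_parallel_training:
#         while flow.task_available:
#             scheduler.add_task(*flow.get_task())
#         flow.use_results(scheduler.get_results())
#     scheduler.shutdown()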