def test_pretrained_nodes(self):
    """Test a BiFlowNode with two normal pretrained nodes."""
    sfa_node = mdp.nodes.SFANode(input_dim=10, output_dim=8)
    sfa2_node = mdp.nodes.SFA2Node(input_dim=8, output_dim=6)
    flownode = BiFlowNode(BiFlow([sfa_node, sfa2_node]))
    flow = mdp.Flow([flownode])
    data_iterables = [[n.random.random((30, 10)) for _ in range(6)]]
    flow.train(data_iterables)
    # wrap the now trained internal flow in a fresh BiFlowNode
    pretrained_flow = flow[0]._flow
    biflownode = BiFlowNode(pretrained_flow)
    x = n.random.random([100, 10])
    biflownode.execute(x)
def trace_training(self, path, flow, x, msg=None, stop_msg=None,
                   trace_name="training", debug=False, **kwargs):
    """Trace a single training phase and the stop_training.

    Return a tuple containing a list of the training slide filenames,
    the training node ids and the same for stop_training.

    path -- Path where the inspection files will be stored.
    trace_name -- Name prefix for this inspection (default is training).
    **kwargs -- Additional arguments for flow.train can be specified
        as keyword arguments.
    """
    self._reset()
    self._trace_path = path
    # train and stop filenames must be different
    self._trace_name = trace_name + "_t"
    self._flow = flow
    self._tracing_decorator.decorate_flow(flow)
    biflownode = BiFlowNode(BiFlow(flow.flow))
    try:
        biflownode.train(x=x, msg=msg, **kwargs)
        # reset is important for the following stop_training
        biflownode.bi_reset()
    # Note: this also catches legacy string exceptions (which are still
    # used in numpy, e.g. np.core.multiarray.error)
    except:
        if debug:
            # insert the error slide and encapsulate the exception
            traceback.print_exc()
            self._write_error_frame()
            result = (self._slide_filenames, self._slide_node_ids,
                      None, None)
            raise TraceDebugException(result=result)
        else:
            raise
    train_filenames = self._slide_filenames
    train_node_ids = self._slide_node_ids
    self._reset()
    self._trace_name = trace_name + "_s"
    try:
        biflownode.stop_training(stop_msg)
    except:
        if debug:
            # insert the error slide and encapsulate the exception
            traceback.print_exc()
            self._write_error_frame()
            result = (train_filenames, train_node_ids,
                      self._slide_filenames, self._slide_node_ids)
            raise TraceDebugException(result=result)
        else:
            raise
    stop_filenames = self._slide_filenames
    stop_node_ids = self._slide_node_ids
    # restore undecorated flow
    self._tracing_decorator.decorate_flow(flow, undecorate_mode=True)
    return train_filenames, train_node_ids, stop_filenames, stop_node_ids
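# Hedged usage sketch for trace_training (illustrative only: `inspector`,
# `biflow` and `x` are assumed names, not defined in this module):
#
#     train_files, train_ids, stop_files, stop_ids = inspector.trace_training(
#         path="/tmp/inspection", flow=biflow, x=x)
#     # train_files / stop_files are the generated slide filenames,
#     # train_ids / stop_ids the corresponding node ids.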
def test_two_nodes2(self):
    """Test a BiFlowNode with two normal nodes using a normal Flow."""
    sfa_node = mdp.nodes.SFANode(input_dim=10, output_dim=8)
    sfa2_node = mdp.nodes.SFA2Node(input_dim=8, output_dim=6)
    flownode = BiFlowNode(BiFlow([sfa_node, sfa2_node]))
    flow = mdp.Flow([flownode])
    data_iterables = [[n.random.random((30, 10)) for _ in range(6)]]
    flow.train(data_iterables)
    x = n.random.random([100, 10])
    flow.execute(x)
def test_two_nodes1(self):
    """Test a BiFlowNode with two normal nodes."""
    sfa_node = mdp.nodes.SFANode(input_dim=10, output_dim=8)
    sfa2_node = mdp.nodes.SFA2Node(input_dim=8, output_dim=6)
    flownode = BiFlowNode(BiFlow([sfa_node, sfa2_node]))
    # two training phases, one for each internal node
    for _ in range(2):
        for _ in range(6):
            flownode.train(n.random.random((30, 10)))
        flownode.stop_training()
    x = n.random.random([100, 10])
    flownode.execute(x)
def test_use_copies_msg_flownode(self):
    """Test the correct reaction to an outgoing use_copies message."""
    stop_result = ({"clonelayer" + MSG_ID_SEP + "use_copies": True},
                   EXIT_TARGET)
    stop_sfa_node = SFABiNode(stop_result=stop_result,
                              input_dim=10, output_dim=3)
    biflownode = BiFlowNode(BiFlow([stop_sfa_node]))
    clonelayer = CloneBiLayer(node=biflownode, n_nodes=3,
                              use_copies=False, node_id="clonelayer")
    biflow = clonelayer + IdentityBiNode()
    x = n.random.random((100, 30))
    biflow.train(x)
    assert clonelayer.use_copies is True
def test_use_copies_msg(self):
    """Test the correct reaction to an outgoing use_copies message
    during parallel training."""
    stop_result = ({"clonelayer" + MSG_ID_SEP + "use_copies": True},
                   EXIT_TARGET)
    stop_sfa_node = SFABiNode(stop_result=stop_result,
                              input_dim=10, output_dim=3)
    biflownode = BiFlowNode(BiFlow([stop_sfa_node]))
    clonelayer = ParallelCloneBiLayer(node=biflownode, n_nodes=3,
                                      use_copies=False,
                                      node_id="clonelayer")
    data = [[n.random.random((100, 30)) for _ in range(5)]]
    biflow = ParallelBiFlow([clonelayer])
    biflow.train(data, scheduler=mdp.parallel.Scheduler())
    assert clonelayer.use_copies is True
def setup_parallel_training(self, data_iterables, msg_iterables=None,
                            stop_messages=None,
                            train_callable_class=BiFlowTrainCallable):
    """Prepare the flow for handing out tasks to do the training.

    After calling setup_parallel_training one has to pick up the tasks
    with get_task, run them and finally return the results via
    use_results. Tasks are available as long as task_available is True.
    Training may require multiple phases, which are each closed by
    calling use_results.

    data_iterables -- A list of iterables, one for each node in the
        flow. The iterators returned by the iterables must return data
        arrays that are then used for the node training. See Flow.train
        for more details. If a custom train_callable_class is used to
        preprocess the data then other data types can be used as well.
    msg_iterables -- A list of iterables for the messages. Can also be
        a single message if data_iterables is a single array.
    stop_messages -- Sequence of messages for stop_training.
    train_callable_class -- Class used to create training callables for
        the scheduler. By specifying your own class you can implement
        data transformations before the data is actually fed into the
        flow (e.g. from 8 bit image to 64 bit double precision). Note
        that train_callable_class is only used if a scheduler was
        provided (the default is BiFlowTrainCallable).
    """
    self._bi_reset()  # normally not required, just for safety
    if self.is_parallel_training:
        err = "Parallel training is already underway."
        raise ParallelBiFlowException(err)
    self._train_callable_class = train_callable_class
    data_iterables, msg_iterables = self._sanitize_training_iterables(
        data_iterables=data_iterables, msg_iterables=msg_iterables)
    self._train_data_iterables = data_iterables
    self._train_msg_iterables = msg_iterables
    if stop_messages is None:
        stop_messages = [None] * len(data_iterables)
    self._stop_messages = stop_messages
    self._flownode = BiFlowNode(BiFlow(self.flow))
    self._i_train_node = 0
    self._next_train_phase()
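# Hedged sketch of the manual task loop described in the docstring above
# (assumes `biflow` is a ParallelBiFlow, `scheduler` an
# mdp.parallel.Scheduler instance and `data` a suitable list of iterables;
# all three names are illustrative):
#
#     biflow.setup_parallel_training(data)
#     while biflow.is_parallel_training:
#         while biflow.task_available:
#             task = biflow.get_task()
#             scheduler.add_task(*task)
#         biflow.use_results(scheduler.get_results())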
def use_results(self, results):
    """Use the results from the scheduler.

    During parallel training this will start the next training phase.
    For parallel execution this will return the result, like a normal
    execute would. In addition it will join any forked nodes.

    results -- Iterable containing the results, normally the return
        value of scheduler.ResultContainer.get_results(). The individual
        results can be the return values of the tasks.
    """
    if self.is_parallel_training:
        for result in results:
            self._flownode.join(result)
        # perform local stop_training with result check
        self._stop_training_hook()
        result = self._flownode.stop_training(
            self._stop_messages[self._i_train_node])
        self._post_stop_training_hook()
        if result is not None:
            target = result[2]
            # values of +1, -1 and EXIT_TARGET are tolerated
            if target not in [1, -1, EXIT_TARGET]:
                err = ("Target node not found in flow during "
                       "stop_training phase, last result: " + str(result))
                raise BiFlowException(err)
        self._flownode.bi_reset()
        if self.verbose:
            print("finished parallel training phase of node no. %d "
                  "in parallel flow" % (self._i_train_node + 1))
        if not self.flow[self._i_train_node].is_training():
            self._i_train_node += 1
        self._next_train_phase()
    elif self.is_parallel_executing:
        self._exec_data_iterator = None
        self._exec_msg_iterator = None
        self._exec_target_iterator = None
        y_results = []
        msg_results = MessageResultContainer()
        # use internal flownode to join all biflownodes
        self._flownode = BiFlowNode(BiFlow(self.flow))
        for result_tuple in results:
            result, forked_biflownode = result_tuple
            # consolidate results
            if isinstance(result, tuple) and (len(result) == 2):
                y, msg = result
                msg_results.add_message(msg)
            else:
                y = result
            if y is not None:
                try:
                    y_results.append(y)
                except AttributeError:
                    # y_results was set to None by an earlier None result
                    err = "Some but not all y return values were None."
                    raise BiFlowException(err)
            else:
                y_results = None
            # join biflownode
            if forked_biflownode is not None:
                self._flownode.join(forked_biflownode)
        # return results
        if y_results is not None:
            y_results = n.concatenate(y_results)
        return (y_results, msg_results.get_message())
    else:
        err = "It seems that there are no results to retrieve."
        raise BiFlowException(err)
def execute(self, iterable=None, msg_iterable=None, target_iterable=None,
            scheduler=None, execute_callable_class=None,
            overwrite_result_container=True):
    """Execute the flow and return (y, msg).

    If a scheduler is provided the execution will be done in parallel
    on the scheduler.

    iterable -- Single array or iterable.
    msg_iterable -- Single message or iterable.
    target_iterable -- Single target or iterable.
    scheduler -- Either None for normal execution (default) or a
        Scheduler instance for parallel execution with the scheduler.
    execute_callable_class -- Class used to create execution callables
        for the scheduler. By specifying your own class you can
        implement data transformations before the data is actually fed
        into the flow (e.g. from 8 bit image to 64 bit double
        precision). Note that execute_callable_class is only used if a
        scheduler was provided (the default is then
        BiFlowExecuteCallable).
    overwrite_result_container -- If set to True (default) then the
        result container in the scheduler will be overwritten with an
        instance of ExecuteResultContainer, if it is not already an
        instance of that class.
    """
    if self.is_parallel_training:
        raise ParallelBiFlowException("Parallel training is underway.")
    if scheduler is None:
        if execute_callable_class is not None:
            err = ("An execute_callable_class was specified but no "
                   "scheduler was given, so the execute_callable_class "
                   "has no effect.")
            raise ParallelBiFlowException(err)
        return super(ParallelBiFlow, self).execute(iterable, msg_iterable,
                                                   target_iterable)
    if execute_callable_class is None:
        execute_callable_class = BiFlowExecuteCallable
    # check that the scheduler is compatible
    if overwrite_result_container:
        if not isinstance(scheduler.result_container,
                          parallel.ExecuteResultContainer):
            scheduler.result_container = parallel.ExecuteResultContainer()
    # do parallel execution
    self._flownode = BiFlowNode(BiFlow(self.flow))
    try:
        self.setup_parallel_execution(
            iterable=iterable,
            msg_iterable=msg_iterable,
            target_iterable=target_iterable,
            execute_callable_class=execute_callable_class)
        while self.task_available:
            task = self.get_task()
            scheduler.add_task(*task)
        result = self.use_results(scheduler.get_results())
    finally:
        # reset remaining iterator references, which cannot be pickled
        self._exec_data_iterator = None
        self._exec_msg_iterator = None
        self._exec_target_iterator = None
    return result
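# Hedged usage sketch for parallel execution (assumes `biflow` is a trained
# ParallelBiFlow and `x` a data array; shutting down the scheduler is left
# to the caller):
#
#     scheduler = mdp.parallel.Scheduler()
#     y, msg = biflow.execute(x, scheduler=scheduler)
#     scheduler.shutdown()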