def test_multiple_schedulers():
    """Test parallel flow training with multiple schedulers.

    Each training phase may get its own scheduler (``None`` marks a
    non-parallel phase); execution then uses yet another scheduler.
    """
    flow = parallel.ParallelFlow([mdp.nodes.SFANode(output_dim=5),
                                  mdp.nodes.PolynomialExpansionNode(degree=3),
                                  mdp.nodes.SFANode(output_dim=20)])
    # range instead of Python-2-only xrange, consistent with the other
    # tests in this module (the file already uses old_div from the
    # Python 3 port).
    data_iterables = [[n.random.random((30, 10)) * n.arange(1, 11)
                       for _ in range(6)],
                      None,
                      [n.random.random((30, 10)) * n.arange(1, 11)
                       for _ in range(6)]]
    schedulers = [parallel.Scheduler(), None, parallel.Scheduler()]
    flow.train(data_iterables, scheduler=schedulers)
    # parallel execution
    iterable = [n.random.random((20, 10)) for _ in range(6)]
    flow.execute(iterable, scheduler=parallel.Scheduler())
def test_scheduler_manager():
    """Test context manager interface for scheduler."""
    expected = n.array([0, 1, 4, 9, 16, 25])
    with parallel.Scheduler() as scheduler:
        # submit the six squaring tasks and collect the results before
        # the context manager shuts the scheduler down
        for value in range(6):
            scheduler.add_task(value, lambda x: x**2)
        results = scheduler.get_results()
        assert n.all(results == expected)
def test_execute_fork():
    """Test the forking of a node based on use_execute_fork."""

    class _test_ExecuteForkNode(mdp.nodes.IdentityNode):
        # Note: The explicit signature is important to preserve the dim
        # information during the fork.
        def __init__(self, input_dim=None, output_dim=None, dtype=None):
            # counters checked by the assertions below
            self.n_forks = 0
            self.n_joins = 0
            super(_test_ExecuteForkNode, self).__init__(input_dim=input_dim,
                                                        output_dim=output_dim,
                                                        dtype=dtype)

    class Parallel_test_ExecuteForkNode(parallel.ParallelExtensionNode,
                                        _test_ExecuteForkNode):

        def _fork(self):
            self.n_forks += 1
            return self._default_fork()

        def _join(self, forked_node):
            # accumulate joins from the forked node plus this join itself
            self.n_joins += forked_node.n_joins + 1

        def use_execute_fork(self):
            return True

    # Create the scheduler before the try block: the finally clause calls
    # scheduler.shutdown(), which would raise a NameError (masking the
    # original exception) if anything failed before the scheduler existed.
    scheduler = parallel.Scheduler()
    try:
        n_chunks = 6
        ## Part 1: test execute fork during flow training
        data_iterables = [[n.random.random((30, 10))
                           for _ in range(n_chunks)],
                          None,
                          [n.random.random((30, 10))
                           for _ in range(n_chunks)],
                          None]
        flow = parallel.ParallelFlow([mdp.nodes.PCANode(output_dim=5),
                                      _test_ExecuteForkNode(),
                                      mdp.nodes.SFANode(),
                                      _test_ExecuteForkNode()])
        flow.train(data_iterables, scheduler=scheduler)
        for node in flow:
            if isinstance(node, _test_ExecuteForkNode):
                assert node.n_forks == 2 * n_chunks + 2
                assert node.n_joins == 2 * n_chunks
                # reset the counters to prepare the execute test
                node.n_forks = 0
                node.n_joins = 0
        ## Part 2: test execute fork during flow execute
        data_iterable = [n.random.random((30, 10)) for _ in range(n_chunks)]
        flow.execute(data_iterable, scheduler=scheduler)
        for node in flow:
            if isinstance(node, _test_ExecuteForkNode):
                assert node.n_forks == n_chunks
                assert node.n_joins == n_chunks
    finally:
        # unregister the testing class
        del mdp.get_extensions()["parallel"][_test_ExecuteForkNode]
        scheduler.shutdown()
def test_scheduler():
    """Test scheduler with 6 tasks."""

    def square(x):
        return x**2

    sched = parallel.Scheduler()
    for value in range(6):
        sched.add_task(value, square)
    results = n.array(sched.get_results())
    sched.shutdown()
    # each task should have squared its input value
    expected = n.array([0, 1, 4, 9, 16, 25])
    assert n.all(results == expected)
def test_layer():
    """Test simple random training of a layer with three nodes.

    Note: the stray ``self`` parameter was removed -- this is a
    module-level pytest function, not a method, and pytest cannot
    collect it with an undefined ``self`` fixture.
    """
    node1 = mdp.nodes.SFANode(input_dim=10, output_dim=5)
    node2 = mdp.nodes.SFANode(input_dim=17, output_dim=3)
    node3 = mdp.nodes.SFANode(input_dim=3, output_dim=1)
    layer = mdp.hinet.Layer([node1, node2, node3])
    flow = parallel.ParallelFlow([layer])
    # range instead of Python-2-only xrange (consistent with the rest
    # of this module)
    data_iterables = [[n.random.random((10, 30)) for _ in range(3)]]
    scheduler = parallel.Scheduler()
    flow.train(data_iterables, scheduler=scheduler)
def test_firstnode():
    """Test special case in which the first node is untrainable.

    This tests the proper initialization of the internal variables.
    """
    nodes = [mdp.nodes.PolynomialExpansionNode(degree=2),
             mdp.nodes.SFANode(output_dim=20)]
    flow = parallel.ParallelFlow(nodes)
    # no training data for the untrainable expansion node
    train_data = [None, n.random.random((6, 20, 10))]
    scheduler = parallel.Scheduler()
    flow.train(train_data, scheduler=scheduler)
def test_FDANode():
    """Test Parallel FDANode."""
    # this test code is an adaption of the FDANode test
    decimals = 4
    center_a = [0., 2.]
    center_b = [0., -2.]
    stds = numx.array([1., 0.2])
    npoints = 50000
    angle = 45

    # input data: two distinct gaussians rotated by 45 deg
    def make_samples(size):
        return numx_rand.normal(0, 1., size=(size)) * stds

    data_a = make_samples((npoints, 2)) + center_a
    utils.rotate(data_a, angle, units='degrees')
    data_b = make_samples((npoints, 2)) + center_b
    utils.rotate(data_b, angle, units='degrees')
    # class labels for the two clouds
    labels_a = numx.ones((data_a.shape[0],), dtype='d')
    labels_b = 2. * numx.ones((data_b.shape[0],), dtype='d')
    flow = parallel.ParallelFlow([parallel.ParallelFDANode()])
    flow.train([[(data_a, labels_a), (data_b, labels_b)]],
               scheduler=parallel.Scheduler())
    fda_node = flow[0]
    assert fda_node.tlens[1] == npoints
    assert fda_node.tlens[2] == npoints
    # the estimated class means must match the rotated true centers
    rotated_a = numx.array([center_a])
    rotated_b = numx.array([center_b])
    utils.rotate(rotated_a, angle, units='degrees')
    utils.rotate(rotated_b, angle, units='degrees')
    assert_array_almost_equal(fda_node.means[1], rotated_a, 2)
    assert_array_almost_equal(fda_node.means[2], rotated_b, 2)
    projected = flow.execute([data_a, data_b],
                             scheduler=parallel.Scheduler())
    # projections are whitened and mutually uncorrelated
    assert_array_almost_equal(numx.mean(projected, axis=0),
                              [0., 0.], decimals)
    assert_array_almost_equal(numx.std(projected, axis=0),
                              [1., 1.], decimals)
    assert_almost_equal(utils.mult(projected[:, 0], projected[:, 1].T),
                        0., decimals)
    direction = old_div(fda_node.v[:, 0], fda_node.v[0, 0])
    assert_array_almost_equal(direction, [1., -1.], 2)
    direction = old_div(fda_node.v[:, 1], fda_node.v[0, 1])
    assert_array_almost_equal(direction, [1., 1.], 2)
def test_non_iterator():
    """Test parallel training and execution with a single array."""
    nodes = [mdp.nodes.SFANode(output_dim=5),
             mdp.nodes.PolynomialExpansionNode(degree=3),
             mdp.nodes.SFANode(output_dim=20)]
    flow = parallel.ParallelFlow(nodes)
    # a single array is accepted in place of a list of iterables
    train_data = n.random.random((200, 10)) * n.arange(1, 11)
    scheduler = parallel.Scheduler()
    flow.train(train_data, scheduler=scheduler)
    # test execution
    flow.execute(n.random.random((100, 10)))
def test_tasks():
    """Test parallel training and execution by running the tasks."""
    flow = parallel.ParallelFlow([mdp.nodes.SFANode(output_dim=5),
                                  mdp.nodes.PolynomialExpansionNode(degree=3),
                                  mdp.nodes.SFANode(output_dim=20)])
    # range instead of Python-2-only xrange (consistent with the rest
    # of this module)
    data_iterables = [[n.random.random((30, 10)) * n.arange(1, 11)
                       for _ in range(6)],
                      None,
                      [n.random.random((30, 10)) * n.arange(1, 11)
                       for _ in range(6)]]
    scheduler = parallel.Scheduler()
    flow.train(data_iterables, scheduler=scheduler)
    # parallel execution
    iterable = [n.random.random((20, 10)) for _ in range(6)]
    flow.execute(iterable, scheduler=scheduler)
def test_multiphase_checkpoints():
    """Test parallel checkpoint flow."""
    sfa_node = mdp.nodes.SFANode(input_dim=10, output_dim=8)
    sfa2_node = mdp.nodes.SFA2Node(input_dim=8, output_dim=6)
    flownode = mdp.hinet.FlowNode(mdp.Flow([sfa_node, sfa2_node]))
    flow = parallel.ParallelCheckpointFlow([
        flownode,
        mdp.nodes.PolynomialExpansionNode(degree=2),
        mdp.nodes.SFANode(output_dim=5)])
    # range instead of Python-2-only xrange (consistent with the rest
    # of this module)
    data_iterables = [[n.random.random((30, 10)) for _ in range(6)],
                      None,
                      [n.random.random((30, 10)) for _ in range(6)]]
    checkpoint = mdp.CheckpointFunction()
    scheduler = parallel.Scheduler()
    flow.train(data_iterables, scheduler=scheduler, checkpoints=checkpoint)
def test_nonparallel2():
    """Test training for mixture of parallel and non-parallel nodes."""
    # TODO: use a node with no parallel here
    sfa_node = mdp.nodes.SFANode(input_dim=10, output_dim=8)
    sfa2_node = mdp.nodes.SFA2Node(input_dim=8, output_dim=6)
    flownode = mdp.hinet.FlowNode(mdp.Flow([sfa_node, sfa2_node]))
    flow = parallel.ParallelFlow([
        flownode,
        mdp.nodes.PolynomialExpansionNode(degree=2),
        mdp.nodes.SFANode(output_dim=5)])
    # range instead of Python-2-only xrange (consistent with the rest
    # of this module)
    data_iterables = [[n.random.random((30, 10)) * n.arange(1, 11)
                       for _ in range(6)],
                      None,
                      [n.random.random((30, 10)) * n.arange(1, 11)
                       for _ in range(6)]]
    scheduler = parallel.Scheduler()
    flow.train(data_iterables, scheduler=scheduler)
    # test execution
    x = n.random.random([100, 10])
    flow.execute(x)
def test_parallelnet():
    """Test a simple parallel net with big data.

    Includes ParallelFlowNode, ParallelCloneLayer, ParallelSFANode
    and training via a ParallelFlow.

    Note: the stray ``self`` parameter was removed -- this is a
    module-level pytest function, not a method, and pytest cannot
    collect it with an undefined ``self`` fixture.
    """
    noisenode = mdp.nodes.NormalNoiseNode(input_dim=20 * 20,
                                          noise_args=(0, 0.0001))
    sfa_node = mdp.nodes.SFANode(input_dim=20 * 20, output_dim=10)
    switchboard = hinet.Rectangular2dSwitchboard(in_channels_xy=100,
                                                 field_channels_xy=20,
                                                 field_spacing_xy=10)
    flownode = mdp.hinet.FlowNode(mdp.Flow([noisenode, sfa_node]))
    sfa_layer = mdp.hinet.CloneLayer(flownode,
                                     switchboard.output_channels)
    flow = parallel.ParallelFlow([switchboard, sfa_layer])
    # range instead of Python-2-only xrange (consistent with the rest
    # of this module); the switchboard itself needs no training data
    data_iterables = [None,
                      [n.random.random((10, 100 * 100))
                       for _ in range(3)]]
    scheduler = parallel.Scheduler()
    flow.train(data_iterables, scheduler=scheduler)
def test_nonparallel3():
    """Test training for non-parallel nodes."""
    # TODO: use a node with no parallel here
    sfa_node = mdp.nodes.SFANode(input_dim=10, output_dim=8)
    # TODO: use a node with no parallel here
    sfa2_node = mdp.nodes.SFA2Node(input_dim=8, output_dim=6)
    flow = parallel.ParallelFlow([sfa_node, sfa2_node])
    # range instead of Python-2-only xrange (consistent with the rest
    # of this module)
    data_iterables = [[n.random.random((30, 10)) * n.arange(1, 11)
                       for _ in range(6)],
                      [n.random.random((30, 10)) * n.arange(1, 11)
                       for _ in range(6)]]
    scheduler = parallel.Scheduler()
    flow.train(data_iterables, scheduler=scheduler)
    # Manually drain any remaining parallel training tasks.
    # NOTE(review): since train() was already given a scheduler this loop
    # is presumably a no-op safety net -- confirm against ParallelFlow.
    while flow.is_parallel_training:
        results = []
        while flow.task_available():
            task = flow.get_task()
            results.append(task())
        flow.use_results(results)
    # test execution
    x = n.random.random([100, 10])
    flow.execute(x)
def test_multiphase():
    """Test parallel training and execution for nodes with multiple
    training phases.
    """
    sfa_node = mdp.nodes.SFANode(input_dim=10, output_dim=8)
    sfa2_node = mdp.nodes.SFA2Node(input_dim=8, output_dim=6)
    flownode = mdp.hinet.FlowNode(mdp.Flow([sfa_node, sfa2_node]))
    flow = parallel.ParallelFlow([
        flownode,
        mdp.nodes.PolynomialExpansionNode(degree=2),
        mdp.nodes.SFANode(output_dim=5)])
    # range instead of Python-2-only xrange (consistent with the rest
    # of this module)
    data_iterables = [[n.random.random((30, 10)) * n.arange(1, 11)
                       for _ in range(6)],
                      None,
                      [n.random.random((30, 10)) * n.arange(1, 11)
                       for _ in range(6)]]
    scheduler = parallel.Scheduler()
    flow.train(data_iterables, scheduler=scheduler)
    # test normal execution
    x = n.random.random([100, 10])
    flow.execute(x)
    # parallel execution
    iterable = [n.random.random((20, 10)) for _ in range(6)]
    flow.execute(iterable, scheduler=scheduler)