def _stop_training(self):
    """Cluster the collected training data and store the final centroids.

    Reads ``self.data``, ``self.dtype``, ``self.tlen``, ``self.input_dim``,
    ``self._num_clusters``, ``self.max_iter`` and ``self._centroids``.
    On return ``self.data`` is an array of shape ``(tlen, input_dim)`` and
    ``self._centroids`` holds the centroids of the last completed
    iteration, whether or not the iteration converged within ``max_iter``
    steps.
    """
    self.data = numx.array(self.data, dtype=self.dtype).reshape(
        self.tlen, self.input_dim)

    # Choose initial centroids unless they are already given.  The explicit
    # emptiness test also accepts an ndarray of centroids, whose truth value
    # would be ambiguous.
    if self._centroids is None or len(self._centroids) == 0:
        import random
        centr_idx = random.sample(range(self.tlen), self._num_clusters)
        centroids = self.data[centr_idx]
    else:
        centroids = self._centroids

    n_clusters = len(centroids)
    for _ in range(self.max_iter):
        # per-cluster running sum of member positions and member count
        sums = [0.0] * n_clusters
        counts = [0.0] * n_clusters
        for x in self.data:
            idx = self._nearest_centroid_idx(x, centroids)
            sums[idx] = sums[idx] + x
            counts[idx] += 1.0

        # New position is the mean of the members; a cluster that attracted
        # no point keeps its previous centroid.
        new_centroids = numx.array([
            total / count if count > 0.0 else centroids[idx]
            for idx, (total, count) in enumerate(zip(sums, counts))
        ])

        # stop as soon as an iteration leaves the centroids unchanged
        if numx.all(new_centroids == centroids):
            break
        centroids = new_centroids

    # Store the result even when max_iter was exhausted before convergence;
    # otherwise the clustering work would be silently discarded.
    self._centroids = centroids
def test_mixed_dict(self):
    """Test msg being a dict containing an array."""
    rescont = MessageResultContainer()
    msg1 = {
        "f": 2,
        "a": np.zeros((10, 3), 'int'),
        "b": "aaa",
        "c": 1,
    }
    msg2 = {
        "a": np.ones((15, 3), 'int'),
        "b": "bbb",
        "c": 3,
        "d": 1,
    }
    rescont.add_message(msg1)
    rescont.add_message(msg2)
    combined_msg = rescont.get_message()

    # expected array: the 10 zero rows of msg1 followed by the 15 one rows
    # of msg2
    a = np.zeros((25, 3), 'int')
    a[10:] = 1
    reference_msg = {"a": a, "c": 4, "b": "aaabbb", "d": 1, "f": 2}

    # The array entry is checked on its own (against the *combined* message,
    # not against itself) and then removed, so that the remaining entries
    # can be compared with plain dict equality.
    assert np.all(combined_msg["a"] == reference_msg["a"])
    combined_msg.pop("a")
    reference_msg.pop("a")
    assert combined_msg == reference_msg
def _stop_training(self):
    """Run the clustering iteration on the collected data.

    ``self.data`` is converted to an array of shape
    ``(self.tlen, self.input_dim)``.  Starting from ``self._centroids`` (or
    from ``self._num_clusters`` randomly drawn data points when none are
    given), centroids are updated for at most ``self.max_iter`` steps.  The
    centroids of the last step are always stored in ``self._centroids``.
    """
    self.data = numx.array(self.data, dtype=self.dtype).reshape(
        self.tlen, self.input_dim)

    # choose initial centroids unless they are already given; use an
    # explicit emptiness test because the truth value of an ndarray with
    # several elements is ambiguous
    have_centroids = (self._centroids is not None
                      and len(self._centroids) > 0)
    if have_centroids:
        centroids = self._centroids
    else:
        import random
        centr_idx = random.sample(range(self.tlen), self._num_clusters)
        centroids = self.data[centr_idx]

    for _step in range(self.max_iter):
        # list of (sum_position, num_points), one entry per cluster
        new_centroids = [(0., 0.)] * len(centroids)
        # assign every data point to its nearest centroid
        for x in self.data:
            idx = self._nearest_centroid_idx(x, centroids)
            pos_sum, count = new_centroids[idx]
            new_centroids[idx] = (pos_sum + x, count + 1.)

        # mean position per cluster; empty clusters keep their old centroid
        new_centroids = numx.array([
            pos_sum / count if count > 0. else centroids[idx]
            for idx, (pos_sum, count) in enumerate(new_centroids)
        ])

        # check if we are stable
        if numx.all(new_centroids == centroids):
            break
        centroids = new_centroids

    # Also reached when max_iter ran out: keep the best centroids found so
    # far instead of dropping them.
    self._centroids = centroids
def test_mixed_dict(self):
    """Test msg being a dict containing an array."""
    rescont = MessageResultContainer()
    msg1 = {
        "f": 2,
        "a": np.zeros((10, 3), 'int'),
        "b": "aaa",
        "c": 1,
    }
    msg2 = {
        "a": np.ones((15, 3), 'int'),
        "b": "bbb",
        "c": 3,
        "d": 1,
    }
    for msg in (msg1, msg2):
        rescont.add_message(msg)
    combined_msg = rescont.get_message()

    # reference array: rows 0-9 come from msg1 (zeros), rows 10-24 from
    # msg2 (ones)
    a = np.zeros((25, 3), 'int')
    a[10:] = 1
    reference_msg = {"a": a, "c": 4, "b": "aaabbb", "d": 1, "f": 2}

    # Check the array of the combined message against the reference (the
    # previous form compared the reference with itself), then drop it so
    # the remaining entries can be compared with ordinary dict equality.
    combined_a = combined_msg.pop("a")
    reference_a = reference_msg.pop("a")
    assert np.all(combined_a == reference_a)
    assert combined_msg == reference_msg
def _check_train_args(self, x, labels):
    """Validate the labels passed to training.

    ``labels`` may be a single value or a list, tuple or array with one
    entry per row of ``x``.  Every label must be -1 or 1.

    Raises ``mdp.TrainingException`` when a label sequence does not have
    ``x.shape[0]`` entries or when any label has an absolute value other
    than 1.
    """
    is_sequence = isinstance(labels, (list, tuple, numx.ndarray))

    if is_sequence and len(labels) != x.shape[0]:
        msg = ("The number of labels should be equal to the number of "
               "datapoints (%d != %d)" % (len(labels), x.shape[0]))
        raise mdp.TrainingException(msg)

    if not is_sequence:
        # a single label is checked like a one-element sequence
        labels = [labels]

    # Build a real list of booleans: on Python 3 ``map`` returns a lazy
    # iterator, which ``numx.all`` would not evaluate element by element,
    # so invalid labels would slip through.
    if not numx.all([abs(label) == 1 for label in labels]):
        msg = "The labels must be either -1 or 1."
        raise mdp.TrainingException(msg)
def test_switchboard_gradient1(self):
    """Compare the gradient of a tiny switchboard with a hand-written one."""
    switchboard = mdp.hinet.Switchboard(input_dim=4, connections=[2, 0])
    samples = numx_rand.random((2, 4))

    # expected 2x4 block for one sample; its rows have their 1 in columns
    # 2 and 0, matching connections=[2, 0]
    per_sample = [[0, 0, 1, 0],
                  [1, 0, 0, 0]]

    mdp.activate_extension("gradient")
    try:
        grad = switchboard._gradient(samples)[1]["grad"]
        # same block for both samples
        expected = numx.array([per_sample, per_sample], dtype=grad.dtype)
        assert numx.all(grad == expected)
    finally:
        # always switch the extension off again, even on failure
        mdp.deactivate_extension("gradient")
def test_switchboard_gradient1(self):
    """Verify the gradient of a 4-input switchboard with connections [2, 0]."""
    node = mdp.hinet.Switchboard(input_dim=4, connections=[2, 0])
    data = numx_rand.random((2, 4))

    mdp.activate_extension("gradient")
    try:
        _, out_msg = node._gradient(data)[:2]
        actual = out_msg["grad"]
        # one (2, 4) block per input sample, identical for both samples
        block_rows = [[0, 0, 1, 0], [1, 0, 0, 0]]
        wanted = numx.array([block_rows, block_rows], dtype=actual.dtype)
        assert numx.all(actual == wanted)
    finally:
        # the extension must not stay active for other tests
        mdp.deactivate_extension("gradient")
def _check_train_args(self, x, labels):
    """Check that the training labels are well-formed.

    ``labels`` is either a single label or a list/tuple/array holding one
    label per row of ``x``; each label has to be -1 or 1.

    Raises ``mdp.TrainingException`` if the number of labels differs from
    ``x.shape[0]`` or if a label is neither -1 nor 1.
    """
    sequence_types = (list, tuple, numx.ndarray)

    if isinstance(labels, sequence_types):
        if len(labels) != x.shape[0]:
            msg = ("The number of labels should be equal to the number of "
                   "datapoints (%d != %d)" % (len(labels), x.shape[0]))
            raise mdp.TrainingException(msg)
    else:
        # wrap a scalar label so the value check below can iterate over it
        labels = [labels]

    # A list comprehension is used instead of ``map``: on Python 3 ``map``
    # is lazy and ``numx.all`` would not look at the individual results,
    # which would disable this check.
    valid = [abs(label) == 1 for label in labels]
    if not numx.all(valid):
        msg = "The labels must be either -1 or 1."
        raise mdp.TrainingException(msg)
def test_execute_routing(self):
    """Check how execute routes the data and the entries of the message."""
    switchboard = BiSwitchboard(input_dim=3, connections=[2, 0, 1])
    x = n.array([[1, 2, 3],
                 [4, 5, 6]])
    msg = {
        "string": "blabla",
        "list": [1, 2],
        "data": x.copy(),  # should be mapped by switchboard
        "data2": n.zeros(3),  # should not be modified
        "data3": n.zeros((3, 4)),  # should not be modified
    }

    y, out_msg = switchboard.execute(x, msg)

    # columns of x reordered as [2, 0, 1]
    expected = n.array([[3, 1, 2],
                        [6, 4, 5]])
    assert (y == expected).all()
    # non-array entries come back unchanged
    for key in ("string", "list"):
        assert out_msg[key] == msg[key]
    assert n.all(out_msg["data"] == expected)
    # arrays that do not match the data shape keep their shape
    assert out_msg["data2"].shape == (3,)
    assert out_msg["data3"].shape == (3, 4)
def test_switchboard_gradient2(self):
    """Check the switchboard gradient against a dense reference computation."""
    dim = 100
    random_positions = numx.random.random((dim,)) * (dim - 1)
    connections = [int(pos) for pos in random_positions]
    sboard = mdp.hinet.Switchboard(input_dim=dim, connections=connections)
    x = numx.random.random((10, dim))
    # assume a 5-dimensional gradient at this stage
    incoming_grad = numx.random.random((10, dim, 5))

    def _reference_jacobian(node, data):
        """Original reference implementation: one dense 0/1 matrix per sample."""
        jac = numx.zeros((node.output_dim, node.input_dim))
        jac[range(node.output_dim), node.connections] = 1
        return numx.tile(jac, (len(data), 1, 1))

    with mdp.extension("gradient"):
        ext_grad = sboard._gradient(x, incoming_grad)[1]["grad"]
        jacobians = _reference_jacobian(sboard, x)
        # chain rule per sample: switchboard matrix times incoming gradient
        ref_grad = numx.asarray([
            numx.dot(jac, grad_i)
            for jac, grad_i in zip(jacobians, incoming_grad)
        ])
        assert numx.all(ext_grad == ref_grad)
def test_switchboard_gradient2(self):
    """Gradient of a 100-dimensional random switchboard vs. a reference."""
    dim = 100
    scaled = numx.random.random((dim,)) * (dim - 1)
    connections = [int(value) for value in scaled]
    switchboard = mdp.hinet.Switchboard(input_dim=dim,
                                        connections=connections)
    x = numx.random.random((10, dim))
    # assume a 5-dimensional gradient at this stage
    prev_grad = numx.random.random((10, dim, 5))

    # original reference implementation
    def _dense_switchboard_grad(board, samples):
        dense = numx.zeros((board.output_dim, board.input_dim))
        dense[range(board.output_dim), board.connections] = 1
        return numx.tile(dense, (len(samples), 1, 1))

    with mdp.extension("gradient"):
        result = switchboard._gradient(x, prev_grad)
        actual = result[1]["grad"]
        dense_grads = _dense_switchboard_grad(switchboard, x)
        # multiply each sample's dense matrix with that sample's gradient
        products = []
        for dense, previous in zip(dense_grads, prev_grad):
            products.append(numx.dot(dense, previous))
        expected = numx.asarray(products)
        assert numx.all(actual == expected)
def test_layer_gradient(self):
    """Check that a layer's gradient is block-diagonal in its nodes' gradients."""
    first = mdp.nodes.SFA2Node(input_dim=4, output_dim=3)
    second = mdp.nodes.SFANode(input_dim=6, output_dim=2)
    layer = mdp.hinet.Layer([first, second])
    layer.train(numx_rand.random((100, 10)))
    layer.stop_training()

    mdp.activate_extension("gradient")
    try:
        x = numx_rand.random((7, 10))
        grad = layer._gradient(x)[1]["grad"]

        # get reference result: each node only sees its own input columns
        split_in = first.input_dim
        split_out = first.output_dim
        first_grad = first._gradient(x[:, :split_in])[1]["grad"]
        second_grad = second._gradient(x[:, split_in:])[1]["grad"]

        # the off-diagonal blocks stay zero
        expected = numx.zeros((7, 5, 10))
        expected[:, :split_out, :split_in] = first_grad
        expected[:, split_out:, split_in:] = second_grad
        assert numx.all(grad == expected)
    finally:
        mdp.deactivate_extension("gradient")
def test_layer_gradient(self):
    """Layer gradient must equal the per-node gradients placed block-wise."""
    sfa2 = mdp.nodes.SFA2Node(input_dim=4, output_dim=3)
    sfa = mdp.nodes.SFANode(input_dim=6, output_dim=2)
    layer = mdp.hinet.Layer([sfa2, sfa])
    training_data = numx_rand.random((100, 10))
    layer.train(training_data)
    layer.stop_training()

    mdp.activate_extension("gradient")
    try:
        x = numx_rand.random((7, 10))
        layer_result = layer._gradient(x)
        layer_grad = layer_result[1]["grad"]

        # get reference result from the individual nodes
        in_cut, out_cut = sfa2.input_dim, sfa2.output_dim
        sfa2_grad = sfa2._gradient(x[:, :in_cut])[1]["grad"]
        sfa_grad = sfa._gradient(x[:, in_cut:])[1]["grad"]

        # 7 samples, 3 + 2 outputs, 4 + 6 inputs
        reference = numx.zeros((7, 5, 10))
        reference[:, :out_cut, :in_cut] = sfa2_grad
        reference[:, out_cut:, in_cut:] = sfa_grad
        assert numx.all(layer_grad == reference)
    finally:
        # leave the extension deactivated for the other tests
        mdp.deactivate_extension("gradient")
def test_quadexpan_gradient1(self):
    """Check the QuadraticExpansionNode gradient at the point (1, 3, 4)."""
    node = mdp.nodes.QuadraticExpansionNode()
    x = numx.array([[1, 3, 4]])
    node.execute(x)

    # one row per expanded output; the comment names that output
    expected_rows = [
        [1, 0, 0],  # x1
        [0, 1, 0],  # x2
        [0, 0, 1],  # x3
        [2, 0, 0],  # x1x1
        [3, 1, 0],  # x1x2
        [4, 0, 1],  # x1x3
        [0, 6, 0],  # x2x2
        [0, 4, 3],  # x2x3
        [0, 0, 8],  # x3x3
    ]

    mdp.activate_extension("gradient")
    try:
        grad = node._gradient(x)[1]["grad"]
        # leading axis of length one for the single sample
        reference = numx.array([expected_rows])
        assert numx.all(grad == reference)
    finally:
        mdp.deactivate_extension("gradient")
def test_quadexpan_gradient1(self):
    """Test validity of the gradient for QuadraticExpansionNode."""
    expansion = mdp.nodes.QuadraticExpansionNode()
    point = numx.array([[1, 3, 4]])
    expansion.execute(point)

    mdp.activate_extension("gradient")
    try:
        result = expansion._gradient(point)
        actual = result[1]["grad"]
        linear_part = [
            [1, 0, 0],  # x1
            [0, 1, 0],  # x2
            [0, 0, 1],  # x3
        ]
        quadratic_part = [
            [2, 0, 0],  # x1x1
            [3, 1, 0],  # x1x2
            [4, 0, 1],  # x1x3
            [0, 6, 0],  # x2x2
            [0, 4, 3],  # x2x3
            [0, 0, 8],  # x3x3
        ]
        # a single sample, so the reference has a leading axis of length 1
        wanted = numx.array([linear_part + quadratic_part])
        assert numx.all(actual == wanted)
    finally:
        # make sure the extension is switched off again
        mdp.deactivate_extension("gradient")
def test_message_splitting(self):
    """Check how a clone layer splits and recombines message arrays."""
    template_node = DummyBiNode(input_dim=3)
    clonelayer = CloneBiLayer(template_node, 2, use_copies=True)
    x = n.random.random((10, 6))
    data1 = n.random.random((10, 4))  # should be split
    data2 = n.random.random((10, 5))  # should not be touched
    msg = {
        "string": "blabla",
        "list": [1, 2],
        "data1": data1,
        "data2": data2,
    }

    y, out_msg = clonelayer.execute(x, msg)
    first, second = clonelayer.nodes

    assert n.all(x == y)
    # plain entries pass through unchanged
    for key in ("string", "list"):
        assert out_msg[key] == msg[key]
    # data1 comes back complete; each node received one half of its columns
    assert n.all(out_msg["data1"] == data1)
    assert n.all(first.data1 == data1[:, :2])
    assert n.all(second.data1 == data1[:, 2:])
    # data2 is handed around as the very same object
    assert out_msg["data2"] is data2
    for clone in (first, second):
        assert n.all(clone.data2 is data2)