def test_evaluate_is_batch_normalized(self):
    Y1 = np.ones((4, 1, 2))
    T1 = create_targets_object(np.ones((4, 1, 2)) * 2)
    Y2 = np.ones((4, 10, 2))
    T2 = create_targets_object(np.ones((4, 10, 2)) * 2)
    for err in self.error_funcs:
        e1, d1 = err(Y1, T1)
        e2, d2 = err(Y2, T2)
        self.assertAlmostEqual(e1, e2)
def check_deltas(net, X=None, T=None, n_timesteps=3, n_batches=5,
                 rnd=np.random.RandomState()):
    if X is None:
        X = rnd.randn(n_timesteps, n_batches, net.get_input_size())
    if T is None:
        T = rnd.randn(n_timesteps, n_batches, net.get_output_size())
    # normalize targets to sum to one
    T = T / T.sum(2).reshape(n_timesteps, n_batches, 1)
    T = create_targets_object(T)

    ######### calculate deltas ##########
    net.forward_pass(X)
    delta_calc = net.backward_pass(T).flatten()

    ######### estimate deltas ##########
    def f(X):
        net.forward_pass(X.reshape(n_timesteps, n_batches, -1))
        return net.calculate_error(T)

    delta_approx = approx_fprime(X.copy().flatten(), f, 1e-7)
    return (np.sum((delta_approx - delta_calc) ** 2) / n_batches,
            delta_calc, delta_approx)
def check_gradient(net, X=None, T=None, n_timesteps=3, n_batches=5,
                   rnd=np.random.RandomState()):
    if X is None:
        X = rnd.randn(n_timesteps, n_batches, net.get_input_size())
    if T is None:
        T = rnd.randn(n_timesteps, n_batches, net.get_output_size())
    # normalize targets to sum to one
    T = T / T.sum(2).reshape(n_timesteps, n_batches, 1)
    T = create_targets_object(T)
    weights = net.param_buffer.copy()

    ######### calculate gradient ##########
    net.forward_pass(X)
    net.backward_pass(T)
    grad_calc = net.calc_gradient().squeeze().copy()

    ######### estimate gradient ##########
    def f(W):
        net.param_buffer = W
        net.forward_pass(X)
        return net.calculate_error(T).copy()

    grad_approx = approx_fprime(weights.copy(), f, 1e-7)
    return (np.sum((grad_approx - grad_calc) ** 2) / n_batches,
            grad_calc, grad_approx)
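# A minimal usage sketch for check_deltas and check_gradient (hypothetical;
# `net` stands for any network object exposing the forward_pass /
# backward_pass / calculate_error / param_buffer interface used above).
# The tolerance is an assumption: with eps=1e-7 finite differences, the
# squared-error summaries should be close to zero for a correct
# implementation, but the exact threshold depends on the network.
def _example_gradient_checks(net):
    rnd = np.random.RandomState(42)
    delta_err, _, _ = check_deltas(net, rnd=rnd)
    grad_err, _, _ = check_gradient(net, rnd=rnd)
    assert delta_err < 1e-4
    assert grad_err < 1e-4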
def test_shuffle_is_random(self):
    X = np.arange(10).reshape(1, -1, 1)
    T = create_targets_object(np.arange(10).reshape(1, -1, 1))
    global_rnd.set_seed(1)
    _, _, s1 = shuffle_data(X, T)
    _, _, s2 = shuffle_data(X, T)
    self.assertFalse(np.all(s1 == s2))
def test_shuffle_seed_overwrites_global_seed(self):
    X = np.arange(10).reshape(1, -1, 1)
    T = create_targets_object(np.arange(10).reshape(1, -1, 1))
    global_rnd.set_seed(1)
    _, _, s1 = shuffle_data(X, T, seed=1)
    global_rnd.set_seed(1)
    _, _, s2 = shuffle_data(X, T, seed=2)
    self.assertFalse(np.all(s1 == s2))
def test_shuffle_depends_on_global_seed(self):
    X = np.arange(10).reshape(1, -1, 1)
    T = create_targets_object(np.arange(10).reshape(1, -1, 1))
    global_rnd.set_seed(1)
    _, _, s1 = shuffle_data(X, T)
    global_rnd.set_seed(1)
    _, _, s2 = shuffle_data(X, T)
    np.testing.assert_array_equal(s1.flat, s2.flat)
def test_finite_differences(self):
    Y = np.zeros((4, 3, 2)) + 0.5
    T = create_targets_object(np.ones((4, 3, 2)))
    for err in self.error_funcs:
        def f(X):
            return err(X.reshape(*Y.shape), T)[0]

        delta_approx = approx_fprime(Y.flatten().copy(), f, 1e-7)
        delta_calc = err(Y, T)[1].flatten()
        np.testing.assert_array_almost_equal(delta_approx, delta_calc)
def read_data(candidates, targets='T'):
    input_data = load_data(get_files_containing(candidates, 'X'))
    targets_data = load_data(get_files_containing(candidates, targets))
    mask_candidates = get_files_containing(candidates, 'M')
    if mask_candidates:
        mask = load_data(mask_candidates)
    else:
        mask = None
    targets_object = create_targets_object(targets_data, mask)
    return input_data, targets_object
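# Hedged usage sketch for read_data. The file-naming convention follows the
# helpers above: inputs come from candidates containing 'X', targets from
# candidates containing 'T' (or the name passed as `targets`), and an
# optional mask from candidates containing 'M'. The concrete file names
# below are hypothetical.
def _example_read_data():
    candidates = ['mnist_X.npy', 'mnist_T.npy', 'mnist_M.npy']  # hypothetical
    X, T = read_data(candidates)  # T carries the mask if an 'M' file exists
    return X, T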
def transform_ds_to_nsp(ds):
    """
    Takes a dataset dictionary like the one returned by load_dataset and
    transforms it into a next-step-prediction task: the target at each
    timestep is the input of the following timestep.
    """
    ds_nsp = {}
    for use in ['training', 'validation', 'test']:
        if use not in ds or ds[use] is None:
            continue
        nsp_targets = create_targets_object(ds[use][0][1:])
        ds_nsp[use] = (ds[use][0][:-1, :, :], nsp_targets)
    return ds_nsp
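# A small worked example of transform_ds_to_nsp on a toy dataset. The
# (time, batch, features) layout and the per-split (inputs, targets) tuple
# structure follow the function above; the concrete numbers are made up.
def _example_transform_ds_to_nsp():
    X = np.arange(24, dtype=float).reshape(4, 2, 3)  # 4 timesteps, 2 sequences, 3 features
    ds = {'training': (X, None)}
    ds_nsp = transform_ds_to_nsp(ds)
    inputs, targets = ds_nsp['training']
    # inputs drop the last timestep; targets are the inputs shifted by one
    assert inputs.shape == (3, 2, 3)
    np.testing.assert_array_equal(inputs, X[:-1])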
def _labeling_binarizing(outputs, targets):
    # TODO: use mask to mask deltas
    time_size, batch_size, label_count = outputs.shape
    deltas = np.zeros((time_size, batch_size, label_count))
    deltas[:] = float('-inf')
    errors = np.zeros(batch_size)
    targets = create_targets_object(targets)
    for b, (y, t) in enumerate(Online(outputs, targets, verbose=False)()):
        err, delt = ctcpp(y, list(t.data[0]))
        errors[b] = err
        deltas[:y.shape[0], b:b + 1, :] = delt.as_array()
    return np.mean(errors), -deltas / batch_size
def setUp(self):
    self.input_size = 2
    self.output_size = 3
    self.layer_types = [ForwardLayer, RnnLayer, MrnnLayer, LstmLayer,
                        Lstm97Layer]
    self.activation_functions = ["linear", "tanh", "tanhx2", "sigmoid",
                                 "softmax"]
    n_timesteps = 5
    n_batches = 6
    self.input_data = rnd.randn(n_timesteps, n_batches, self.input_size)
    self.targets = rnd.randn(n_timesteps, n_batches, self.output_size)
    # normalize targets to sum to one
    self.targets = self.targets / self.targets.sum(2).reshape(
        n_timesteps, n_batches, 1)
    self.targets = create_targets_object(self.targets)
def test_deriv_shape(self):
    Y = np.ones((4, 3, 2))
    T = create_targets_object(np.ones((4, 3, 2)) * 2)
    for err in self.error_funcs:
        e, d = err(Y, T)
        self.assertEqual(d.shape, Y.shape)
def test_evaluate_returns_scalar(self):
    Y = np.ones((4, 3, 2))
    T = create_targets_object(np.ones((4, 3, 2)) * 2)
    for err in self.error_funcs:
        e, d = err(Y, T)
        self.assertIsInstance(e, float)