def test_loss(loss, weight=1):
    print 'Testing loss <%s>, weight=%g' % (loss.get_name(), weight)
    loss.set_weight(weight)
    sx, sy = 3, 4
    x = gnp.randn(sx, sy)
    t = gnp.randn(sx, sy)
    if loss.target_should_be_one_hot():
        new_t = np.zeros(t.shape)
        new_t[np.arange(t.shape[0]), t.argmax(axis=1)] = 1
        t = gnp.garray(new_t)
    elif loss.target_should_be_normalized():
        t = t - t.min(axis=1)[:, gnp.newaxis] + 1
        t /= t.sum(axis=1)[:, gnp.newaxis]
    elif loss.target_should_be_hinge():
        new_t = -np.ones(t.shape)
        new_t[np.arange(t.shape[0]), t.argmax(axis=1)] = 1
        t = gnp.garray(new_t)
    loss.load_target(t)

    def f(w):
        return loss.compute_loss_and_grad(gnp.garray(w.reshape(sx, sy)))[0]

    fdiff_grad = finite_difference_gradient(f, x.asarray().ravel())
    backprop_grad = loss.compute_loss_and_grad(
        x, compute_grad=True)[1].asarray().ravel()
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
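# Note: the gradient checks in this file rely on a `finite_difference_gradient`
# helper whose definition is not shown here. The function below is only a
# hedged sketch of how such a helper is commonly written (central differences
# with a fixed step size); the project's actual helper may differ in name,
# signature, or step size.
def finite_difference_gradient_sketch(f, w, eps=1e-6):
    """Approximate the gradient of the scalar function f at the flat
    parameter vector w using central differences."""
    import numpy as np
    w = np.array(w, dtype=np.float64)
    grad = np.zeros_like(w)
    for i in range(w.size):
        w_plus = w.copy()
        w_minus = w.copy()
        w_plus[i] += eps
        w_minus[i] -= eps
        # central difference: (f(w + eps*e_i) - f(w - eps*e_i)) / (2*eps)
        grad[i] = (f(w_plus) - f(w_minus)) / (2 * eps)
    return grad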
def initParams(self):
    # crude way of random initialization (random seed) for parameters
    import time
    self.seed = int(time.time()) % 100000
    # for tt in range(self.seed): gp.rand()
    sizes = [self.inputDim] + self.layerSizes + [self.outputDim]
    scales = [gp.sqrt(6) / gp.sqrt(n + m)
              for n, m in zip(sizes[:-1], sizes[1:])]
    self.stack = [[gp.rand(m, n) * 2 * s - s, gp.zeros((m, 1))]
                  for n, m, s in zip(sizes[:-1], sizes[1:], scales)]
    self.hActs = [gp.empty((s, self.mbSize)) for s in sizes]
    if self.train:
        self.deltas = [gp.empty((s, self.mbSize)) for s in sizes[1:]]
        self.grad = [[gp.empty(w.shape), gp.empty(b.shape)]
                     for w, b in self.stack]
    for tt in range(self.seed):
        gp.rand()
    self.stack = [[ws[0] + .01 * gp.randn(ws[0].shape),
                   ws[1] + .01 * gp.randn(ws[1].shape)] for ws in self.stack]
def initRandom(self):
    gp.seed_rand()
    # uniform init in [-r, r] with r = sqrt(6)/sqrt(hDim + vDim + 1)
    r = gp.sqrt(6) / gp.sqrt(self.hDim + self.vDim + 1)
    self.W1 = gp.rand(self.vDim, self.hDim) * 2 * r - r
    self.W2 = gp.rand(self.hDim, self.vDim) * 2 * r - r
    self.initUpdate()
    self.initHyperParam(self.config, self.name)
def test_batch_normalization_layer():
    print 'Testing Batch Normalization layer'
    in_dim = 3
    n_cases = 5
    x = gnp.randn(n_cases, in_dim) * 2 + 3
    t = gnp.randn(n_cases, in_dim) * 2
    loss = ls.get_loss_from_type_name(ls.LOSS_NAME_SQUARED)
    loss.load_target(t)

    bn_layer = ly.BatchNormalizationLayer(in_dim)
    bn_layer.params.gamma = gnp.rand(in_dim)
    bn_layer.params.beta = gnp.rand(in_dim)
    w_0 = bn_layer.params.get_param_vec()

    y = bn_layer.forward_prop(x, is_test=False)
    _, loss_grad = loss.compute_not_weighted_loss_and_grad(y, True)
    bn_layer.backward_prop(loss_grad)
    backprop_grad = bn_layer.params.get_grad_vec()

    def f(w):
        bn_layer.params.set_param_from_vec(w)
        y = bn_layer.forward_prop(x, is_test=False)
        return loss.compute_not_weighted_loss_and_grad(y)[0]

    fdiff_grad = finite_difference_gradient(f, w_0)
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient',
                                eps=_BN_GRAD_CHECK_EPS, use_rel_err=True)
    print ''
    return test_passed
def test_random_feature_mmd_loss(sigma=[1, 10], scale_weight=[0.5, 1],
                                 n_features=3):
    print 'Testing random feature MMD loss'
    n_dims = 2
    n_target = 5
    n_pred = 5
    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)
    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_RANDOM_FEATURE_MMDGEN,
                                     sigma=sigma, scale_weight=scale_weight,
                                     n_features=n_features)
    mmd.load_target(target)
    print mmd

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                                         compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
        pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
def test_pair_mmd_loss_multiscale(sigma=[1, 10], scale_weight=None):
    print 'Testing generative pair multi-scale MMD loss'
    n_dims = 3
    n_target = 5
    n_pred = 4
    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)
    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_MMDGEN_MULTISCALE_PAIR,
                                     sigma=sigma, scale_weight=scale_weight)
    mmd.load_target(target)
    print mmd

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                                         compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
        pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
def test_linear_time_minibatch_mmd_loss(sigma=1.0, minibatch_size=100):
    print 'Testing linear time minibatch MMD loss'
    n_dims = 3
    n_target = 10
    n_pred = 10
    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)
    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_LINEAR_TIME_MINIBATCH_MMDGEN,
                                     sigma=sigma,
                                     minibatch_size=minibatch_size)
    mmd.load_target(target)
    print mmd

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                                         compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
        pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
def test_rnn_on_nn(add_noise=False):
    print 'Testing RnnOnNeuralNet, ' \
        + ('with' if add_noise else 'without') + ' noise'
    n_cases = 5
    net = create_default_rnn_on_nn(add_noise=add_noise)
    print net
    x = gnp.randn(n_cases, net.in_dim)
    t = gnp.randn(n_cases, net.out_dim)

    seed = 8
    gnp.seed_rand(seed)
    net.load_target(t)
    net.clear_gradient()
    net.forward_prop(x, add_noise=add_noise, compute_loss=True, is_test=False)
    net.backward_prop()
    backprop_grad = net.get_grad_vec()

    f = fdiff_grad_generator(net, x, None, add_noise=add_noise, seed=seed)
    fdiff_grad = finite_difference_gradient(f, net.get_param_vec())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def test_diff_kernel_per_example_mmd_loss(sigma=[1], scale_weight=[1],
                                          pred_per_example=1,
                                          target_per_example=[1],
                                          loss_name=None):
    assert loss_name is not None
    print 'Testing differentiable kernel per example MMD loss <%s>' % loss_name
    if len(target_per_example) == 1:
        target_per_example = target_per_example * 3

    n_dims = 3
    n_target = sum(target_per_example)
    n_pred = len(target_per_example) * pred_per_example
    pred = gnp.randn(n_pred, n_dims)
    target = []
    for i_target in target_per_example:
        target.append(gnp.randn(i_target, n_dims))

    mmd = ls.get_loss_from_type_name(loss_name, sigma=sigma,
                                     scale_weight=scale_weight,
                                     pred_per_example=pred_per_example)
    mmd.load_target(target)
    print mmd

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                                         compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
        pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
def __init__(self, config, name):
    super(AE, self).__init__(config, name)
    # dimension of hidden layer
    self.hDim = int(self.readField(config, name, "hidden_dimension"))
    # dimension of visible layer
    self.vDim = int(self.readField(config, name, "visible_dimension"))
    # bias for hidden layer
    if self.hDim > 0:
        self.b1 = gp.zeros(self.hDim)
    # bias for visible layer
    if self.vDim > 0:
        self.b2 = gp.zeros(self.vDim)
    # init weight: uniform between +-sqrt(6)/sqrt(v+h+1)
    if self.hDim * self.vDim > 0:
        gp.seed_rand()
        r = gp.sqrt(6) / gp.sqrt(self.hDim + self.vDim + 1)
        self.W1 = gp.rand(self.vDim, self.hDim) * 2 * r - r
        self.W2 = gp.rand(self.hDim, self.vDim) * 2 * r - r
        self.initUpdate()
        self.initHyperParam(config, name)
def test_diff_kernel_mmd_loss(sigma=[1], scale_weight=[1], loss_name=None):
    assert loss_name is not None
    print 'Testing differentiable kernel MMD loss <%s>' % loss_name
    n_dims = 3
    n_target = 5
    n_pred = 4
    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)
    mmd = ls.get_loss_from_type_name(loss_name, sigma=sigma,
                                     scale_weight=scale_weight)
    mmd.load_target(target)
    print mmd

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                                         compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
        pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
def test_linear_time_mmd_loss(sigma=1.0, use_modified_loss=False,
                              use_absolute_value=False):
    print 'Testing linear time MMD loss, sigma=%s' % str(sigma)
    n_dims = 3
    n_target = 4
    n_pred = 4
    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)
    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_LINEAR_TIME_MMDGEN,
                                     sigma=sigma,
                                     use_modified_loss=use_modified_loss,
                                     use_absolute_value=use_absolute_value)
    mmd.load_target(target)

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                                         compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
        pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
def init_weight(self, init_scale):
    """Initialize the weights to small normally distributed numbers"""
    # note the weight and the bias are treated separately for memory
    # efficiency
    self.W = init_scale * gnp.randn(self.in_dim, self.out_dim)
    self.b = init_scale * gnp.randn(1, self.out_dim)
    self.Winc = self.W * 0
    self.binc = self.b * 0
def test_neuralnet(add_noise=False, loss_after_nonlin=False,
                   use_batch_normalization=False):
    print 'Testing NeuralNet, ' \
        + ('with noise' if add_noise else 'without noise') + ', ' \
        + ('with BN' if use_batch_normalization else 'without BN')
    n_cases = 5
    seed = 8
    dropout_rate = 0.5 if add_noise else 0
    net = create_neuralnet(dropout_rate, loss_after_nonlin=loss_after_nonlin,
                           use_batch_normalization=use_batch_normalization)
    print net
    x = gnp.randn(n_cases, net.in_dim)
    t = gnp.randn(n_cases, net.out_dim)
    if net.loss.target_should_be_one_hot():
        new_t = np.zeros(t.shape)
        new_t[np.arange(t.shape[0]), t.argmax(axis=1)] = 1
        t = gnp.garray(new_t)
    elif net.loss.target_should_be_normalized():
        t = t - t.min(axis=1)[:, gnp.newaxis] + 1
        t /= t.sum(axis=1)[:, gnp.newaxis]
    net.load_target(t)

    if add_noise:
        gnp.seed_rand(seed)
    net.forward_prop(x, add_noise=add_noise, compute_loss=True, is_test=False)
    net.clear_gradient()
    net.backward_prop()
    backprop_grad = net.get_grad_vec()

    f = fdiff_grad_generator(net, x, None, add_noise=add_noise, seed=seed)
    fdiff_grad = finite_difference_gradient(f, net.get_param_vec())
    eps = _BN_GRAD_CHECK_EPS if use_batch_normalization else _GRAD_CHECK_EPS
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient',
                                eps=eps,
                                use_rel_err=use_batch_normalization)
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def pt_init(self, init_var=1e-2, init_bias=0., rho=0.5, lmbd=0., l2=0.,
            SI=15, **kwargs):
    # 2*self.shape[0]: precision parameters have size shape[0]
    pt_params = gzeros(self.m_end + self.shape[1] + 2 * self.shape[0])
    if init_var is None:
        pt_params[:self.m_end] = gpu.garray(
            init_SI(self.shape, sparsity=SI)).ravel()
    else:
        pt_params[:self.m_end] = init_var * gpu.randn(self.m_end)
    pt_params[self.m_end:-self.shape[0]] = init_bias
    pt_params[-self.shape[0]:] = 1.

    self.pt_score = self.reconstruction
    self.pt_grad = self.grad_cd1

    self.l2 = l2
    self.rho = rho
    self.lmbd = lmbd
    self.rho_hat = None

    return pt_params
def __init__(self, layer_shape, dropout_probability, n_epochs=50,
             l2_max=15.0, learning_rate=lambda x: 1.0 * .998 ** x,
             doGradientCheck=False):
    assert len(dropout_probability) == len(layer_shape)
    self.dropout_probability = dropout_probability
    self.activation_hidden = activation_relu
    self.gradient_hidden = gradient_relu
    self.activation_output = activation_softmax
    self.gradient_output = gradient_output_softmax
    self.n_epochs = n_epochs
    self.f_score = score_softmax
    self.learning_rate = learning_rate
    self.mini_batch_size = 100
    self.doGradientCheck = doGradientCheck
    self.l2_max = l2_max
    self.training_score = []
    self.training_validation_error = []
    self.weights = []
    self.activation = []
    self.gradient = []
    for i in range(1, len(layer_shape)):
        self.weights.append([
            g.randn(layer_shape[i - 1], layer_shape[i]) * 0.01,
            g.zeros(layer_shape[i])
        ])
        self.activation.append(self.activation_hidden)
        self.gradient.append(self.gradient_hidden)
    self.activation[-1] = self.activation_output
    self.gradient[-1] = self.gradient_output
def pt_init(self, H=bernoulli, V=bernoulli, init_var=1e-2, init_bias=0.,
            rho=0.5, lmbd=0., l2=0., **kwargs):
    pt_params = gzeros(self.m_end + self.shape[1] + self.shape[0])
    if init_var is None:
        init_heur = 4 * np.sqrt(6. / (self.shape[0] + self.shape[1]))
        pt_params[:self.m_end] = gpu.rand(self.m_end)
        pt_params[:self.m_end] *= 2
        pt_params[:self.m_end] -= 1
        pt_params[:self.m_end] *= init_heur
    else:
        pt_params[:self.m_end] = init_var * gpu.randn(self.m_end)
    pt_params[self.m_end:] = init_bias

    self.H = H
    self.V = V
    self.activ = match_table[H]

    self.pt_score = self.reconstruction
    self.pt_grad = self.grad_cd1

    self.l2 = l2
    self.rho = rho
    self.lmbd = lmbd
    self.rho_hat = None

    return pt_params
def test_databias_loss_with_net(add_noise, loss_type, **kwargs):
    print 'Testing Loss <' + loss_type + '> with network, ' \
        + ('with noise' if add_noise else 'without noise') + ', ' \
        + ', '.join([str(k) + '=' + str(v) for k, v in kwargs.iteritems()])
    n_cases = 5
    n_datasets = 3
    seed = 8
    dropout_rate = 0.5 if add_noise else 0
    net = create_databias_net(dropout_rate)
    net.set_loss(loss_type)
    print net
    x = gnp.randn(n_cases, net.in_dim)
    s = np.arange(n_cases) % n_datasets
    net.load_target(s, K=n_datasets, **kwargs)

    if add_noise:
        gnp.seed_rand(seed)
    net.clear_gradient()
    net.forward_prop(x, add_noise=add_noise, compute_loss=True)
    net.backward_prop()
    backprop_grad = net.get_grad_vec()

    f = fdiff_grad_generator(net, x, None, add_noise=add_noise, seed=seed)
    fdiff_grad = finite_difference_gradient(f, net.get_param_vec())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def init_params(self, w_scale=0.01, b_scale=0.0):
    """Randomly initialize the weights in this layer."""
    self.params['W'] = w_scale * gp.randn((self.dim_input, self.dim_output))
    self.grads['W'] = gp.zeros((self.dim_input, self.dim_output))
    self.params['b'] = gp.zeros((1, self.dim_output))
    self.grads['b'] = gp.zeros((1, self.dim_output))
    return
def test_autoencoder(add_noise=False):
    print 'Testing AutoEncoder ' \
        + ('with noise' if add_noise else 'without noise')
    n_cases = 5
    seed = 8
    dropout_rate = 0.5 if add_noise else 0
    autoencoder = create_autoencoder(dropout_rate)
    print autoencoder
    x = gnp.randn(n_cases, autoencoder.in_dim)

    if add_noise:
        gnp.seed_rand(seed)
    autoencoder.clear_gradient()
    autoencoder.forward_prop(x, add_noise=add_noise, compute_loss=True,
                             is_test=False)
    autoencoder.backward_prop()
    backprop_grad = autoencoder.get_grad_vec()

    f = fdiff_grad_generator(autoencoder, x, None, add_noise=add_noise,
                             seed=seed)
    fdiff_grad = finite_difference_gradient(f, autoencoder.get_param_vec())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def random_normal_like(x, loc, scale):
    """Return an array of the same shape as `x` filled with normally
    distributed random numbers with mean `loc` and standard deviation
    `scale`."""
    if not isinstance(x, np.ndarray):
        return gp.randn(*x.shape) * scale + loc
    else:
        return np.random.normal(loc, scale, x.shape)
def test_databias_loss(loss_type, **kwargs):
    print 'Testing Loss <' + loss_type + '> ' \
        + ', '.join([str(k) + '=' + str(v) for k, v in kwargs.iteritems()])
    n_cases = 5
    n_datasets = 3
    in_dim = 2
    x = gnp.randn(n_cases, in_dim)
    s = np.arange(n_cases) % n_datasets
    loss = ls.get_loss_from_type_name(loss_type)
    loss.load_target(s, K=n_datasets, **kwargs)

    def f(w):
        return loss.compute_loss_and_grad(w.reshape(x.shape),
                                          compute_grad=True)[0]

    backprop_grad = loss.compute_loss_and_grad(
        x, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, x.asarray().ravel())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
def __init__(self, in_dim, out_dim, afun=lnf.rehu_trans):
    self.dim_input = in_dim
    self.dim_output = out_dim
    self.W = gp.randn((out_dim, in_dim))
    self.act_trans = afun
    self.ff_evals = 0
    self.bp_evals = 0
    return
def test_rnn():
    print 'Testing RNN'
    n_cases = 5
    in_dim = 3
    out_dim = 2
    label_dim = 2
    x = gnp.randn(n_cases, in_dim)
    t = gnp.randn(n_cases, label_dim)

    net = nn.NeuralNet(out_dim, label_dim)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR)
    net.set_loss(ls.LOSS_NAME_SQUARED)
    net.load_target(t)

    rnn_net = rnn.RNN(in_dim, out_dim)
    print rnn_net
    print net

    rnn_net.clear_gradient()
    net.clear_gradient()
    h = rnn_net.forward_prop(x)
    net.forward_prop(h, add_noise=False, compute_loss=True, is_test=False)
    dh = net.backward_prop()
    rnn_net.backward_prop(dh)
    backprop_grad = rnn_net.get_grad_vec()

    def f(w):
        rnn_net.clear_gradient()
        rnn_net.set_param_from_vec(w)
        h = rnn_net.forward_prop(x)
        net.forward_prop(h, add_noise=False, compute_loss=True, is_test=False)
        return net.get_loss()

    fdiff_grad = finite_difference_gradient(f, rnn_net.get_param_vec())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
def test_rnn_hybrid(add_noise=False, has_input=True):
    print 'Testing RNN hybrid, ' \
        + ('with' if add_noise else 'without') + ' noise, ' \
        + ('with' if has_input else 'without') + ' input'
    n_cases = 5
    net = create_default_rnn_hybrid(add_noise=add_noise, has_input=has_input)
    print net
    x = gnp.randn(n_cases, net.in_dim) if has_input else None
    t = gnp.randn(n_cases, net.out_dim)
    net.load_target(t)

    seed = 8
    gnp.seed_rand(seed)
    net.clear_gradient()
    net.forward_prop(X=x, T=n_cases, add_noise=add_noise, compute_loss=True,
                     is_test=False)
    net.backward_prop()
    backprop_grad = net.get_grad_vec()

    f = fdiff_grad_generator(net, x, None, add_noise=add_noise, seed=seed,
                             T=n_cases)
    fdiff_grad = finite_difference_gradient(f, net.get_param_vec())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def pt_init(self, score=None, init_var=1e-2, init_bias=0., SI=15, **kwargs):
    if init_var is None:
        self.SI = SI
        self.p[:self.m_end] = gpu.garray(
            init_SI(self.shape, sparsity=SI)).ravel()
    else:
        self.p[:self.m_end] = init_var * gpu.randn(self.m_end)
    self.p[self.m_end:] = init_bias
    self.score = score
    return self.p
def gaussian(data, wm, bias, sampling=False):
    """Gaussian with fixed variance of 1.
    """
    suff = gpu.dot(data, wm) + bias
    if sampling:
        sample = suff + gpu.randn(suff.shape)
    else:
        sample = None
    return suff, sample
def test_stacked_net_gradient(add_noise=False):
    print 'Testing StackedNeuralNet'
    in_dim = 3
    out_dim = [5, 2, 2]
    n_cases = 5
    seed = 8
    dropout_rate = 0.5 if add_noise else 0
    stacked_net = create_stacked_net(in_dim, out_dim, dropout_rate)
    print stacked_net
    x = gnp.randn(n_cases, in_dim)
    t1 = gnp.randn(n_cases, out_dim[0])
    t3 = gnp.randn(n_cases, out_dim[2])
    stacked_net.load_target(t1, None, t3)

    if add_noise:
        gnp.seed_rand(seed)
    stacked_net.clear_gradient()
    stacked_net.forward_prop(x, add_noise=add_noise, compute_loss=True,
                             is_test=False)
    stacked_net.backward_prop()
    backprop_grad = stacked_net.get_grad_vec()

    f = fdiff_grad_generator(stacked_net, x, None, add_noise=add_noise,
                             seed=seed)
    fdiff_grad = finite_difference_gradient(f, stacked_net.get_param_vec())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed
def initialize_self_weights(self, num_in, scale_big, scale_small=0,
                            W_to_exclude=[]):
    from opt.utils.extra import sparsify_strict
    for i, param in enumerate(self.W):
        if i not in W_to_exclude:
            param[:] = g.randn(*param.shape)
            sparsify_strict(param, num_in, scale_big, scale_small)
    return self
def __init__(self, v, h, o, hid_nonlin=None, out_nonlin=None,
             struct_damp_nonlin=None, init=True):
    self.v = v
    self.h = h
    self.o = o

    if hid_nonlin is None:
        hid_nonlin = nonlin.Tanh
    if out_nonlin is None:
        out_nonlin = nonlin.Softmax
    if struct_damp_nonlin is None:
        raise TypeError('must specify struct_damp_nonlin.')

    self.hid_nonlin = hid_nonlin
    self.out_nonlin = out_nonlin
    self.struct_damp_nonlin = struct_damp_nonlin

    if init:
        self.h_init = g.randn(1, h)
        self.W_hh = g.randn(h, h)
        self.W_vh = g.randn(v, h)
        self.W_ho = g.randn(h, o)
def gauss(data, wm, bias, prec, sampling=False):
    """A Gaussian with given diagonal precision `prec` (more precisely,
    `prec` is interpreted as the square root of a diagonal precision).
    """
    suff = gpu.dot(data, wm) + bias
    if sampling:
        sample = suff + gpu.randn(suff.shape) / prec
    else:
        sample = None
    return suff, sample
def pt_init(self, score=None, init_var=1e-2, init_bias=0., **kwargs):
    pt_params = gzeros(self.size + self.m_end + self.shape[0])
    if init_var is None:
        init_heur = 4 * np.sqrt(6. / (self.shape[0] + self.shape[1]))
        pt_params[:self.m_end] = gpu.rand(self.m_end)
        pt_params[:self.m_end] *= 2
        pt_params[:self.m_end] -= 1
        pt_params[:self.m_end] *= init_heur

        pt_params[self.size:-self.shape[0]] = gpu.rand(self.m_end)
        pt_params[self.size:-self.shape[0]] *= 2
        pt_params[self.size:-self.shape[0]] -= 1
        pt_params[self.size:-self.shape[0]] *= init_heur
    else:
        pt_params[:self.m_end] = init_var * gpu.randn(self.m_end)
        pt_params[self.size:-self.shape[0]] = init_var * gpu.randn(self.m_end)

    pt_params[self.m_end:self.size] = init_bias
    pt_params[-self.shape[0]:] = init_bias

    self.score = score
    return pt_params
def pt_init(self, score=None, init_var=1e-2, init_bias=0., SI=15, **kwargs):
    if init_var is None:
        self.init_var = None
        self.SI = SI
        self.p[:self.m_end] = gpu.garray(
            init_SI(self.shape, sparsity=SI)).ravel()
    else:
        self.SI = SI
        self.init_var = init_var
        self.p[:self.m_end] = init_var * gpu.randn(self.m_end)
    self.p[self.m_end:] = init_bias
    self.score = score
    return self.p
def pt_init(self, score=None, init_var=1e-2, init_bias=0., l2=0., SI=15,
            **kwargs):
    pt_params = gzeros(self.m_end + self.shape[1] + self.shape[0])
    if init_var is None:
        pt_params[:self.m_end] = gpu.garray(
            init_SI(self.shape, sparsity=SI)).ravel()
    else:
        pt_params[:self.m_end] = init_var * gpu.randn(self.m_end)
    pt_params[self.m_end:] = init_bias

    self.score = score
    self.l2 = l2
    return pt_params
def _init_params(self):
    if self.has_input:
        self.W_ih = gnp.randn(self.in_dim, self.out_dim) \
            / math.sqrt(self.in_dim)
        self.dW_ih = self.W_ih * 0
    self.W_hh = gnp.eye(self.out_dim)
    self.b = gnp.zeros(self.out_dim)
    self.dW_hh = self.W_hh * 0
    self.db = self.b * 0
    self._update_param_size()
def pt_init(self, score=None, init_var=1e-2, init_bias=0., l2=0., SI=15,
            **kwargs):
    pt_params = gzeros(self.m_end + self.shape[0])
    if init_var is None:
        pt_params[:self.m_end] = gpu.garray(
            init_SI(self.shape, sparsity=SI)).ravel()
    else:
        pt_params[:self.m_end] = init_var * gpu.randn(self.m_end)
    pt_params[self.m_end:] = init_bias

    self.score = score
    self.l2 = l2
    return pt_params
def nrelu(data, wm, bias, sampling=False):
    """A noisy rectified linear unit.
    """
    suff = gpu.dot(data, wm) + bias
    if sampling:
        sample = suff + (gpu.sqrt(suff.logistic()) * gpu.randn(suff.shape))
        #sample = suff + gpu.randn(suff.shape)
        sample *= (sample > 0)
    else:
        sample = None
    suff *= (suff > 0)
    return suff, sample
def test_generative_multi_scale_mmd_loss(sigma=[1, 10], scale_weight=None):
    print 'Testing generative multi-scale MMD loss, sigma=%s' % str(sigma)
    n_dims = 3
    n_target = 5
    n_pred = 4
    target = gnp.randn(n_target, n_dims)
    pred = gnp.randn(n_pred, n_dims)
    mmd = ls.get_loss_from_type_name(ls.LOSS_NAME_MMDGEN_MULTISCALE,
                                     sigma=sigma, scale_weight=scale_weight)
    mmd.load_target(target)

    def f(w):
        return mmd.compute_loss_and_grad(w.reshape(pred.shape),
                                         compute_grad=False)[0]

    backprop_grad = mmd.compute_loss_and_grad(
        pred, compute_grad=True)[1].asarray().ravel()
    fdiff_grad = finite_difference_gradient(f, pred.asarray().ravel())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed
def __init__(self, in_dim=1, out_dim=1, init_scale=1e-1, dropout=0,
             init_bias=0):
    self.W = gnp.randn(in_dim, out_dim) * init_scale
    self.b = gnp.ones(out_dim) * init_bias
    self.W_grad = self.W * 0
    self.b_grad = self.b * 0

    self.param_size = self.W.size + self.b.size
    self.dropout = dropout

    # get an ID for this param variable.
    self._param_id = LayerParams._param_count
    LayerParams._param_count += 1
def test_y_net_gradient(add_noise=False):
    print 'Testing YNeuralNet ' \
        + ('with noise' if add_noise else 'without noise')
    in_dim = 3
    out_dim = [2, 2, 2]
    n_cases = 5
    seed = 8
    dropout_rate = 0.5 if add_noise else 0
    ynet = create_y_net(in_dim, out_dim, dropout_rate)
    print ynet
    x = gnp.randn(n_cases, in_dim)
    t1 = gnp.randn(n_cases, out_dim[0])
    t2 = gnp.randn(n_cases, out_dim[1])
    t3 = gnp.randn(n_cases, out_dim[2])
    ynet.load_target([None, t1], t2, t3)

    if add_noise:
        gnp.seed_rand(seed)
    ynet.clear_gradient()
    ynet.forward_prop(x, add_noise=add_noise, compute_loss=True,
                      is_test=False)
    ynet.backward_prop()
    backprop_grad = ynet.get_grad_vec()

    f = fdiff_grad_generator(ynet, x, None, add_noise=add_noise, seed=seed)
    fdiff_grad = finite_difference_gradient(f, ynet.get_param_vec())
    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    gnp.seed_rand(int(time.time()))
    return test_passed