def sample_mean(self, X):
    """Sample from the GMM packed in X = [mu, sig, coeff]; also return the
    mean of the chosen component."""
    mu = X[0]
    sig = X[1]
    coeff = X[2]
    # Unpack the flat parameters into (batch, frame_size, k); // keeps the
    # reshape dimensions integral.
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))
    # Sample one mixture component per row.
    idx = predict(self.theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype),
                  axis=1)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    sig = sig[T.arange(sig.shape[0]), :, idx]
    epsilon = self.theano_rng.normal(size=mu.shape, avg=0., std=1.,
                                     dtype=mu.dtype)
    z = mu + sig * epsilon
    return z, mu
def emit(self, readouts):
    """Sample one (penup, x, y) step from the mixture distribution."""
    mu, sigma, corr, coeff, penup = self.components(readouts)
    # Sample one mixture component per row from the mixture weights.
    idx = predict(self.theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype),
                  axis=1)
    mu = mu[tensor.arange(mu.shape[0]), :, idx]
    sigma = sigma[tensor.arange(sigma.shape[0]), :, idx]
    corr = corr[tensor.arange(corr.shape[0]), idx]

    mu_x = mu[:, 0]
    mu_y = mu[:, 1]
    sigma_x = sigma[:, 0]
    sigma_y = sigma[:, 1]

    z = self.theano_rng.normal(size=mu.shape, avg=0., std=1., dtype=mu.dtype)

    # Bernoulli sample for the pen-up indicator.
    un = self.theano_rng.uniform(size=penup.shape)
    penup = tensor.cast(un < penup, floatX)

    # Correlated bivariate Gaussian sample.
    s_x = (mu_x + sigma_x * z[:, 0]).dimshuffle(0, 'x')
    s_y = mu_y + sigma_y * (z[:, 0] * corr +
                            z[:, 1] * tensor.sqrt(1. - corr ** 2))
    s_y = s_y.dimshuffle(0, 'x')

    s = tensor.concatenate([penup, s_x, s_y], axis=1)
    return s
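# Quick numerical check (illustrative, not part of the original model) of the
# correlated-sampling identity used in emit() above: with z1, z2 ~ N(0, 1),
#     x = mu_x + sigma_x * z1
#     y = mu_y + sigma_y * (rho * z1 + sqrt(1 - rho**2) * z2)
# gives marginals with std devs (sigma_x, sigma_y) and correlation rho.
import numpy as np

rng = np.random.RandomState(0)
mu_x, mu_y, sigma_x, sigma_y, rho = 1.0, -2.0, 0.5, 2.0, 0.7
z1, z2 = rng.randn(2, 100000)
x = mu_x + sigma_x * z1
y = mu_y + sigma_y * (rho * z1 + np.sqrt(1.0 - rho ** 2) * z2)
print(np.corrcoef(x, y)[0, 1])  # close to 0.7
print(x.std(), y.std())         # close to 0.5 and 2.0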
def sample(self, X):
    mu = X[0]
    sig = X[1]
    coeff = X[2]
    # Pad mu with zeros so that a fraction p_noise of the mixture components
    # are zero-mean noise components.
    n_noise = T.cast(T.floor(coeff.shape[-1] * self.p_noise), 'int32')
    mu = T.concatenate(
        [mu, T.zeros((mu.shape[0],
                      n_noise * sig.shape[1] // coeff.shape[-1]))],
        axis=1)
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))
    idx = predict(self.theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype),
                  axis=1)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    sig = sig[T.arange(sig.shape[0]), :, idx]
    sample = self.theano_rng.normal(size=mu.shape, avg=mu, std=sig,
                                    dtype=mu.dtype)
    return sample
def emit(self, readouts):
    mu, sigma, coeff = self.gmmmlp.apply(readouts)
    frame_size = mu.shape[-1] // coeff.shape[-1]
    k = coeff.shape[-1]
    # Output shape: like coeff, but with frame_size in the last dimension.
    shape_result = coeff.shape
    shape_result = tensor.set_subtensor(shape_result[-1], frame_size)
    ndim_result = coeff.ndim

    mu = mu.reshape((-1, frame_size, k))
    sigma = sigma.reshape((-1, frame_size, k))
    coeff = coeff.reshape((-1, k))

    sample_coeff = self.theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype)
    idx = predict(sample_coeff, axis=-1)
    # idx = predict(coeff, axis=-1)  # use this line to pick the most likely component instead

    mu = mu[tensor.arange(mu.shape[0]), :, idx]
    sigma = sigma[tensor.arange(sigma.shape[0]), :, idx]
    epsilon = self.theano_rng.normal(size=mu.shape, avg=0., std=1.,
                                     dtype=mu.dtype)
    result = mu + sigma * epsilon
    return result.reshape(shape_result, ndim=ndim_result)
def emit(self, readouts):
    """Sample from the distribution.
    """
    mu, sigma, corr, coeff, penup = self.components(readouts)
    idx = predict(self.theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype),
                  axis=1)
    mu = mu[tensor.arange(mu.shape[0]), :, idx]
    sigma = sigma[tensor.arange(sigma.shape[0]), :, idx]
    corr = corr[tensor.arange(corr.shape[0]), idx]
    mu_x = mu[:, 0]
    mu_y = mu[:, 1]
    sigma_x = sigma[:, 0]
    sigma_y = sigma[:, 1]
    z = self.theano_rng.normal(size=mu.shape, avg=0., std=1., dtype=mu.dtype)
    un = self.theano_rng.uniform(size=penup.shape)
    penup = tensor.cast(un < penup, floatX)
    s_x = (mu_x + sigma_x * z[:, 0]).dimshuffle(0, 'x')
    s_y = mu_y + sigma_y * ((z[:, 0] * corr) +
                            (z[:, 1] * tensor.sqrt(1. - corr ** 2)))
    s_y = s_y.dimshuffle(0, 'x')
    s = tensor.concatenate([penup, s_x, s_y], axis=1)
    return s
def GMM_sample(mu, sig, coeff, theano_rng=default_theano_rng):
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))
    idx = predict(theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype),
                  axis=1)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    sig = sig[T.arange(sig.shape[0]), :, idx]
    epsilon = theano_rng.normal(size=mu.shape, avg=0., std=1., dtype=mu.dtype)
    z = mu + sig * epsilon
    return z
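# Illustrative usage sketch for GMM_sample (an assumption, not code from the
# original source). It takes `predict` to be an argmax helper and the RNG to
# be an MRG_RandomStreams instance, consistent with how they are used above,
# and assumes the GMM_sample definition is already in scope.
import numpy as np
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

def predict(probs, axis=-1):
    # Index of the largest entry along `axis`; this also reads off the
    # winning component of a one-hot multinomial draw.
    return T.argmax(probs, axis=axis)

rng = MRG_RandomStreams(seed=1234)

mu = T.matrix('mu')            # (batch, frame_size * k) packed means
sig = T.matrix('sig')          # (batch, frame_size * k) packed std devs
coeff_pre = T.matrix('coeff')  # (batch, k) unnormalised mixture scores
coeff = T.nnet.softmax(coeff_pre)

z = GMM_sample(mu, sig, coeff, theano_rng=rng)
f = theano.function([mu, sig, coeff_pre], z)

floatX = theano.config.floatX
out = f(np.random.randn(5, 6).astype(floatX),   # batch 5, frame_size 2, k 3
        np.ones((5, 6), dtype=floatX),
        np.random.randn(5, 3).astype(floatX))
print(out.shape)                                # (5, 2)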
def argmax_mean(self, X):
    mu = X[0]
    sig = X[1]
    coeff = X[2]
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))
    # Pick the most likely mixture component instead of sampling one.
    idx = predict(coeff)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    sig = sig[T.arange(sig.shape[0]), :, idx]
    epsilon = self.theano_rng.normal(size=mu.shape, avg=0., std=1.,
                                     dtype=mu.dtype)
    z = mu + sig * epsilon
    return z, mu
def sample_mean(self, X):
    mu = X[0]
    sig = X[1]
    coeff = X[2]
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))
    idx = predict(self.theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype),
                  axis=1)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    sig = sig[T.arange(sig.shape[0]), :, idx]
    epsilon = self.theano_rng.normal(size=mu.shape, avg=0., std=1.,
                                     dtype=mu.dtype)
    z = mu + sig * epsilon
    return z, mu
def emit(self, readouts):
    mu, sigma, coeff = self.gmmmlp.apply(readouts)
    frame_size = mu.shape[-1] // coeff.shape[-1]
    k = coeff.shape[-1]
    shape_result = coeff.shape
    shape_result = tensor.set_subtensor(shape_result[-1], frame_size)
    ndim_result = coeff.ndim
    mu = mu.reshape((-1, frame_size, k))
    sigma = sigma.reshape((-1, frame_size, k))
    coeff = coeff.reshape((-1, k))
    sample_coeff = self.theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype)
    idx = predict(sample_coeff, axis=-1)
    mu = mu[tensor.arange(mu.shape[0]), :, idx]
    sigma = sigma[tensor.arange(sigma.shape[0]), :, idx]
    epsilon = self.theano_rng.normal(size=mu.shape, avg=0., std=1.,
                                     dtype=mu.dtype)
    result = mu + sigma * epsilon
    return result.reshape(shape_result, ndim=ndim_result)
def GMM_sampleY(mu, sig, coeff, theano_rng=default_theano_rng):
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))
    idx = predict(theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype),
                  axis=1)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    sig = sig[T.arange(sig.shape[0]), :, idx]
    epsilon = theano_rng.normal(size=mu.shape, avg=0., std=1., dtype=mu.dtype)
    # mu = mu.sum(axis=1)
    # sig = sig.sum(axis=1)
    z = mu + sig * epsilon
    return z
def argmax_mean(self, X):
    mu = X[0]
    coeff = X[1]
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    # Return the mean of the most likely mixture component.
    idx = predict(coeff)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    return mu
def emit(self, readouts):
    """If keep_parameters is True, mu, sigma and coeff are stacked and
    returned as well; if False, only the result is given and the others are
    empty lists.
    """
    # Initial state.
    state = self.frnn_initial_state.apply(self.mlp.apply(readouts))
    results = []

    for i in range(self.number_of_steps):
        last_iteration = i == self.number_of_steps - 1

        # First generate the distribution parameters, then sample.
        mu = self.mu.apply(state)
        sigma = self.sigma.apply(state) + self.const
        coeff = self.coeff2.apply(self.coeff.apply(state),
                                  extra_ndim=state.ndim - 2) + self.const

        shape_result = coeff.shape
        shape_result = tensor.set_subtensor(shape_result[-1],
                                            self.frnn_step_size)
        ndim_result = coeff.ndim

        mu = mu.reshape((-1, self.frnn_step_size, self.k))
        sigma = sigma.reshape((-1, self.frnn_step_size, self.k))
        coeff = coeff.reshape((-1, self.k))

        sample_coeff = self.theano_rng.multinomial(pvals=coeff,
                                                   dtype=coeff.dtype)
        idx = predict(sample_coeff, axis=-1)
        # idx = predict(coeff, axis=-1)  # use this line to pick the most likely component

        # shapes: (ls * bs) x (fs)
        mu = mu[tensor.arange(mu.shape[0]), :, idx]
        sigma = sigma[tensor.arange(sigma.shape[0]), :, idx]
        epsilon = self.theano_rng.normal(size=mu.shape, avg=0., std=1.,
                                         dtype=mu.dtype)

        result = mu + sigma * epsilon  # * 0.6 to reduce variance
        result = result.reshape(shape_result, ndim=ndim_result)
        results.append(result)

        # If the total size does not correspond to the frame_size,
        # this removes the need for padding.
        if not last_iteration:
            state = self.frnn_activation.apply(
                self.frnn_linear_transition_state.apply(state) +
                self.frnn_linear_transition_input.apply(result))

    results = tensor.stack(results, axis=-1)
    results = tensor.flatten(results, outdim=results.ndim - 1)

    # Truncate if the stacked result is not the right size.
    if self.last_steps != 0:
        results = results[tuple([slice(0, None)] * (results.ndim - 1) +
                                [slice(0, self.frame_size)])]
    return results
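# Worked shape example (illustrative; it assumes number_of_steps is the
# ceiling of frame_size / frnn_step_size, which is what the final truncation
# in emit() suggests).
frame_size, frnn_step_size = 200, 64
number_of_steps = -(-frame_size // frnn_step_size)  # ceil division -> 4
last_steps = frame_size % frnn_step_size            # 8, so truncation is needed
generated = number_of_steps * frnn_step_size        # 4 * 64 = 256 values per frame
assert generated >= frame_size                      # emit() keeps only the first 200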
def argmax_mean(self, X):
    mu = X[0]
    sig = X[1]
    coeff = X[2]
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))
    idx = predict(coeff)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    sig = sig[T.arange(sig.shape[0]), :, idx]
    epsilon = self.theano_rng.normal(size=mu.shape, avg=0., std=1.,
                                     dtype=mu.dtype)
    z = mu + sig * epsilon
    return z, mu
def GMM_argmax_mean(mu, sig, coeff, theano_rng=default_theano_rng):
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))
    # Pick the most likely component, then sample around its mean.
    idx = predict(coeff)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    sig = sig[T.arange(sig.shape[0]), :, idx]
    epsilon = theano_rng.normal(size=mu.shape, avg=0., std=1., dtype=mu.dtype)
    z = mu + sig * epsilon
    return z, mu
def GMM_argmax_mean(mu, sig, coeff, theano_rng=default_theano_rng):
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))
    idx = predict(coeff)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    sig = sig[T.arange(sig.shape[0]), :, idx]
    epsilon = theano_rng.normal(size=mu.shape, avg=0., std=1., dtype=mu.dtype)
    z = mu + sig * epsilon
    return z, mu
def sample(self, X):
    # The first column of X[0] is an indicator that is passed through
    # unchanged; the rest are the packed means.
    ind, mu = X[0][:, 0], X[0][:, 1:]
    sig = X[1]
    coeff = X[2]
    mu = mu.reshape((mu.shape[0],
                     mu.shape[1] // coeff.shape[-1],
                     coeff.shape[-1]))
    sig = sig.reshape((sig.shape[0],
                       sig.shape[1] // coeff.shape[-1],
                       coeff.shape[-1]))
    idx = predict(self.theano_rng.multinomial(pvals=coeff, dtype=coeff.dtype),
                  axis=1)
    mu = mu[T.arange(mu.shape[0]), :, idx]
    sig = sig[T.arange(sig.shape[0]), :, idx]
    sample = self.theano_rng.normal(size=mu.shape, avg=mu, std=sig,
                                    dtype=mu.dtype)
    # Re-attach the indicator column to the sampled frame.
    sample = T.concatenate([ind.dimshuffle(0, 'x'), sample], axis=1)
    return sample
def emit(self, readouts):
    """
    keep_parameters is True if mu, sigma, coeffs must be stacked and returned
    if false, only the result is given, the others will be empty list.
    """
    # initial state
    state = self.frnn_initial_state.apply(self.mlp.apply(readouts))
    results = []

    for i in range(self.number_of_steps):
        last_iteration = (i == self.number_of_steps - 1)

        # First generating distribution parameters and sampling.
        mu = self.mu.apply(state)
        sigma = self.sigma.apply(state) + self.const
        coeff = self.coeff2.apply(self.coeff.apply(state),
                                  extra_ndim=state.ndim - 2) + self.const

        shape_result = coeff.shape
        shape_result = tensor.set_subtensor(shape_result[-1],
                                            self.frnn_step_size)
        ndim_result = coeff.ndim

        mu = mu.reshape((-1, self.frnn_step_size, self.k))
        sigma = sigma.reshape((-1, self.frnn_step_size, self.k))
        coeff = coeff.reshape((-1, self.k))

        sample_coeff = self.theano_rng.multinomial(pvals=coeff,
                                                   dtype=coeff.dtype)
        idx = predict(sample_coeff, axis=-1)
        # idx = predict(coeff, axis=-1)  # use this line for using most likely coeff.

        # shapes (ls*bs)*(fs)
        mu = mu[tensor.arange(mu.shape[0]), :, idx]
        sigma = sigma[tensor.arange(sigma.shape[0]), :, idx]
        epsilon = self.theano_rng.normal(size=mu.shape, avg=0., std=1.,
                                         dtype=mu.dtype)

        result = mu + sigma * epsilon  # * 0.6 to reduce variance.
        result = result.reshape(shape_result, ndim=ndim_result)
        results.append(result)

        # if the total size does not correspond to the frame_size,
        # this removes the need for padding
        if not last_iteration:
            state = self.frnn_activation.apply(
                self.frnn_linear_transition_state.apply(state) +
                self.frnn_linear_transition_input.apply(result))

    results = tensor.stack(results, axis=-1)
    results = tensor.flatten(results, outdim=results.ndim - 1)

    # truncate if not good size
    if self.last_steps != 0:
        results = results[tuple([slice(0, None)] * (results.ndim - 1) +
                                [slice(0, self.frame_size)])]
    return results
# Initialize the nodes
for node in nodes:
    node.initialize()

# Collect parameters
params = flatten([node.get_params().values() for node in nodes])

# Build the Theano computational graph
h1_out = h1.fprop([x])
d1_out = d1.fprop([h1_out])
h2_out = h2.fprop([d1_out])
d2_out = d2.fprop([h2_out])
y_hat = output.fprop([d2_out])

# Compute the cost
cost = NllMulInd(y, y_hat).mean()
err = error(predict(y_hat), y)
cost.name = 'cross_entropy'
err.name = 'error_rate'

# Separate monitoring graph that bypasses the dropout nodes
d1.set_mode(1)
d2.set_mode(1)
mn_h1_out = h1.fprop([mn_x])
mn_h2_out = h2.fprop([mn_h1_out])
mn_y_hat = output.fprop([mn_h2_out])

mn_cost = NllMulInd(mn_y, mn_y_hat).mean()
mn_err = error(predict(mn_y_hat), mn_y)
mn_cost.name = 'cross_entropy'
mn_err.name = 'error_rate'

monitor_fn = theano.function([mn_x, mn_y], [mn_cost, mn_err])
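# Minimal sketch (an assumption, not the library's own code) of the `error`
# helper used throughout these snippets: the mean rate at which the predicted
# class index disagrees with the integer target.
import theano.tensor as T

def error(y_hat_idx, y_idx):
    return T.neq(y_hat_idx, y_idx).mean()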
              unit='softmax',
              init_W=init_W,
              init_b=init_b)
cost = MulCrossEntropyLayer(name='cost', parent=['y', 'h4'])

# You will fill in a list of nodes and feed them to the model constructor
nodes = [c1, c2, h1, h2, h3, h4, cost]

# Your model will build the Theano computational graph
cnn = Net(inputs=inputs, inputs_dim=inputs_dim, nodes=nodes)
cnn.build_graph()

# You can access any output of a node by doing model.nodes[$node_name].out
cost = cnn.nodes['cost'].out
err = error(predict(cnn.nodes['h4'].out), predict(y))
cost.name = 'cost'
err.name = 'error_rate'
model.graphs = [cnn]

# Define your optimizer: Momentum (Nesterov), RMSProp, Adam
optimizer = Adam(
    # lr=0.00005
    lr=0.0005
)

extension = [
    GradientClipping(batch_size=batch_size),
    EpochCount(100),
    Monitoring(freq=100,
               ddout=[cost, err],
              unit='softmax',
              init_W=init_W,
              init_b=init_b)
cost = MulCrossEntropyLayer(name='cost', parent=['y', 'h4'])

# You will fill in a list of nodes and feed them to the model constructor
nodes = [c1, c2, h1, h2, h3, h4, cost]

# Your model will build the Theano computational graph
cnn = Net(inputs=inputs, inputs_dim=inputs_dim, nodes=nodes)
cnn.build_graph()

# You can access any output of a node by doing model.nodes[$node_name].out
cost = cnn.nodes['cost'].out
err = error(predict(cnn.nodes['h4'].out), predict(y))
cost.name = 'cost'
err.name = 'error_rate'
model.graphs = [cnn]

# Define your optimizer: Momentum (Nesterov), RMSProp, Adam
optimizer = Adam(
    # lr=0.00005
    lr=0.0005)

extension = [
    GradientClipping(batch_size=batch_size),
    EpochCount(100),
    Monitoring(freq=100,
               ddout=[cost, err],
               data=[Iterator(test_data, batch_size)]),
              unit='softmax',
              init_W=init_W,
              init_b=init_b)
cost = MulCrossEntropyLayer(name='cost', parent=['onehot', 'h2'])

# You will fill in a list of nodes and feed them to the model constructor
nodes = [onehot, h1, h2, cost]

# Your model will build the Theano computational graph
mlp = Net(inputs=inputs, inputs_dim=inputs_dim, nodes=nodes)
mlp.build_graph()

# You can access any output of a node by doing model.nodes[$node_name].out
cost = mlp.nodes['cost'].out
err = error(predict(mlp.nodes['h2'].out), predict(mlp.nodes['onehot'].out))
cost.name = 'cost'
err.name = 'error_rate'
model.graphs = [mlp]

# Define your optimizer: Momentum (Nesterov), RMSProp, Adam
optimizer = RMSProp(
    lr=0.001
)

extension = [
    GradientClipping(),
    EpochCount(40),
    Monitoring(freq=100,
               ddout=[cost, err],
               data=[Iterator(trdata, batch_size),
# You will fill in a list of nodes
nodes = [h1, output]

# Initialize the nodes
for node in nodes:
    node.initialize()
params = flatten([node.get_params().values() for node in nodes])

# Build the Theano computational graph
h1_out = h1.fprop([x])
y_hat = output.fprop([h1_out])

# Compute the cost
cost = NllMulInd(y, y_hat).mean()
err = error(predict(y_hat), y)
cost.name = 'cross_entropy'
err.name = 'error_rate'

model.inputs = [x, y]
model._params = params
model.nodes = nodes

# Define your optimizer: Momentum (Nesterov), RMSProp, Adam
optimizer = RMSProp(lr=0.001)

extension = [
    GradientClipping(),
    EpochCount(40),
    Monitoring(freq=100,
               ddout=[cost, err],
# Initialize the nodes
params = OrderedDict()
for node in nodes:
    params.update(node.initialize())
params = init_tparams(params)
nparams = add_noise_params(params, std_dev=std_dev)

# Build the Theano computational graph
d_x = inp_scale * dropout(x, p=inp_p)
h1_out = h1.fprop([d_x], nparams)
d1_out = int_scale * dropout(h1_out, p=int_p)
y_hat = output.fprop([d1_out], nparams)

# Compute the cost
cost = NllMulInd(y, y_hat).mean()
err = error(predict(y_hat), y)
cost.name = 'cross_entropy'
err.name = 'error_rate'

# Separate computational graph to compute monitoring values without
# considering the noising processes
m_h1_out = h1.fprop([x], params)
m_y_hat = output.fprop([m_h1_out], params)

m_cost = NllMulInd(y, m_y_hat).mean()
m_err = error(predict(m_y_hat), y)
m_cost.name = 'cross_entropy'
m_err.name = 'error_rate'

monitor_fn = theano.function([x, y], [m_cost, m_err])
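# Minimal sketch (an assumption, not the library's implementation) of a
# `dropout` helper consistent with its use above: zero out entries of x with
# probability p; the caller applies the inp_scale / int_scale rescaling.
from theano.sandbox.rng_mrg import MRG_RandomStreams

_dropout_rng = MRG_RandomStreams(seed=2020)

def dropout(x, p=0.5):
    mask = _dropout_rng.binomial(size=x.shape, p=1. - p, dtype=x.dtype)
    return x * mask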
# You will fill in a list of nodes
nodes = [h1, output]

# Initialize the nodes
params = OrderedDict()
for node in nodes:
    params.update(node.initialize())
params = init_tparams(params)

# Build the Theano computational graph
h1_out = h1.fprop([x], params)
y_hat = output.fprop([h1_out], params)

# Compute the cost
cost = NllMulInd(y, y_hat).mean()
err = error(predict(y_hat), y)
cost.name = 'cross_entropy'
err.name = 'error_rate'

model.inputs = [x, y]
model.params = params
model.nodes = nodes

# Define your optimizer: Momentum (Nesterov), RMSProp, Adam
optimizer = RMSProp(
    lr=0.01
)

extension = [
    GradientClipping(batch_size=batch_size, check_nan=1),
    EpochCount(500),