##############
# Test model
##############

# Input -> MLP -> recurrent transition.
x_g = mlp_x.apply(x)
h = transition.apply(x_g)

# Mixture parameters are read from the second-to-last transition output.
mu, sigma, coeff = mlp_gmm.apply(h[-2])

# Training objective: the emitter cost on the same output, averaged.
cost = gmm_emitter.cost(h[-2], y).mean()
cost.name = 'sequence_log_likelihood'

# Output of the emitter for the same transition output.
emit = gmm_emitter.emit(h[-2])
emit.name = 'emitter'

cg = ComputationGraph(cost)
model = Model(cost)

#################
# Algorithm
#################

n_batches = 139 * 16

# The step rule chains StepClipping(10.0) with Adam(lr).
algorithm = GradientDescent(
    cost=cost,
    parameters=cg.parameters,
    step_rule=CompositeRule([StepClipping(10.0), Adam(lr)]))
brick.biases_init = Constant(0.) brick.initialize() ############## # Test model ############## x_g = mlp_x.apply(x) h = transition.apply(x_g) mu, sigma, coeff = mlp_gmm.apply(h[-2]) cost = gmm_emitter.cost(h[-2], y) cost = cost.mean() cost.name = 'nll' emit = gmm_emitter.emit(h[-2]) emit.name = 'emitter' cg = ComputationGraph(cost) model = Model(cost) ################# # Algorithm ################# n_batches = 139 * 16 algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=CompositeRule( [StepClipping(10.0),
class SimplePyramidLayer(Initializable):
    """Basic unit for the pyramid model.

    Wires together four child bricks: an input MLP (``mlp_x``), a fork
    that feeds the recurrent inputs, a stack of gated recurrent units
    (``transition``) and a GMM emitter (``gmm_emitter``).

    Parameters
    ----------
    batch_size
        Accepted for interface compatibility; not used by this class.
    frame_size
        Input dimension of ``mlp_x`` and ``output_size`` of the emitter.
    k
        Passed as ``k`` to the GMM bricks; also scales the GMM target
        dimension (``frame_size * k``).
    depth
        Number of layers in both MLPs; the recurrent stack has
        ``depth - 1`` layers.
    size
        Width multiplier: MLP hidden layers are ``32 * size`` wide and
        the recurrent layers ``32 * size * 3`` wide.

    """
    def __init__(self, batch_size, frame_size, k, depth, size, **kwargs):
        super(SimplePyramidLayer, self).__init__(**kwargs)

        target_size = frame_size * k

        depth_x = depth
        hidden_size_mlp_x = 32 * size

        depth_transition = depth - 1

        depth_theta = depth
        hidden_size_mlp_theta = 32 * size
        hidden_size_recurrent = 32 * size * 3

        # NOTE(review): the same Rectifier instance is repeated in each
        # list below; confirm that MLP is fine with shared activations.
        activations_x = [Rectifier()] * depth_x

        dims_x = ([frame_size] +
                  [hidden_size_mlp_x] * (depth_x - 1) +
                  [4 * hidden_size_recurrent])

        activations_theta = [Rectifier()] * depth_theta

        dims_theta = ([hidden_size_recurrent] +
                      [hidden_size_mlp_theta] * depth_theta)

        self.mlp_x = MLP(activations=activations_x,
                         dims=dims_x,
                         name="mlp_x")

        transition = [GatedRecurrent(dim=hidden_size_recurrent,
                                     use_bias=True,
                                     name="gru_{}".format(i))
                      for i in range(depth_transition)]

        self.transition = RecurrentStack(transition,
                                         name="transition",
                                         skip_connections=True)

        mlp_theta = MLP(activations=activations_theta,
                        dims=dims_theta,
                        name="mlp_theta")

        mlp_gmm = GMMMLP(mlp=mlp_theta,
                         dim=target_size,
                         k=k,
                         const=0.00001,
                         name="gmm_wrap")

        self.gmm_emitter = GMMEmitter(gmmmlp=mlp_gmm,
                                      output_size=frame_size,
                                      k=k)

        # Every sequence input of the transition except the mask ones is
        # produced by the fork from the output of ``mlp_x``.
        normal_inputs = [name for name in self.transition.apply.sequences
                         if 'mask' not in name]

        self.fork = Fork(normal_inputs,
                         input_dim=4 * hidden_size_recurrent,
                         output_dims=self.transition.get_dims(normal_inputs))

        self.children = [self.mlp_x, self.transition,
                         self.gmm_emitter, self.fork]

    def monitoring_vars(self, cg):
        """Return min/mean/max summaries of the GMM ``mu`` and ``sigma``.

        The variables are looked up in ``cg.variables`` by filtering on
        the GMM MLP's ``apply`` application and the name regex
        ``"output"``; the filter is expected to yield exactly three
        variables (mu, sigma, coeff).
        """
        mu, sigma, coeff = VariableFilter(
            applications=[self.gmm_emitter.gmmmlp.apply],
            name_regex="output")(cg.variables)

        min_sigma = sigma.min().copy(name="sigma_min")
        mean_sigma = sigma.mean().copy(name="sigma_mean")
        max_sigma = sigma.max().copy(name="sigma_max")

        min_mu = mu.min().copy(name="mu_min")
        mean_mu = mu.mean().copy(name="mu_mean")
        max_mu = mu.max().copy(name="mu_max")

        monitoring_vars = [mean_sigma, min_sigma,
                           min_mu, max_mu, mean_mu, max_sigma]

        return monitoring_vars

    def _transition_states(self, context, **kwargs):
        # Shared by cost/generate: context -> mlp_x -> fork -> transition.
        x_g = self.mlp_x.apply(context)
        inputs = self.fork.apply(x_g, as_dict=True)
        return self.transition.apply(**dict_union(inputs, kwargs))

    @application
    def cost(self, x, context, **kwargs):
        """Return the mean emitter cost of ``x`` given ``context``.

        Extra keyword arguments are merged with the forked inputs and
        forwarded to the transition. As a side effect, the last element
        (``var[-1]``) of every transition output is stored in
        ``self.final_states`` under the name ``<var.name>_final_value``.
        """
        h = self._transition_states(context, **kwargs)

        self.final_states = [
            var[-1].copy(name=var.name + "_final_value") for var in h]

        cost = self.gmm_emitter.cost(h[-1], x)
        return cost.mean()

    @application
    def generate(self, context, **kwargs):
        """Return the emitter output for the last transition output.

        The signature mirrors ``cost``: the method body needs both
        ``self`` and ``kwargs``, which the previous ``generate(context)``
        declaration did not provide.
        """
        h = self._transition_states(context, **kwargs)
        return self.gmm_emitter.emit(h[-1])