from theano import tensor
from blocks.bricks import MLP, Softmax
from blocks.bricks.lookup import LookupTable
from blocks.initialization import IsotropicGaussian, Constant


def construct_model(vocab_size, embedding_dim, ngram_order, hidden_dims,
                    activations):
    # Construct the model
    x = tensor.lmatrix('features')
    y = tensor.lvector('targets')

    lookup = LookupTable(length=vocab_size, dim=embedding_dim, name='lookup')
    hidden = MLP(activations=activations + [None],
                 dims=[ngram_order * embedding_dim] + hidden_dims + [vocab_size])

    embeddings = lookup.apply(x)
    # Concatenate the n-gram embeddings into a single vector per example
    embeddings = embeddings.flatten(ndim=2)
    activations = hidden.apply(embeddings)
    cost = Softmax().categorical_cross_entropy(y, activations)

    # Initialize parameters
    lookup.weights_init = IsotropicGaussian(0.001)
    hidden.weights_init = IsotropicGaussian(0.01)
    hidden.biases_init = Constant(0.001)
    lookup.initialize()
    hidden.initialize()

    return cost
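# --- Minimal usage sketch (not part of the original script) ---------------
# Builds the cost for a toy configuration and evaluates it on random data,
# as a quick check of what construct_model returns. The hyperparameter
# values and the toy batch below are assumptions made only for illustration.
import numpy
import theano
from blocks.bricks import Tanh
from blocks.graph import ComputationGraph

cost = construct_model(vocab_size=1000, embedding_dim=32, ngram_order=4,
                       hidden_dims=[64], activations=[Tanh()])
cg = ComputationGraph(cost)

# Look the input variables up by name so the argument order is explicit
inputs = {var.name: var for var in cg.inputs}
compute_cost = theano.function([inputs['features'], inputs['targets']], cost)

features = numpy.random.randint(1000, size=(8, 4)).astype('int64')
targets = numpy.random.randint(1000, size=(8,)).astype('int64')
print(compute_cost(features, targets))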
name="readout") attention = SimpleSequenceAttention(state_names=source_names, state_dims=[hidden_size_recurrent], attended_dim=context_size) generator = SequenceGenerator(readout=readout, transition=transition, attention=attention, name="generator") generator.weights_init = IsotropicGaussian(0.01) generator.biases_init = Constant(0.) generator.initialize() mlp_context.weights_init = IsotropicGaussian(0.01) mlp_context.biases_init = Constant(0.) mlp_context.initialize() #ipdb.set_trace() cost_matrix = generator.cost_matrix(x, x_mask, attended=mlp_context.apply(context)) cost = cost_matrix.sum() / x_mask.sum() cost.name = "sequence_log_likelihood" cg = ComputationGraph(cost) model = Model(cost) ################# # Algorithm
generated_samples = g.apply(noise)
discriminated_features = d.apply(features)
discriminated_samples = d.apply(generated_samples)

generator_cg = ComputationGraph(generated_samples)
discriminator_cg = ComputationGraph(discriminated_features)
dsamples_cg = ComputationGraph(discriminated_samples)
generator_parameters = generator_cg.parameters

m = 100
b_size = discriminated_features.shape[0] / 2  # half of the incoming batch

cost_generator = (
    tensor.sum(tensor.log(1 + tensor.exp(-discriminated_samples)))
    / discriminated_samples.shape[0].astype('float32'))
cost_discriminator = (
    (tensor.sum(discriminated_features[:b_size]) +
     tensor.sum(tensor.log(1 + tensor.exp(-discriminated_features))))
    / b_size.astype('float32'))

g.weights_init = IsotropicGaussian(0.05)
d.weights_init = IsotropicGaussian(0.005)
g.biases_init = d.biases_init = Constant(0)
g.initialize()
d.initialize()

# Tag parameters so generator and discriminator weights can be told apart
for param in generator_cg.parameters:
    param.name += '_g'
for param in discriminator_cg.parameters:
    param.name += '_d'

# Parameters shared between the discriminated-samples graph and the generator
both = list(set(dsamples_cg.parameters) & set(generator_cg.parameters))
indices = []
for (i, par) in enumerate(dsamples_cg.parameters):
    if par in both:  # loop body assumed from context; truncated in the original
        indices.append(i)
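# --- Illustrative sketch: separate updates for g and d (not the original) --
# The '_g' / '_d' suffixes and the two cost expressions above make it easy to
# give the generator and the discriminator their own update rules. The
# learning rates and the simple alternation scheme here are assumptions.
from blocks.algorithms import GradientDescent, Scale

algorithm_d = GradientDescent(
    cost=cost_discriminator,
    parameters=discriminator_cg.parameters,  # the '_d'-tagged parameters
    step_rule=Scale(learning_rate=0.001))

# Gradients are taken only with respect to the generator's own parameters,
# even though cost_generator also passes through the discriminator.
algorithm_g = GradientDescent(
    cost=cost_generator,
    parameters=generator_parameters,  # the '_g'-tagged parameters
    step_rule=Scale(learning_rate=0.001))

# In training, both algorithms would typically be initialized once and then
# alternated batch by batch (e.g. one discriminator step per generator step).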