def __init__(self, stack_dim=500, **kwargs):
    """Sole constructor.

    Args:
        stack_dim (int): Size of vectors on the stack.
    """
    super(PushDownSequenceContentAttention, self).__init__(**kwargs)
    self.stack_dim = stack_dim
    self.max_stack_depth = 25
    self.stack_op_names = self.state_names + ['weighted_averages']
    # The prototype dims are deliberately left unset here; they are configured
    # later, during the brick's allocation-config push.
    self.stack_pop_transformer = MLP(activations=[Logistic()], dims=None)
    self.stack_pop_transformers = Parallel(
        input_names=self.stack_op_names,
        prototype=self.stack_pop_transformer,
        name="stack_pop")
    self.stack_push_transformer = MLP(activations=[Logistic()], dims=None)
    self.stack_push_transformers = Parallel(
        input_names=self.stack_op_names,
        prototype=self.stack_push_transformer,
        name="stack_push")
    self.stack_input_transformer = Linear()
    self.stack_input_transformers = Parallel(
        input_names=self.stack_op_names,
        prototype=self.stack_input_transformer,
        name="stack_input")
    self.children.append(self.stack_pop_transformers)
    self.children.append(self.stack_push_transformers)
    self.children.append(self.stack_input_transformers)
def create_vae(x=None, batch=batch_size):
    x = T.matrix('features') if x is None else x
    x = x / 255.

    encoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[img_dim ** 2, hidden_dim, 2 * latent_dim],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='encoder'
    )
    encoder.initialize()
    z_param = encoder.apply(x)
    # The encoder emits 2 * latent_dim values per example: one half is the
    # mean, the other half the log standard deviation of q(z|x).
    z_mean, z_log_std = z_param[:, latent_dim:], z_param[:, :latent_dim]
    z = Sampling(theano_seed=seed).apply([z_mean, z_log_std], batch=batch)

    decoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[latent_dim, hidden_dim, img_dim ** 2],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='decoder'
    )
    decoder.initialize()
    x_reconstruct = decoder.apply(z)

    cost = VAEloss().apply(x, x_reconstruct, z_mean, z_log_std)
    cost.name = 'vae_cost'
    return cost
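# Added example (not from the original source). The `Sampling` brick used in
# create_vae above is not shown in this excerpt; the snippet below is a minimal
# numpy sketch of the reparameterisation step such a brick typically implements,
# z = mean + exp(log_std) * eps, purely for illustration.
import numpy


def sample_latent_sketch(z_mean, z_log_std, rng=numpy.random):
    """Draw z ~ N(mean, exp(log_std)^2) via the reparameterisation trick."""
    eps = rng.standard_normal(z_mean.shape).astype(z_mean.dtype)
    return z_mean + numpy.exp(z_log_std) * eps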
def __init__(self, dim, activation=None, **kwargs):
    super(LSTM, self).__init__(**kwargs)
    self.dim = dim

    if not activation:
        activation = Tanh()

    self.in_activation = masonry.NormalizedActivation(
        shape=(self.dim,),
        broadcastable=(False,),
        activation=Logistic(),
        batch_normalize=True,
        name="in_activation")
    self.forget_activation = masonry.NormalizedActivation(
        shape=(self.dim,),
        broadcastable=(False,),
        activation=Logistic(),
        batch_normalize=True,
        name="forget_activation")
    self.out_activation = masonry.NormalizedActivation(
        shape=(self.dim,),
        broadcastable=(False,),
        activation=Logistic(),
        batch_normalize=True,
        name="out_activation")
    self.recurrent_activation = activation

    self.children = [
        self.in_activation,
        self.forget_activation,
        self.out_activation,
        self.recurrent_activation]
def decoder_network(latent_sample, latent_dim=J):
    # Bernoulli case
    hidden2 = get_typical_layer(latent_sample, latent_dim, 500, Logistic())
    hidden2_to_output = Linear(name="last", input_dim=500, output_dim=784)
    hidden2_to_output.weights_init = IsotropicGaussian(0.01)
    hidden2_to_output.biases_init = Constant(0)
    hidden2_to_output.initialize()
    return Logistic().apply(hidden2_to_output.apply(hidden2))
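# Added sketch (not from the original source). The helper `get_typical_layer`
# used in decoder_network above is not defined in this excerpt. A plausible
# reconstruction is a Linear brick followed by an optional activation,
# initialised like the rest of the snippet; names and defaults are assumptions.
from blocks.bricks import Linear
from blocks.initialization import IsotropicGaussian, Constant


def get_typical_layer_sketch(input_var, input_dim, output_dim, activation=None):
    layer = Linear(input_dim=input_dim, output_dim=output_dim,
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0))
    layer.initialize()
    output = layer.apply(input_var)
    return activation.apply(output) if activation is not None else output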
def create_model(self):
    x = self.x
    input_dim = self.input_dim
    mlp = MLP([Logistic(), Logistic(), Tanh()],
              [input_dim, 100, 100, 1],
              weights_init=IsotropicGaussian(0.001),
              biases_init=Constant(0))
    mlp.initialize()
    self.mlp = mlp
    probs = mlp.apply(x)
    return probs
def __init__(self, emb_dim, dim, dropout=0.0, def_word_gating="none",
             dropout_type="per_unit", compose_type="sum",
             word_dropout_weighting="no_weighting",
             shortcut_unk_and_excluded=False, num_input_words=-1,
             exclude_top_k=-1, vocab=None, **kwargs):
    self._dropout = dropout
    self._num_input_words = num_input_words
    self._exclude_top_K = exclude_top_k
    self._dropout_type = dropout_type
    self._compose_type = compose_type
    self._vocab = vocab
    self._shortcut_unk_and_excluded = shortcut_unk_and_excluded
    self._word_dropout_weighting = word_dropout_weighting
    self._def_word_gating = def_word_gating

    if def_word_gating not in {"none", "self_attention"}:
        raise NotImplementedError()
    if word_dropout_weighting not in {"no_weighting"}:
        raise NotImplementedError("Not implemented " + word_dropout_weighting)
    if dropout_type not in {"per_unit", "per_example", "per_word"}:
        raise NotImplementedError()

    children = []

    if self._def_word_gating == "self_attention":
        self._gate_mlp = Linear(dim, dim)
        self._gate_act = Logistic()
        children.extend([self._gate_mlp, self._gate_act])

    if compose_type == 'fully_connected_linear':
        self._def_state_compose = MLP(activations=[None],
                                      dims=[emb_dim + dim, emb_dim])
        children.append(self._def_state_compose)

    if compose_type == "gated_sum" or compose_type == "gated_transform_and_sum":
        if dropout_type == "per_word" or dropout_type == "per_example":
            raise RuntimeError("I don't think this combination makes much sense")
        self._compose_gate_mlp = Linear(dim + emb_dim, emb_dim,
                                        name='gate_linear')
        self._compose_gate_act = Logistic()
        children.extend([self._compose_gate_mlp, self._compose_gate_act])

    if compose_type == 'sum':
        if not emb_dim == dim:
            raise ValueError(
                "Embedding has different dim! Cannot use compose_type='sum'")

    if compose_type == 'transform_and_sum' or compose_type == "gated_transform_and_sum":
        self._def_state_transform = Linear(dim, emb_dim, name='state_transform')
        children.append(self._def_state_transform)

    super(MeanPoolCombiner, self).__init__(children=children, **kwargs)
def build_network(self, num_labels, features, max_len=None, hidden_units=None,
                  l2=None, use_cnn=None, cnn_filter_size=None,
                  cnn_pool_size=None, cnn_num_filters=None,
                  cnn_filter_sizes=None, embedding_size=None, DEBUG=False):
    """Build the neural network used for training.

    :param num_labels: Number of labels to classify
    :param features: the input features we use
    :param max_len: Configured window-size
    :param hidden_units: Number of units in the MLP's hidden layer
    :returns: The sigmoid outputs for both entities and their pre-output
              hidden representations
    """
    logger.info(
        'building the network, with one CNN for left and one for right')
    hidden_units = hidden_units or self._config['hidden_units']
    logger.info('#hidden units: %d', hidden_units)

    # Build the feature vector from the input.
    mlp_in_e1, mlp_in_e2, mlp_in_dim = self.build_feature_vector_noMention(
        features)
    logger.info('feature vector size: %d', mlp_in_dim)

    mlp = MLP(activations=[Rectifier()],
              dims=[mlp_in_dim, hidden_units],
              seed=self.curSeed)
    initialize([mlp])
    before_out_e1 = mlp.apply(mlp_in_e1)
    before_out_e2 = mlp.apply(mlp_in_e2)

    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_units,
                              output_dim=num_labels)
    initialize([hidden_to_output])
    linear_output_e1 = hidden_to_output.apply(before_out_e1)
    linear_output_e2 = hidden_to_output.apply(before_out_e2)
    linear_output_e1.name = 'linear_output_e1'
    linear_output_e2.name = 'linear_output_e2'

    y_hat_e1 = Logistic(name='logistic1').apply(linear_output_e1)
    y_hat_e2 = Logistic(name='logistic2').apply(linear_output_e2)
    y_hat_e1.name = 'y_hat_e1'
    y_hat_e2.name = 'y_hat_e2'
    y_hat_e1 = debug_print(y_hat_e1, 'y_1', DEBUG)
    return y_hat_e1, y_hat_e2, before_out_e1, before_out_e2
def __init__(self, x_dim, hidden_layers, hidden_act, z_dim,
             batch_norm=False, l2reg=1e-3, **kwargs):
    super(VAE, self).__init__([], [], **kwargs)

    self.l2reg = l2reg

    inits = {
        'weights_init': IsotropicGaussian(std=0.1),
        # 'weights_init': RWSInitialization(factor=1.),
        'biases_init': Constant(0.0),
    }

    if batch_norm:
        mlp_class = BatchNormalizedMLP
    else:
        mlp_class = MLP

    hidden_act = [hidden_act] * len(hidden_layers)

    q_mlp = mlp_class(hidden_act, [x_dim] + hidden_layers, **inits)
    p_mlp = mlp_class(hidden_act + [Logistic()],
                      [z_dim] + hidden_layers + [x_dim], **inits)

    self.q = GaussianLayer(z_dim, q_mlp, **inits)
    self.p = BernoulliLayer(p_mlp, **inits)

    self.prior_log_sigma = numpy.zeros(z_dim)
    # self.prior_mu = numpy.zeros(z_dim)
    # self.children = [self.p, self.q]
def main():
    x = tensor.matrix("features")
    input_to_hidden1 = get_typical_layer(x, 784, 500)
    # hidden1_to_hidden2 = get_typical_layer(input_to_hidden1, 500, 300)
    hidden1_to_latent = get_typical_layer(input_to_hidden1, 500, 20)

    latent_to_hidden2 = get_typical_layer(hidden1_to_latent, 20, 500)
    # hidden3_to_hidden4 = get_typical_layer(latent_to_hidden3, 300, 500)
    hidden2_to_output = get_typical_layer(latent_to_hidden2, 500, 784, Logistic())
    hidden2_to_output.name = "last_before_output"

    from blocks.bricks.cost import SquaredError, AbsoluteError, BinaryCrossEntropy
    from blocks.graph import ComputationGraph
    from blocks.algorithms import Adam, GradientDescent, Scale
    from blocks.roles import WEIGHT

    cost = BinaryCrossEntropy(name="error").apply(x, hidden2_to_output)
    cg = ComputationGraph(cost)
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    # cost += 0.0001 * tensor.sum(map(lambda x: (x**2).sum(), weights))
    # cost.name = "regularized error"

    gd = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())

    from blocks.main_loop import MainLoop
    from blocks.extensions import FinishAfter, Printing, ProgressBar
    from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring

    monitor = TrainingDataMonitoring([cost], after_epoch=True)
    main_loop = MainLoop(data_stream=get_data_stream(), algorithm=gd,
                         extensions=[monitor, FinishAfter(after_n_epochs=5),
                                     ProgressBar(), Printing()])
    main_loop.run()
    showcase(cg, "last_before_output")
def create_base_model(self, x, y, input_dim, interim_dim=30):
    # Create the output of the MLP
    mlp = MLP([Tanh(), Tanh(), Tanh()],
              [input_dim, 60, 60, interim_dim],
              weights_init=IsotropicGaussian(0.001),
              biases_init=Constant(0))
    mlp.initialize()
    inter = mlp.apply(x)

    fine_tuner = MLP([Logistic()],
                     [interim_dim, 1],
                     weights_init=IsotropicGaussian(0.001),
                     biases_init=Constant(0))
    fine_tuner.initialize()
    probs = fine_tuner.apply(inter)

    # sq_err = BinaryCrossEntropy()
    err = T.sqr(y.flatten() - probs.flatten())
    # cost = T.mean(err * y.flatten() * (1 - self.p) + err *
    #               (1 - y.flatten()) * self.p)
    cost = T.mean(err)
    # cost = sq_err.apply(probs.flatten(), y.flatten())
    # cost = T.mean(y.flatten() * T.log(probs.flatten()) +
    #               (1 - y.flatten()) * T.log(1 - probs.flatten()))
    cost.name = 'cost'

    pred_out = probs > 0.5
    mis_cost = T.sum(T.neq(y.flatten(), pred_out.flatten()))
    mis_cost.name = 'MisclassificationRate'
    return mlp, fine_tuner, cost, mis_cost
def build_mlp(features_cat, features_int, labels):
    mlp_int = MLP(activations=[Rectifier(), Rectifier()],
                  dims=[19, 50, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_interval')
    mlp_int.initialize()

    mlp_cat = MLP(activations=[Logistic()],
                  dims=[320, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_categorical')
    mlp_cat.initialize()

    mlp = MLP(activations=[Rectifier(), None],
              dims=[50, 50, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0))
    mlp.initialize()

    gated = mlp_cat.apply(features_cat) * mlp_int.apply(features_int)
    prediction = mlp.apply(gated)
    cost = MAPECost().apply(prediction, labels)

    cg = ComputationGraph(cost)
    print cg.variables

    cg_dropout1 = apply_dropout(
        cg,
        [VariableFilter(roles=[OUTPUT])(cg.variables)[1],
         VariableFilter(roles=[OUTPUT])(cg.variables)[3]], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost
def create_model(self, x, y, input_dim, tol=10e-5):
    # Create the output of the MLP
    mlp = MLP([Rectifier(), Rectifier(), Logistic()],
              [input_dim, 100, 100, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    probs = mlp.apply(x)
    y = y.dimshuffle(0, 'x')

    # Build a differentiable (soft) F-beta score to use as the cost.
    true_p = (T.sum(y * probs) + tol) * 1.0 / (T.sum(y) + tol)
    true_n = (T.sum((1 - y) * (1 - probs)) + tol) * 1.0 / (T.sum(1 - y) + tol)
    # p = (T.sum(y) + tol) / (y.shape[0] + tol)
    theta = (1 - self.p) / self.p
    numerator = (1 + self.beta ** 2) * true_p
    denominator = self.beta ** 2 + theta + true_p - theta * true_n

    Fscore = numerator / denominator

    cost = -1 * Fscore
    cost.name = "cost"

    return mlp, cost, probs
def __init__(self, mlp, frame_size=259, k=20, const=1e-5, **kwargs):
    super(SPF0Emitter, self).__init__(**kwargs)
    self.mlp = mlp
    input_dim = self.mlp.output_dim
    self.const = const
    self.frame_size = frame_size

    mlp_gmm = GMMMLP(mlp=mlp,
                     dim=(frame_size - 2) * k,
                     k=k,
                     const=const)

    self.gmm_emitter = GMMEmitter(gmmmlp=mlp_gmm,
                                  output_size=frame_size - 2,
                                  k=k,
                                  name="gmm_emitter")

    self.mu = MLP(activations=[Identity()],
                  dims=[input_dim, 1],
                  name=self.name + "_mu")
    self.sigma = MLP(activations=[SoftPlus()],
                     dims=[input_dim, 1],
                     name=self.name + "_sigma")
    self.binary = MLP(activations=[Logistic()],
                      dims=[input_dim, 1],
                      name=self.name + "_binary")

    self.children = [self.mlp, self.mu, self.sigma,
                     self.binary, self.gmm_emitter]
def __init__(self, dim, num_copies, use_W_xu, activation=None,
             gate_activation=None, **kwargs):
    self.dim = dim
    self.num_copies = num_copies
    self.use_W_xu = use_W_xu

    # shape: C x F/2
    permutations = []
    indices = numpy.arange(self.dim / 2)
    for i in range(self.num_copies):
        numpy.random.shuffle(indices)
        permutations.append(
            numpy.concatenate(
                [indices, [ind + self.dim / 2 for ind in indices]]))
    # C x F (numpy)
    self.permutations = numpy.vstack(permutations)

    if not activation:
        activation = Tanh()
    if not gate_activation:
        gate_activation = Logistic()
    self.activation = activation
    self.gate_activation = gate_activation

    children = ([self.activation, self.gate_activation] +
                kwargs.get('children', []))
    super(AssociativeLSTM, self).__init__(children=children, **kwargs)
def softmax_layer(h, y, hidden_size, num_targets, cost_fn='cross'):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=num_targets)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    y_pred = T.argmax(linear_output, axis=1)
    label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred],
                                     'label_of_predicted', False)
    pat1 = T.mean(label_of_predicted)
    updates = None
    if 'ranking' in cost_fn:
        cost, updates = ranking_loss(linear_output, y)
        print 'using ranking loss function!'
    else:
        y_hat = Logistic().apply(linear_output)
        y_hat.name = 'y_hat'
        cost = cross_entropy_loss(y_hat, y)
    cost.name = 'cost'
    pat1.name = 'precision@1'
    misclassify_rate = MultiMisclassificationRate().apply(
        y, T.ge(linear_output, 0.5))
    misclassify_rate.name = 'error_rate'
    return cost, pat1, updates, misclassify_rate
def test_activations():
    x = tensor.vector()
    x_val = numpy.random.rand(8).astype(theano.config.floatX)
    exp_x_val = numpy.exp(x_val)

    assert_allclose(x_val, Identity().apply(x).eval({x: x_val}))
    assert_allclose(numpy.tanh(x_val), Tanh().apply(x).eval({x: x_val}),
                    rtol=1e-06)
    assert_allclose(numpy.log(1 + exp_x_val),
                    Softplus().apply(x).eval({x: x_val}), rtol=1e-6)
    assert_allclose(exp_x_val / numpy.sum(exp_x_val),
                    Softmax().apply(x).eval({x: x_val}).flatten(), rtol=1e-6)
    assert_allclose(1.0 / (1.0 + numpy.exp(-x_val)),
                    Logistic().apply(x).eval({x: x_val}), rtol=1e-6)

    leaky_out_1 = x_val - 0.5
    leaky_out_1[leaky_out_1 < 0] *= 0.01
    assert_allclose(leaky_out_1,
                    LeakyRectifier().apply(x).eval({x: x_val - 0.5}),
                    rtol=1e-5)
    leaky_out_2 = x_val - 0.5
    leaky_out_2[leaky_out_2 < 0] *= 0.05
    assert_allclose(leaky_out_2,
                    LeakyRectifier(leak=0.05).apply(x).eval({x: x_val - 0.5}),
                    rtol=1e-5)
def create_model_bricks():
    convnet = ConvolutionalSequence(
        layers=[
            Convolutional(filter_size=(4, 4), num_filters=32, name='conv1'),
            SpatialBatchNormalization(name='batch_norm1'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), step=(2, 2), num_filters=32,
                          name='conv2'),
            SpatialBatchNormalization(name='batch_norm2'),
            Rectifier(),
            Convolutional(filter_size=(4, 4), num_filters=64, name='conv3'),
            SpatialBatchNormalization(name='batch_norm3'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), step=(2, 2), num_filters=64,
                          name='conv4'),
            SpatialBatchNormalization(name='batch_norm4'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=128, name='conv5'),
            SpatialBatchNormalization(name='batch_norm5'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), step=(2, 2), num_filters=128,
                          name='conv6'),
            SpatialBatchNormalization(name='batch_norm6'),
            Rectifier(),
        ],
        num_channels=3,
        image_size=(64, 64),
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='convnet')
    convnet.initialize()

    mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Logistic()],
        dims=[numpy.prod(convnet.get_dim('output')), 1000, 40],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='mlp')
    mlp.initialize()

    return convnet, mlp
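# Added usage sketch (not from the original source): one plausible way to wire
# the bricks returned by create_model_bricks above into a classifier graph.
# The flattening step and variable names are assumptions, not the author's code.
from theano import tensor

x = tensor.tensor4('features')
convnet, mlp = create_model_bricks()
conv_features = convnet.apply(x)                     # (batch, channels, rows, cols)
probs = mlp.apply(conv_features.flatten(ndim=2))     # 40 sigmoid attribute outputs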
def __init__(self, input_dim, output_dim, hidden_size, init_ranges, **kwargs):
    linear1 = LinearMaxout(input_dim=input_dim, output_dim=hidden_size,
                           num_pieces=2, name='linear1')
    linear2 = LinearMaxout(input_dim=hidden_size, output_dim=hidden_size,
                           num_pieces=2, name='linear2')
    linear3 = Linear(input_dim=hidden_size, output_dim=output_dim)
    logistic = Logistic()
    bricks = [
        linear1,
        BatchNormalization(input_dim=hidden_size, name='bn2'),
        linear2,
        BatchNormalization(input_dim=hidden_size, name='bnl'),
        linear3,
        logistic]
    for init_range, b in zip(init_ranges, (linear1, linear2, linear3)):
        b.biases_init = initialization.Constant(0)
        b.weights_init = initialization.Uniform(width=init_range)
    kwargs.setdefault('use_bias', False)
    super(ConcatenateClassifier, self).__init__(
        [b.apply for b in bricks], **kwargs)
def __init__(self, **kwargs):
    children = []

    self.layers_numerical = []
    self.layers_numerical.append(Linear(name='input_to_numerical_linear',
                                        input_dim=5000,
                                        output_dim=17,
                                        weights_init=IsotropicGaussian(),
                                        biases_init=Constant(1)))

    self.layers_categorical = []
    self.layers_categorical.append(Linear(name='input_to_categorical_linear',
                                          input_dim=5000,
                                          output_dim=24016,
                                          weights_init=IsotropicGaussian(),
                                          biases_init=Constant(1)))
    self.layers_categorical.append(
        Logistic(name='input_to_categorical_sigmoid'))

    children += self.layers_numerical
    children += self.layers_categorical

    kwargs.setdefault('children', []).extend(children)
    super(build_top_mlp, self).__init__(**kwargs)
def __init__(self, input_dim, output_activation=None,
             transform_activation=None, **kwargs):
    super(Highway, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.output_dim = input_dim

    if output_activation is None:
        output_activation = Rectifier()
    if transform_activation is None:
        transform_activation = Logistic()

    self._linear_h = Linear(name="linear_h",
                            input_dim=input_dim,
                            output_dim=input_dim)
    self._linear_t = Linear(name="linear_t",
                            input_dim=input_dim,
                            output_dim=input_dim)
    self._output_activation = output_activation
    self._transform_activation = transform_activation

    self.children = [self._linear_h,
                     self._linear_t,
                     self._output_activation,
                     self._transform_activation]
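# Added sketch (not from the original source): the apply method that usually
# accompanies a Highway constructor like the one above. It assumes the
# attribute names defined in __init__ and the standard Blocks @application
# decorator; treat it as an illustration of the highway formula, not the
# author's implementation.
from blocks.bricks.base import application


class HighwaySketch(Highway):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        h = self._output_activation.apply(self._linear_h.apply(input_))     # H(x)
        t = self._transform_activation.apply(self._linear_t.apply(input_))  # T(x), gate in (0, 1)
        # Highway combination: carry the input where the gate is closed.
        return h * t + input_ * (1 - t)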
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d',
                n_latent, hu_decoder)

    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)

    mlp1 = MLP(activations=[Rectifier()],
               dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])

    hid_to_out = Linear(name='hidDecoder_to_output',
                        input_dim=hu_decoder,
                        output_dim=n_out)
    initialize([hid_to_out])

    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(mlp1.apply(z)))  # reconstructed x
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat

    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)

    return agg_y_hat, agg_logpy_xz
def __init__(self, image_dimension, **kwargs):
    layers = []

    #############################################
    # a first block with 2 convolutions of 32 (3, 3) filters
    layers.append(Convolutional((3, 3), 32, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 32, border_mode='half'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 2nd block with 3 convolutions of 64 (3, 3) filters
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 64, border_mode='half'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    #############################################
    # a 3rd block with 4 convolutions of 128 (3, 3) filters
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())
    layers.append(Convolutional((3, 3), 128, border_mode='half'))
    layers.append(Rectifier())

    # maxpool with size=(2, 2)
    layers.append(MaxPooling((2, 2)))

    self.conv_sequence = ConvolutionalSequence(layers, 3,
                                               image_size=image_dimension)

    flattener = Flattener()

    # NOTE: MLP expects one more entry in dims than activations
    # (input, hidden, output); the flattened conv output size appears
    # to be missing from dims here.
    self.top_mlp = MLP(activations=[Rectifier(), Logistic()],
                       dims=[500, 1])

    application_methods = [self.conv_sequence.apply,
                           flattener.apply,
                           self.top_mlp.apply]

    super(VGGNet, self).__init__(application_methods,
                                 biases_init=Constant(0),
                                 weights_init=Uniform(width=.1),
                                 **kwargs)
def setUp(self):
    self.mlp = MLP([Sequence([Identity(name='id1').apply,
                              Tanh(name='tanh1').apply],
                             name='sequence1'),
                    Sequence([Logistic(name='logistic1').apply,
                              Identity(name='id2').apply,
                              Tanh(name='tanh2').apply],
                             name='sequence2'),
                    Logistic(name='logistic2'),
                    Sequence([Sequence([Logistic(name='logistic3').apply],
                                       name='sequence4').apply],
                             name='sequence3')],
                   [10, 5, 9, 5, 9])
def create_model(self):
    input_dim = self.input_dim
    x = self.x
    y = self.y
    p = self.p
    mask = self.mask
    hidden_dim = self.hidden_dim
    embedding_dim = self.embedding_dim

    lookup = LookupTable(self.dict_size, embedding_dim,
                         weights_init=IsotropicGaussian(0.001),
                         name='LookupTable')
    x_to_h = Linear(embedding_dim, hidden_dim * 4,
                    name='x_to_h',
                    weights_init=IsotropicGaussian(0.001),
                    biases_init=Constant(0.0))
    lstm = LSTM(hidden_dim,
                name='lstm',
                weights_init=IsotropicGaussian(0.001),
                biases_init=Constant(0.0))
    h_to_o = MLP([Logistic()],
                 [hidden_dim, 1],
                 weights_init=IsotropicGaussian(0.001),
                 biases_init=Constant(0),
                 name='h_to_o')

    lookup.initialize()
    x_to_h.initialize()
    lstm.initialize()
    h_to_o.initialize()

    embed = lookup.apply(x).reshape(
        (x.shape[0], x.shape[1], self.embedding_dim))
    embed.name = "embed_vec"
    x_transform = x_to_h.apply(embed.transpose(1, 0, 2))
    x_transform.name = "Transformed X"

    self.lookup = lookup
    self.x_to_h = x_to_h
    self.lstm = lstm
    self.h_to_o = h_to_o

    # if mask is None:
    h, c = lstm.apply(x_transform)
    # else:
    #     h, c = lstm.apply(x_transform, mask=mask)
    h.name = "hidden_state"
    c.name = "cell state"

    # Only the hidden state at the last (unmasked) timestep is used for
    # the classification.
    indices = T.sum(mask, axis=0) - 1
    rel_hid = h[indices, T.arange(h.shape[1])]
    out = self.h_to_o.apply(rel_hid)

    probs = out
    return probs
def test_collect():
    x = tensor.matrix()
    mlp = MLP(activations=[Logistic(), Logistic()],
              dims=[784, 100, 784],
              use_bias=False)
    cost = SquaredError().apply(x, mlp.apply(x))
    cg = ComputationGraph(cost)
    var_filter = VariableFilter(roles=[PARAMETER])
    W1, W2 = var_filter(cg.variables)
    for i, W in enumerate([W1, W2]):
        W.set_value(numpy.ones_like(W.get_value()) * (i + 1))
    new_cg = collect_parameters(cg, cg.shared_variables)
    collected_parameters, = new_cg.shared_variables
    assert numpy.all(collected_parameters.get_value()[:784 * 100] == 1.)
    assert numpy.all(collected_parameters.get_value()[784 * 100:] == 2.)
    assert collected_parameters.ndim == 1
    W1, W2 = VariableFilter(roles=[COLLECTED])(new_cg.variables)
    assert W1.eval().shape == (784, 100)
    assert numpy.all(W1.eval() == 1.)
    assert W2.eval().shape == (100, 784)
    assert numpy.all(W2.eval() == 2.)
def __init__(self, dim, activation=None, gate_activation=None, **kwargs):
    self.dim = dim

    if not activation:
        activation = Tanh()
    if not gate_activation:
        gate_activation = Logistic()
    self.activation = activation
    self.gate_activation = gate_activation

    children = [activation, gate_activation] + kwargs.get('children', [])
    super(GatedRecurrent, self).__init__(children=children, **kwargs)
def __init__(self, dim, activation=None, gate_activation=None, **kwargs):
    self.dim = dim

    if not activation:
        activation = Tanh()
    if not gate_activation:
        gate_activation = Logistic()
    self.activation = activation
    self.gate_activation = gate_activation

    children = [activation, gate_activation]
    kwargs.setdefault('children', []).extend(children)
    super(ZoneoutGRU, self).__init__(**kwargs)
def __init__(self, dim, activation=None, gate_activation=None, **kwargs):
    super(GRU, self).__init__(**kwargs)
    self.dim = dim

    if not activation:
        activation = Tanh()
    if not gate_activation:
        gate_activation = Logistic()
    self.activation = activation
    self.gate_activation = gate_activation

    self.children = [activation, gate_activation]
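# Added sketch (not from the original source): a tiny numpy illustration of
# where the two defaults above enter a GRU step -- Logistic for the update and
# reset gates, Tanh for the candidate state. Weight names are placeholders.
import numpy


def gru_step_sketch(x_t, h_prev, W, U, Wz, Uz, Wr, Ur):
    sigmoid = lambda a: 1.0 / (1.0 + numpy.exp(-a))
    z = sigmoid(x_t.dot(Wz) + h_prev.dot(Uz))                # update gate
    r = sigmoid(x_t.dot(Wr) + h_prev.dot(Ur))                # reset gate
    h_tilde = numpy.tanh(x_t.dot(W) + (r * h_prev).dot(U))   # candidate state
    return (1 - z) * h_prev + z * h_tilde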
def test_activations():
    x = tensor.vector()
    x_val = numpy.random.rand(8).astype(theano.config.floatX)
    exp_x_val = numpy.exp(x_val)

    assert_allclose(x_val, Identity().apply(x).eval({x: x_val}))
    assert_allclose(numpy.tanh(x_val), Tanh().apply(x).eval({x: x_val}),
                    rtol=1e-06)
    assert_allclose(numpy.log(1 + exp_x_val),
                    Softplus().apply(x).eval({x: x_val}), rtol=1e-6)
    assert_allclose(exp_x_val / numpy.sum(exp_x_val),
                    Softmax().apply(x).eval({x: x_val}).flatten(), rtol=1e-6)
    assert_allclose(1.0 / (1.0 + numpy.exp(-x_val)),
                    Logistic().apply(x).eval({x: x_val}), rtol=1e-6)
def __init__(self, x_dim, hidden_layers, hidden_act, z_dim, **kwargs):
    super(DVAE, self).__init__([], [], **kwargs)

    inits = {
        # 'weights_init': IsotropicGaussian(std=0.1),
        'weights_init': RWSInitialization(factor=1.),
        'biases_init': Constant(0.0),
    }

    hidden_act = [hidden_act] * len(hidden_layers)

    q_mlp = BatchNormalizedMLP(hidden_act + [Logistic()],
                               [x_dim] + hidden_layers + [z_dim], **inits)
    # q_mlp = MLP(hidden_act + [Logistic()],
    #             [x_dim] + hidden_layers + [z_dim], **inits)
    p_mlp = BatchNormalizedMLP(hidden_act + [Logistic()],
                               [z_dim] + hidden_layers + [x_dim], **inits)
    # p_mlp = MLP(hidden_act + [Logistic()],
    #             [z_dim] + hidden_layers + [x_dim], **inits)

    self.q = BernoulliLayer(q_mlp, name="q")
    self.p = BernoulliLayer(p_mlp, name="p")
    self.p_top = BernoulliTopLayer(z_dim, biases_init=Constant(0.0))

    self.children = [self.p_top, self.p, self.q]