def __init__(self, config, **kwargs):
    super(Model, self).__init__(config, **kwargs)
    self.dest_mlp = MLP(
        activations=[Rectifier() for _ in config.dim_hidden_dest] + [Softmax()],
        dims=[config.dim_hidden[-1]] + config.dim_hidden_dest
             + [config.dim_output_dest],
        name='dest_mlp')
    self.time_mlp = MLP(
        activations=[Rectifier() for _ in config.dim_hidden_time] + [Softmax()],
        dims=[config.dim_hidden[-1]] + config.dim_hidden_time
             + [config.dim_output_time],
        name='time_mlp')
    self.dest_classes = theano.shared(
        numpy.array(config.dest_tgtcls, dtype=theano.config.floatX),
        name='dest_classes')
    self.time_classes = theano.shared(
        numpy.array(config.time_tgtcls, dtype=theano.config.floatX),
        name='time_classes')
    self.inputs.append('input_time')
    self.children.extend([self.dest_mlp, self.time_mlp])
def __init__(self, state_names, state_dims, sequence_dim, match_dim,
             state_transformer=None, sequence_transformer=None,
             energy_computer=None, weights_init=None, biases_init=None,
             **kwargs):
    super(SequenceContentAttention, self).__init__(**kwargs)
    update_instance(self, locals())
    self.state_transformers = Parallel(state_names, self.state_transformer,
                                       name="state_trans")
    if not self.sequence_transformer:
        self.sequence_transformer = MLP([Identity()], name="seq_trans")
    if not self.energy_computer:
        self.energy_computer = MLP([Identity()], name="energy_comp")
    self.children = [self.state_transformers, self.sequence_transformer,
                     self.energy_computer]
def build_mlp(features_cat, features_int, labels):
    mlp_int = MLP(activations=[Rectifier(), Rectifier()],
                  dims=[19, 50, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_interval')
    mlp_int.initialize()
    mlp_cat = MLP(activations=[Logistic()],
                  dims=[320, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_categorical')
    mlp_cat.initialize()
    mlp = MLP(activations=[Rectifier(), None],
              dims=[50, 50, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0))
    mlp.initialize()

    gated = mlp_cat.apply(features_cat) * mlp_int.apply(features_int)
    prediction = mlp.apply(gated)
    cost = MAPECost().apply(prediction, labels)

    cg = ComputationGraph(cost)
    print(cg.variables)
    cg_dropout1 = apply_dropout(
        cg,
        [VariableFilter(roles=[OUTPUT])(cg.variables)[1],
         VariableFilter(roles=[OUTPUT])(cg.variables)[3]], .2)
    cost_dropout1 = cg_dropout1.outputs[0]
    return cost_dropout1, cg_dropout1.parameters, cost
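# Hedged alternative sketch: select dropout targets by brick rather than by
# position in cg.variables (positional indices are brittle under graph
# changes). VariableFilter's bricks= argument is part of the Blocks API;
# the helper name is ours.
def apply_dropout_on_brick_outputs(cg, bricks, rate=.2):
    outputs = VariableFilter(bricks=bricks, roles=[OUTPUT])(cg.variables)
    return apply_dropout(cg, outputs, rate)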
def create_vae(x=None, batch=batch_size):
    x = T.matrix('features') if x is None else x
    x = x / 255.
    encoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[img_dim ** 2, hidden_dim, 2 * latent_dim],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='encoder')
    encoder.initialize()
    z_param = encoder.apply(x)
    z_mean, z_log_std = z_param[:, latent_dim:], z_param[:, :latent_dim]
    # Use the batch argument rather than the module-level constant it
    # previously shadowed.
    z = Sampling(theano_seed=seed).apply([z_mean, z_log_std], batch=batch)

    decoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[latent_dim, hidden_dim, img_dim ** 2],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='decoder')
    decoder.initialize()
    x_reconstruct = decoder.apply(z)

    cost = VAEloss().apply(x, x_reconstruct, z_mean, z_log_std)
    cost.name = 'vae_cost'
    return cost
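# Hedged usage sketch: compile the VAE cost into a callable. It assumes the
# module-level constants create_vae already relies on (img_dim, hidden_dim,
# latent_dim, seed, batch_size) are defined.
features = T.matrix('features')
vae_cost = create_vae(features)
vae_fn = theano.function([features], vae_cost)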
def __init__(self, config, **kwargs):
    super(Model, self).__init__(**kwargs)
    self.config = config
    self.pre_context_embedder = ContextEmbedder(
        config.pre_embedder, name='pre_context_embedder')
    self.post_context_embedder = ContextEmbedder(
        config.post_embedder, name='post_context_embedder')

    in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
    self.input_to_rec = MLP(activations=[Tanh()],
                            dims=[in1, config.hidden_state_dim],
                            name='input_to_rec')
    self.rec = LSTM(dim=config.hidden_state_dim, name='recurrent')

    in2 = config.hidden_state_dim + sum(
        x[2] for x in config.post_embedder.dim_embeddings)
    self.rec_to_output = MLP(activations=[Tanh()],
                             dims=[in2, 2],
                             name='rec_to_output')

    self.sequences = ['latitude', 'latitude_mask', 'longitude']
    self.context = (self.pre_context_embedder.inputs
                    + self.post_context_embedder.inputs)
    self.inputs = self.sequences + self.context
    self.children = [self.pre_context_embedder, self.post_context_embedder,
                     self.input_to_rec, self.rec, self.rec_to_output]

    self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((config.hidden_state_dim,),
                                             name="initial_cells")
def __init__(self, stack_dim=500, **kwargs):
    """Sole constructor.

    Args:
        stack_dim (int): Size of vectors on the stack.
    """
    super(PushDownSequenceContentAttention, self).__init__(**kwargs)
    self.stack_dim = stack_dim
    self.max_stack_depth = 25
    self.stack_op_names = self.state_names + ['weighted_averages']
    # dims are left unset here; they are filled in later when the
    # allocation config is pushed.
    self.stack_pop_transformer = MLP(activations=[Logistic()], dims=None)
    self.stack_pop_transformers = Parallel(
        input_names=self.stack_op_names,
        prototype=self.stack_pop_transformer,
        name="stack_pop")
    self.stack_push_transformer = MLP(activations=[Logistic()], dims=None)
    self.stack_push_transformers = Parallel(
        input_names=self.stack_op_names,
        prototype=self.stack_push_transformer,
        name="stack_push")
    self.stack_input_transformer = Linear()
    self.stack_input_transformers = Parallel(
        input_names=self.stack_op_names,
        prototype=self.stack_input_transformer,
        name="stack_input")
    self.children.append(self.stack_pop_transformers)
    self.children.append(self.stack_push_transformers)
    self.children.append(self.stack_input_transformers)
def create_base_model(self, x, y, input_dim, interim_dim=30):
    # Create the output of the MLP
    mlp = MLP([Tanh(), Tanh(), Tanh()],
              [input_dim, 60, 60, interim_dim],
              weights_init=IsotropicGaussian(0.001),
              biases_init=Constant(0))
    mlp.initialize()
    inter = mlp.apply(x)

    fine_tuner = MLP([Logistic()],
                     [interim_dim, 1],
                     weights_init=IsotropicGaussian(0.001),
                     biases_init=Constant(0))
    fine_tuner.initialize()
    probs = fine_tuner.apply(inter)

    # sq_err = BinaryCrossEntropy()
    err = T.sqr(y.flatten() - probs.flatten())
    # cost = T.mean(err * y.flatten() * (1 - self.p) +
    #               err * (1 - y.flatten()) * self.p)
    cost = T.mean(err)
    # cost = sq_err.apply(probs.flatten(), y.flatten())
    # cost = T.mean(y.flatten() * T.log(probs.flatten()) +
    #               (1 - y.flatten()) * T.log(1 - probs.flatten()))
    cost.name = 'cost'

    pred_out = probs > 0.5
    mis_cost = T.sum(T.neq(y.flatten(), pred_out.flatten()))
    mis_cost.name = 'MisclassificationRate'
    return mlp, fine_tuner, cost, mis_cost
def __init__(self, mlp, frame_size=259, k=20, const=1e-5, **kwargs):
    super(SPF0Emitter, self).__init__(**kwargs)
    self.mlp = mlp
    input_dim = self.mlp.output_dim
    self.const = const
    self.frame_size = frame_size
    mlp_gmm = GMMMLP(mlp=mlp, dim=(frame_size - 2) * k, k=k, const=const)
    self.gmm_emitter = GMMEmitter(gmmmlp=mlp_gmm,
                                  output_size=frame_size - 2,
                                  k=k,
                                  name="gmm_emitter")
    self.mu = MLP(activations=[Identity()],
                  dims=[input_dim, 1],
                  name=self.name + "_mu")
    self.sigma = MLP(activations=[SoftPlus()],
                     dims=[input_dim, 1],
                     name=self.name + "_sigma")
    self.binary = MLP(activations=[Logistic()],
                      dims=[input_dim, 1],
                      name=self.name + "_binary")
    self.children = [self.mlp, self.mu, self.sigma, self.binary,
                     self.gmm_emitter]
def test_model():
    x = tensor.matrix('x')
    mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1")
    mlp2 = MLP([Tanh()], [30, 40], name="mlp2")
    h1 = mlp1.apply(x)
    h2 = mlp2.apply(h1)
    model = Model(h2)
    assert model.get_top_bricks() == [mlp1, mlp2]
    # The order of parameters returned is deterministic but not sensible.
    assert list(model.get_parameter_dict().items()) == [
        ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b),
        ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b),
        ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b),
        ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W),
        ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W),
        ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)]

    # Test getting and setting parameter values.
    mlp3 = MLP([Tanh()], [10, 10])
    mlp3.allocate()
    model3 = Model(mlp3.apply(x))
    parameter_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10),
                                          dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)}
    model3.set_parameter_values(parameter_values)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[0].get_value() == 2)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[1].get_value() == 3)
    got_parameter_values = model3.get_parameter_values()
    assert len(got_parameter_values) == len(parameter_values)
    for name, value in parameter_values.items():
        assert_allclose(value, got_parameter_values[name])

    # Test that an exception is raised if parameter shapes don't match.
    def helper():
        parameter_values = {
            '/mlp/linear_0.W': 2 * numpy.ones((11, 11),
                                              dtype=theano.config.floatX),
            '/mlp/linear_0.b': 3 * numpy.ones(11,
                                              dtype=theano.config.floatX)}
        model3.set_parameter_values(parameter_values)
    assert_raises(ValueError, helper)

    # Test name conflict handling.
    mlp4 = MLP([Tanh()], [10, 10])

    def helper():
        Model(mlp4.apply(mlp3.apply(x)))
    assert_raises(ValueError, helper)
def test_add_to_dump():
    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)
    mlp2 = MLP(activations=[None, None], dims=[10, 10, 10],
               weights_init=Constant(1.), use_bias=False, name='mlp2')
    mlp2.initialize()

    # Ensure that adding to a dump works.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb+') as ff:
        add_to_dump(mlp.children[0], ff, 'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], ff, 'child_1')
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(
            ['_pkl', '_parameters', 'child_0', 'child_1'])

    # Ensure that we can load any object from the tarball.
    with open(f.name, 'rb') as ff:
        saved_children_0 = load(ff, 'child_0')
        saved_children_1 = load(ff, 'child_1')
        assert_allclose(saved_children_0.W.get_value(),
                        numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)

    # Check the error if using a reserved name.
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump,
                      *[mlp.children[0], ff, '_pkl'])

    # Check the error if saving an object with other parameters.
    with open(f.name, 'rb+') as ff:
        assert_raises(
            ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
            **dict(parameters=[mlp2.children[0].W, mlp2.children[1].W]))

    # Check the error if adding to a dump that has no parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    with open(f.name, 'rb+') as ff:
        assert_raises(
            ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
            **dict(parameters=[mlp2.children[0].W, mlp2.children[1].W]))
def __init__(self, input_dim, hidden_dim, **kwargs):
    super(VariationalAutoEncoder, self).__init__(**kwargs)
    # The encoder's final output dim is left as None; it is presumably
    # filled in by VAEEncoder from hidden_dim.
    encoder_mlp = MLP([Sigmoid(), Identity()], [input_dim, 101, None])
    decoder_mlp = MLP([Sigmoid(), Sigmoid()], [hidden_dim, 101, input_dim])
    self.hidden_dim = hidden_dim
    self.encoder = VAEEncoder(encoder_mlp, hidden_dim)
    self.decoder = VAEDecoder(decoder_mlp)
    self.children = [self.encoder, self.decoder]
def __init__(self, attended_dim, **kwargs):
    super(LSTM2GO, self).__init__(**kwargs)
    self.attended_dim = attended_dim
    self.initial_transformer_s = MLP(activations=[Tanh()],
                                     dims=[attended_dim, self.dim],
                                     name='state_initializer')
    self.children.append(self.initial_transformer_s)
    self.initial_transformer_c = MLP(activations=[Tanh()],
                                     dims=[attended_dim, self.dim],
                                     name='cell_initializer')
    self.children.append(self.initial_transformer_c)
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means,
              labels):
    mlp_car = MLP(activations=[Rectifier(), Rectifier(), None],
                  dims=[8 + 185, 200, 200, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_interval_car')
    mlp_car.initialize()
    mlp_nocar = MLP(activations=[Rectifier(), Rectifier(), None],
                    dims=[5 + 135, 200, 200, 1],
                    weights_init=IsotropicGaussian(.1),
                    biases_init=Constant(0),
                    name='mlp_interval_nocar')
    mlp_nocar.initialize()

    feature_car = tensor.concatenate((features_car_cat, features_car_int),
                                     axis=1)
    feature_nocar = tensor.concatenate(
        (features_nocar_cat, features_nocar_int), axis=1)
    prediction = mlp_nocar.apply(feature_nocar)
    # Gating with the last feature: does the dude own a car?
    prediction += tensor.addbroadcast(features_hascar,
                                      1) * mlp_car.apply(feature_car)

    prediction_loc, _, _, _ = build_mlp_onlyloc(
        features_car_cat, features_car_int, features_nocar_cat,
        features_nocar_int, features_cp, features_hascar, means, labels)
    prediction += prediction_loc

    # Add the crm feature.
    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]
    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print(input_var)
    cg_dropout1 = apply_dropout(cg, [input_var[6], input_var[7]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]
    return prediction, cost_dropout1, cg_dropout1.parameters, cost
def __init__(self, input_dim, dim, mlp_hidden_dims, batch_size,
             image_shape, patch_shape, activation=None, **kwargs):
    super(LSTMAttention, self).__init__(**kwargs)
    self.dim = dim
    self.image_shape = image_shape
    self.patch_shape = patch_shape
    self.batch_size = batch_size
    non_lins = [Rectifier()] * (len(mlp_hidden_dims) - 1) + [None]
    mlp_dims = [input_dim + dim] + mlp_hidden_dims
    mlp = MLP(non_lins, mlp_dims,
              weights_init=self.weights_init,
              biases_init=self.biases_init,
              name=self.name + '_mlp')
    hyperparameters = {}
    hyperparameters["cutoff"] = 3
    hyperparameters["batched_window"] = True
    cropper = LocallySoftRectangularCropper(
        patch_shape=patch_shape,
        hyperparameters=hyperparameters,
        kernel=Gaussian())
    if not activation:
        activation = Tanh()
    self.children = [activation, mlp, cropper]
def __init__(self, attended_dim, **kwargs):
    super(GRUInitialState, self).__init__(**kwargs)
    self.attended_dim = attended_dim
    self.initial_transformer = MLP(activations=[Tanh()],
                                   dims=[attended_dim, self.dim],
                                   name='state_initializer')
    self.children.append(self.initial_transformer)
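# Hedged sketch of the initial_states application such a brick typically
# overrides (modeled on the Blocks machine-translation example); the slice
# taking the first step's last attended_dim features is an assumption about
# the encoder's output layout. Shown commented out since the enclosing
# class definition is not part of this snippet.
# @application
# def initial_states(self, batch_size, *args, **kwargs):
#     attended = kwargs['attended']
#     return self.initial_transformer.apply(
#         attended[0, :, -self.attended_dim:])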
def setup_ff_network(in_dim, out_dim, num_layers, num_neurons):
    """Set up a feedforward neural network.

    Parameters
    ----------
    in_dim : int
        Input dimension of the network.
    out_dim : int
        Output dimension of the network.
    num_layers : int
        Number of hidden layers.
    num_neurons : int
        Number of neurons in each hidden layer.

    Returns
    -------
    net : object
        Network structure.
    """
    activations = [Rectifier()]
    dims = [in_dim]
    for i in xrange(num_layers):
        activations.append(Rectifier())
        dims.append(num_neurons)
    dims.append(out_dim)

    net = MLP(activations=activations,
              dims=dims,
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0.01))
    return net
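# Hedged usage sketch; the dimensions here are illustrative.
net = setup_ff_network(in_dim=100, out_dim=10, num_layers=2, num_neurons=50)
net.initialize()  # sample the weights from the IsotropicGaussian initializer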
def test_pylearn2_training():
    # Construct the model.
    mlp = MLP(activations=[Sigmoid(), Sigmoid()], dims=[784, 100, 784],
              weights_init=IsotropicGaussian(), biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()
    block_cost = BlocksCost(cost)
    block_model = BlocksModel(mlp, (VectorSpace(dim=784), 'features'))

    # Load the data.
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    # Silence Pylearn2's logger.
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm.
    sgd = SGD(learning_rate=0.01, cost=block_cost, batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
def construct_mlp(name, hidden_dims, input_dim, initargs, batch_normalize,
                  activations=None):
    if not hidden_dims:
        return FeedforwardIdentity(dim=input_dim)

    if not activations:
        activations = [Rectifier() for dim in hidden_dims]
    elif not isinstance(activations, collections.Iterable):
        activations = [activations] * len(hidden_dims)
    assert len(activations) == len(hidden_dims)

    dims = [input_dim] + hidden_dims
    wrapped_activations = [
        NormalizedActivation(shape=[hidden_dim],
                             name="activation_%i" % i,
                             batch_normalize=batch_normalize,
                             activation=activation)
        for i, (hidden_dim, activation)
        in enumerate(zip(hidden_dims, activations))]
    mlp = MLP(name=name,
              activations=wrapped_activations,
              dims=dims,
              **initargs)
    # Biases are handled by our activation function.
    for layer in mlp.linear_transformations:
        layer.use_bias = False
    return mlp
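# Hedged usage sketch; initargs holds standard Blocks initialization
# arguments, and all dimensions here are illustrative.
mlp = construct_mlp("encoder", [64, 64], input_dim=32,
                    initargs=dict(weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0)),
                    batch_normalize=True)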
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"
    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        params=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring([cost],
                                 get_data_stream(range(100, 200)),
                                 prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()])
    main_loop.run()
    return main_loop
def __init__(self, conv_activations, num_channels, image_shape,
             filter_sizes, feature_maps, pooling_sizes,
             top_mlp_activations, top_mlp_dims,
             conv_step=None, border_mode='valid', **kwargs):
    if conv_step is None:
        self.conv_step = (1, 1)
    else:
        self.conv_step = conv_step
    self.num_channels = num_channels
    self.image_shape = image_shape
    self.top_mlp_activations = top_mlp_activations
    self.top_mlp_dims = top_mlp_dims
    self.border_mode = border_mode

    conv_parameters = zip(filter_sizes, feature_maps)

    # Construct convolutional, batch-normalization, and pooling layers
    # with the corresponding parameters.
    self.convolution_layer = (
        Convolutional(filter_size=filter_size,
                      num_filters=num_filter,
                      step=self.conv_step,
                      border_mode=self.border_mode,
                      name='conv_{}'.format(i))
        for i, (filter_size, num_filter) in enumerate(conv_parameters))
    # Unpack the enumerate tuple so only the index ends up in the name.
    self.BN_layer = (BatchNormalization(name='bn_conv_{}'.format(i))
                     for i, _ in enumerate(conv_parameters))
    self.pooling_layer = (MaxPooling(size, name='pool_{}'.format(i))
                          for i, size in enumerate(pooling_sizes))

    self.layers = list(interleave([self.convolution_layer,
                                   self.BN_layer,
                                   conv_activations,
                                   self.pooling_layer]))
    self.conv_sequence = ConvolutionalSequence(self.layers, num_channels,
                                               image_size=image_shape)

    # Construct a top MLP.
    self.top_mlp = MLP(top_mlp_activations, top_mlp_dims)

    # We need to flatten the output of the last convolutional layer.
    # This brick accepts a tensor of dimension (batch_size, ...) and
    # returns a matrix (batch_size, features).
    self.flattener = Flattener()
    application_methods = [self.conv_sequence.apply,
                           self.flattener.apply,
                           self.top_mlp.apply]
    super(LeNet, self).__init__(application_methods, **kwargs)
def construct_model(input_dim, output_dim):
    # Construct the model.
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr.
    mlp = MLP(activations=activation_functions,
              dims=[input_dim] + hidden_dims + [2])

    weights = mlp.apply(r)
    final = tensor.dot(x, weights)

    cost = Softmax().categorical_cross_entropy(y, final).mean()
    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters.
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # Apply noise to the weights. apply_noise returns a new computation
    # graph, so the result must be kept.
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    cg = apply_noise(cg, noise_vars, noise_std)
    [cost, error_rate] = cg.outputs
    return cost, error_rate
def test_mlp_use_bias_pushed_when_explicitly_specified():
    mlp = MLP(activations=[Tanh(), Tanh(), None], dims=[4, 5, 6, 7],
              prototype=Linear(use_bias=False), use_bias=True)
    mlp.push_allocation_config()
    assert all(lin.use_bias for lin in mlp.linear_transformations)
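# Complementary hedged sketch: when use_bias is not passed to the MLP at
# all, the prototype's setting should survive the allocation-config push.
def test_mlp_use_bias_kept_when_not_explicitly_specified():
    mlp = MLP(activations=[Tanh(), Tanh(), None], dims=[4, 5, 6, 7],
              prototype=Linear(use_bias=False))
    mlp.push_allocation_config()
    assert not any(lin.use_bias for lin in mlp.linear_transformations)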
def __init__(self, **kwargs):
    conv_layers = [
        Convolutional(filter_size=(3, 3), num_filters=64,
                      border_mode=(1, 1), name='conv_1'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=64,
                      border_mode=(1, 1), name='conv_2'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_2'),

        Convolutional(filter_size=(3, 3), num_filters=128,
                      border_mode=(1, 1), name='conv_3'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=128,
                      border_mode=(1, 1), name='conv_4'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_4'),

        Convolutional(filter_size=(3, 3), num_filters=256,
                      border_mode=(1, 1), name='conv_5'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=256,
                      border_mode=(1, 1), name='conv_6'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=256,
                      border_mode=(1, 1), name='conv_7'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_7'),

        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_8'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_9'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_10'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_10'),

        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_11'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_12'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_13'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_13'),
    ]
    mlp = MLP([Rectifier(name='fc_14'), Rectifier(name='fc_15'), Softmax()],
              [25088, 4096, 4096, 1000])
    conv_sequence = ConvolutionalSequence(conv_layers, 3,
                                          image_size=(224, 224))
    super(VGGNet, self).__init__(
        [conv_sequence.apply, Flattener().apply, mlp.apply], **kwargs)
def __init__(self, state_names, state_dims, sequence_dim, match_dim,
             state_transformer=None, sequence_transformer=None,
             energy_computer=None, **kwargs):
    super(SequenceContentAttention, self).__init__(**kwargs)
    self.state_names = state_names
    self.state_dims = state_dims
    self.sequence_dim = sequence_dim
    self.match_dim = match_dim
    self.state_transformer = state_transformer
    self.state_transformers = Parallel(input_names=state_names,
                                       prototype=state_transformer,
                                       name="state_trans")
    if not sequence_transformer:
        sequence_transformer = MLP([Identity()], name="seq_trans")
    if not energy_computer:
        energy_computer = EnergyComputer(name="energy_comp")
    self.sequence_transformer = sequence_transformer
    self.energy_computer = energy_computer
    self.children = [self.state_transformers, sequence_transformer,
                     energy_computer]
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means,
              labels):
    prediction, _, _, _ = build_mlp_onlyloc(
        features_car_cat, features_car_int, features_nocar_cat,
        features_nocar_int, features_cp, features_hascar, means, labels)

    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]
    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print(input_var)
    cg_dropout = apply_dropout(cg, [input_var[7], input_var[5]], .4)
    cost_dropout = cg_dropout.outputs[0]
    return prediction, cost_dropout, cg_dropout.parameters, cost
def build_mlp(features_int, features_cat, labels, labels_mean):
    inputs = tensor.concatenate([features_int, features_cat], axis=1)
    mlp = MLP(activations=[Rectifier(), Rectifier(), Rectifier(), None],
              dims=[337, 800, 1200, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(1))
    mlp.initialize()

    prediction = mlp.apply(inputs)
    cost = MAPECost().apply(prediction, labels, labels_mean)

    cg = ComputationGraph(cost)
    # cg_dropout0 = apply_dropout(
    #     cg, [VariableFilter(roles=[INPUT])(cg.variables)[1]], .2)
    cg_dropout1 = apply_dropout(
        cg,
        [VariableFilter(roles=[OUTPUT])(cg.variables)[1],
         VariableFilter(roles=[OUTPUT])(cg.variables)[3],
         VariableFilter(roles=[OUTPUT])(cg.variables)[5]], .2)
    cost_dropout1 = cg_dropout1.outputs[0]
    return cost_dropout1, cg_dropout1.parameters, cost  # cost, cg.parameters, cost
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d',
                n_latent, hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)

    mlp1 = MLP(activations=[Rectifier()], dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output',
                        input_dim=hu_decoder, output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')

    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(mlp1.apply(z)))  # reconstructed x
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat

    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
def build_model(images, labels):
    # Construct a bottom convolutional sequence.
    bottom_conv_sequence = convolutional_sequence((3, 3), 64, (150, 150))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer.
    flattener = Flattener()

    # Construct a top MLP.
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    top_mlp = MLP([LeakyRectifier(name='non_linear_9'),
                   LeakyRectifier(name='non_linear_10'),
                   Softmax(name='non_linear_11')],
                  [conv_out_dim, 2048, 612, 10],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(1))

    # Construct the feedforward sequence.
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply,
                                  flattener.apply,
                                  top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost = CategoricalCrossEntropy().apply(labels.flatten(), prediction)
    return cost
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring(
            [cost, error_rate],
            Flatten(DataStream.default_stream(
                mnist_test,
                iteration_scheme=SequentialScheme(
                    mnist_test.num_examples, 500)),
                which_sources=('features',)),
            prefix="test"),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train",
            after_epoch=True),
        Checkpoint(save_to),
        Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[['test_final_cost',
                            'test_misclassificationrate_apply_error_rate'],
                           ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(DataStream.default_stream(
            mnist_train,
            iteration_scheme=SequentialScheme(
                mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)
    main_loop.run()
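# Hedged invocation sketch; the checkpoint path and epoch count are
# illustrative.
if __name__ == "__main__":
    main("mnist.tar", num_epochs=5)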
def create_model(self, x, y, input_dim, tol=10e-5):
    # Create the output of the MLP.
    mlp = MLP([Rectifier(), Rectifier(), Logistic()],
              [input_dim, 100, 100, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    probs = mlp.apply(x)
    y = y.dimshuffle(0, 'x')

    # Create a smoothed F-score cost; tol keeps the ratios finite.
    true_p = (T.sum(y * probs) + tol) * 1.0 / (T.sum(y) + tol)
    true_n = (T.sum((1 - y) * (1 - probs)) + tol) * 1.0 / (T.sum(1 - y) + tol)
    # p = (T.sum(y) + tol) / (y.shape[0] + tol)
    theta = (1 - self.p) / self.p
    numerator = (1 + self.beta ** 2) * true_p
    denominator = self.beta ** 2 + theta + true_p - theta * true_n
    Fscore = numerator / denominator

    cost = -1 * Fscore
    cost.name = "cost"

    return mlp, cost, probs