def __init__(self, config, **kwargs):
    super(Model, self).__init__(config, **kwargs)
    self.dest_mlp = MLP(
        activations=[Rectifier() for _ in config.dim_hidden_dest] + [Softmax()],
        dims=[config.dim_hidden[-1]] + config.dim_hidden_dest + [config.dim_output_dest],
        name='dest_mlp')
    self.time_mlp = MLP(
        activations=[Rectifier() for _ in config.dim_hidden_time] + [Softmax()],
        dims=[config.dim_hidden[-1]] + config.dim_hidden_time + [config.dim_output_time],
        name='time_mlp')
    self.dest_classes = theano.shared(
        numpy.array(config.dest_tgtcls, dtype=theano.config.floatX),
        name='dest_classes')
    self.time_classes = theano.shared(
        numpy.array(config.time_tgtcls, dtype=theano.config.floatX),
        name='time_classes')
    self.inputs.append('input_time')
    self.children.extend([self.dest_mlp, self.time_mlp])
def build_training(lr=0.002, model=None):
    x = T.tensor4('x')
    y = T.imatrix()
    if model is None:
        model = build_model()
    y_prev = model.apply(x)
    y_softmax = Softmax().apply(y_prev)

    ##### prediction #####
    # cost = CategoricalCrossEntropy().apply(y.flatten(), y_prev).mean()
    cost = Softmax().categorical_cross_entropy(y.flatten(), y_prev).mean()
    error = MisclassificationRate().apply(y.flatten(), y_softmax).mean()

    W, B = get_Params(y_prev)
    params = W + B
    regularizer_full = sum([w.norm(2) for w in W[0:2]])
    regularizer_conv = sum([w.norm(2) for w in W[2:]])
    cost = cost  # + 0.01*regularizer_conv + 0.001*regularizer_full
    updates, updates_init = RMSProp(cost, params, lr)
    # updates, updates_init = Adam(cost, params, lr)
    # updates = Sgd(cost, params, lr)

    train_function = theano.function([x, y], cost, updates=updates,
                                     allow_input_downcast=True)
    valid_function = theano.function([x, y], cost, allow_input_downcast=True)
    test_function = theano.function([x, y], error, allow_input_downcast=True)
    reinit = theano.function([], T.zeros((1,)), updates=updates_init)
    observation = theano.function([], [w.norm(2) for w in W])
    """
    reg_function = theano.function([], T.zeros((1,)), updates=clip(W),
                                   allow_input_downcast=True)
    observation = theano.function([], [w.norm(2) for w in W])
    """
    return train_function, valid_function, test_function, model, reinit
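# A minimal training-loop sketch for build_training(); everything below is an
# assumption for illustration (synthetic data, input shape, label layout),
# not part of the original code.
def training_loop_sketch(n_updates=10):
    import numpy as np
    train_f, valid_f, test_f, model, reinit = build_training(lr=0.002)
    x_batch = np.random.rand(16, 3, 32, 32)    # assumed 4D image batch
    y_batch = np.zeros((16, 1), dtype='int32')  # assumed integer label matrix
    for _ in range(n_updates):
        loss = train_f(x_batch, y_batch)
    return loss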
def get_config(config):
    config1 = {}
    if config == '5layers':
        config1['num_epochs'] = 150
        config1['num_channels'] = 3
        config1['image_shape'] = (192, 192)
        config1['filter_size'] = [(5, 5), (5, 5), (5, 5), (5, 5), (5, 5)]
        config1['num_filter'] = [32, 48, 64, 128, 256]
        config1['pooling_sizes'] = [(2, 2), (2, 2), (2, 2), (2, 2), (2, 2)]
        config1['mlp_hiddens'] = [1000, 100]
        config1['output_size'] = 2
        config1['batch_size'] = 16
        config1['activation'] = [Rectifier() for _ in config1['num_filter']]
        config1['mlp_activation'] = [Rectifier().apply
                                     for _ in config1['mlp_hiddens']] + [Softmax().apply]
        config1['num_batches'] = None
    elif config == '4layers':
        config1['num_epochs'] = 100
        config1['num_channels'] = 3
        config1['image_shape'] = (160, 160)
        config1['filter_size'] = [(5, 5), (5, 5), (5, 5), (5, 5)]
        config1['num_filter'] = [32, 64, 128, 128]
        config1['pooling_sizes'] = [(2, 2), (2, 2), (2, 2), (2, 2)]
        config1['mlp_hiddens'] = [1000, 100]
        config1['output_size'] = 2
        config1['batch_size'] = 32
        config1['activation'] = [Rectifier() for _ in config1['num_filter']]
        config1['mlp_activation'] = [Rectifier().apply
                                     for _ in config1['mlp_hiddens']] + [Softmax().apply]
        config1['num_batches'] = None
    else:
        config1['num_epochs'] = 100
        config1['num_channels'] = 3
        config1['image_shape'] = (128, 128)
        config1['filter_size'] = [(5, 5), (5, 5), (5, 5)]
        config1['num_filter'] = [20, 50, 80]
        config1['pooling_sizes'] = [(2, 2), (2, 2), (2, 2)]
        config1['mlp_hiddens'] = [1000]
        config1['output_size'] = 2
        config1['batch_size'] = 64
        config1['activation'] = [Rectifier() for _ in config1['num_filter']]
        config1['mlp_activation'] = [Rectifier().apply
                                     for _ in config1['mlp_hiddens']] + [Softmax().apply]
        config1['num_batches'] = 11000
    if config == 'test':
        print("Test run...")
        config1['test'] = True
    else:
        print("Using default config..")
    return config1
def __init__(self, hidden_dim, n_classes, **kwargs):
    super(SingleSoftmax, self).__init__(**kwargs)

    self.hidden_dim = hidden_dim
    self.n_classes = n_classes

    # integer division: MLP dims must be ints
    self.mlp = MLP(activations=[Rectifier(), Softmax()],
                   dims=[hidden_dim, hidden_dim // 2, self.n_classes],
                   weights_init=Orthogonal(),
                   biases_init=Constant(0))
    self.softmax = Softmax()

    self.children = [self.mlp, self.softmax]
def build_model(images, labels):
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 64, (150, 150))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    top_mlp = MLP([LeakyRectifier(name='non_linear_9'),
                   LeakyRectifier(name='non_linear_10'),
                   Softmax(name='non_linear_11')],
                  [conv_out_dim, 2048, 612, 10],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(1))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply,
                                  flattener.apply,
                                  top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    return cost
def __init__(self, config):
    self.X = T.tensor4("features")

    c = config
    seq = BrickSequence(input_dim=(3, 32, 32), bricks=[
        conv3(c['n_l1']),
        conv3(c['n_l2']),
        max_pool(),
        conv3(c['n_l3']),
        conv3(c['n_l4']),
        max_pool(),
        # conv3(10),
        # conv3(10),
        Flattener(),
        linear(c['n_l5']),
        Softmax()
    ])

    seq.initialize()

    self.pred = seq.apply(self.X)

    self.Y = T.imatrix("targets")

    self.cost = CategoricalCrossEntropy().apply(self.Y.flatten(), self.pred)
    self.cost.name = "cost"

    self.accur = 1.0 - MisclassificationRate().apply(self.Y.flatten(),
                                                     self.pred)
    self.accur.name = "accur"
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring(
            [cost, error_rate],
            Flatten(DataStream.default_stream(
                mnist_test,
                iteration_scheme=SequentialScheme(mnist_test.num_examples, 500)),
                which_sources=('features',)),
            prefix="test"),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train",
            after_epoch=True),
        Checkpoint(save_to),
        Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[['test_final_cost',
                       'test_misclassificationrate_apply_error_rate'],
                      ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(DataStream.default_stream(
            mnist_train,
            iteration_scheme=SequentialScheme(mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
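# A minimal entry-point sketch; the checkpoint name and epoch count below are
# illustrative assumptions, not values from the original example.
if __name__ == "__main__":
    main(save_to='mnist_mlp.tar', num_epochs=2)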
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(input_dim=10000,
                          dim=500,
                          mlp_hidden_dims=[2000, 500, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
    model.initialize()
    h, c = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
    classifier.initialize()
    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)
    return cost, error_rate
def create_lenet_5():
    feature_maps = [6, 16]
    mlp_hiddens = [120, 84]
    conv_sizes = [5, 5]
    pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # The above are from LeCun's paper. The blocks example had:
    #   feature_maps = [20, 50]
    #   mlp_hiddens = [500]

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations, 1, image_size,
                    filter_sizes=zip(conv_sizes, conv_sizes),
                    feature_maps=feature_maps,
                    pooling_sizes=zip(pool_sizes, pool_sizes),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='valid',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))

    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    return convnet
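# A hedged usage sketch for create_lenet_5(): it assumes the LeNet brick's
# apply() ends in the Softmax configured above, so its output can feed
# CategoricalCrossEntropy directly. The variable names are illustrative,
# not part of the original snippet.
def lenet_cost_sketch():
    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')
    convnet = create_lenet_5()
    probs = convnet.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    cost.name = 'cost'
    return cost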
def test_activations():
    x = tensor.vector()
    x_val = numpy.random.rand(8).astype(theano.config.floatX)
    exp_x_val = numpy.exp(x_val)

    assert_allclose(x_val, Identity().apply(x).eval({x: x_val}))
    assert_allclose(numpy.tanh(x_val), Tanh().apply(x).eval({x: x_val}),
                    rtol=1e-06)
    # Note: the activation bricks take no positional tensor argument; the
    # input is passed to apply().
    assert_allclose(numpy.log(1 + exp_x_val),
                    Softplus().apply(x).eval({x: x_val}), rtol=1e-6)
    assert_allclose(exp_x_val / numpy.sum(exp_x_val),
                    Softmax().apply(x).eval({x: x_val}).flatten(), rtol=1e-6)
    assert_allclose(1.0 / (1.0 + numpy.exp(-x_val)),
                    Logistic().apply(x).eval({x: x_val}), rtol=1e-6)

    leaky_out_1 = x_val - 0.5
    leaky_out_1[leaky_out_1 < 0] *= 0.01
    assert_allclose(leaky_out_1,
                    LeakyRectifier().apply(x).eval({x: x_val - 0.5}),
                    rtol=1e-5)
    leaky_out_2 = x_val - 0.5
    leaky_out_2[leaky_out_2 < 0] *= 0.05
    assert_allclose(leaky_out_2,
                    LeakyRectifier(leak=0.05).apply(x).eval({x: x_val - 0.5}),
                    rtol=1e-5)
def training(repo, learning_rate, batch_size, filenames):
    print('LOAD DATA')
    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = \
        load_datasets_mnist(repo, filenames)
    print('BUILD MODEL')
    train_f, valid_f, test_f, model, fisher, params = build_training()
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    x = T.tensor4()
    y = T.imatrix()
    output = model.apply(x)
    # TODO: get_dim('name') for Architecture
    output = output.reshape((x.shape[0], model.get_dim('output')))
    cost = Softmax().categorical_cross_entropy(y.flatten(), output).mean()
    cg = ComputationGraph(cost)
    inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg)
    outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg)
    inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
    outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
    dico = OrderedDict([('conv_output', outputs_conv[0])])
    [grad_s] = T.grad(cost, outputs_conv)
    dico['conv_output'] = grad_s
    f = theano.function([x, y], grad_s, allow_input_downcast=True,
                        on_unused_input='ignore')
    print(np.mean(f(x_train[:10], y_train[:10])))
def build_model(images, labels):
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 16, (160, 160))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    # top_mlp = MLP([Rectifier(name='non_linear_9'),
    #                Softmax(name='non_linear_11')],
    #               [conv_out_dim, 1024, 10],
    #               weights_init=IsotropicGaussian(), biases_init=Constant(0))
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(), biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply,
                                  flattener.apply,
                                  top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]
    cost = cost_noreg + .01 * (W0 ** 2).mean() + .01 * (W1 ** 2).mean()
    return cost
def __init__(self, image_shape=None, output_size=None, noise_batch_size=None,
             noise_without_rectifier=False, noise_after_rectifier=False,
             **kwargs):
    self.num_channels = 3
    self.image_shape = image_shape or (32, 32)
    self.output_size = output_size or 10
    self.noise_batch_size = noise_batch_size
    conv_parameters = [(96, 3, 1, 'half', Convolutional),
                       (96, 3, 1, 'half', Convolutional),
                       (96, 3, 2, 'half', NoisyConvolutional),
                       (192, 3, 1, 'half', Convolutional),
                       (192, 3, 1, 'half', Convolutional),
                       (192, 3, 2, 'half', NoisyConvolutional),
                       (192, 3, 1, 'half', Convolutional),
                       (192, 1, 1, 'valid', Convolutional),
                       (10, 1, 1, 'valid', Convolutional)]
    fc_layer = 10

    self.convolutions = []
    layers = []
    for i, (num_filters, filter_size, conv_step, border_mode, cls) \
            in enumerate(conv_parameters):
        if cls == NoisyConvolutional and noise_after_rectifier:
            cls = NoisyConvolutional2
        layer = cls(filter_size=(filter_size, filter_size),
                    num_filters=num_filters,
                    step=(conv_step, conv_step),
                    border_mode=border_mode,
                    tied_biases=True,
                    name='conv_{}'.format(i))
        if cls == NoisyConvolutional or cls == NoisyConvolutional2:
            layer.noise_batch_size = self.noise_batch_size
        self.convolutions.append(layer)
        layers.append(layer)
        if cls != NoisyConvolutional2 and not noise_without_rectifier:
            layers.append(Rectifier())

    self.conv_sequence = ConvolutionalSequence(layers, self.num_channels,
                                               image_size=self.image_shape)

    # The AllConvNet applies average pooling to combine top-level
    # features across the image.
    self.flattener = GlobalAverageFlattener()

    # Then it inserts one final 10-way FC layer before softmax
    # self.top_mlp = MLP([Rectifier(), Softmax()],
    #                    [conv_parameters[-1][0], fc_layer, self.output_size])
    self.top_softmax = Softmax()

    application_methods = [self.conv_sequence.apply,
                           self.flattener.apply,
                           self.top_softmax.apply]

    super(NoisyAllConvNet, self).__init__(application_methods, **kwargs)
def __init__(self, batch_size, output_length, visual_dim, word_dim,
             visual_feature_dim, question_feature_dim, joint_dim,
             memory_dim, output_dim, fc1_dim, fc2_dim, voc_size):
    # the video encoder
    self.video_encoder = visualEncoder(visual_dim, visual_feature_dim)
    self.sentence_encoder = questionEncoder(word_dim, question_feature_dim)
    self.toJoint = embeddingLayer(2 * question_feature_dim,
                                  2 * visual_feature_dim, joint_dim)
    self.rewatcher = videoAttentionLayer(joint_dim, memory_dim, output_dim)
    self.seq_gen = seqDecoder(joint_dim, output_dim, fc1_dim, fc2_dim)
    self.softmax_layer = Softmax()
    self.bs = batch_size
    self.output_length = output_length
    self.voc_size = voc_size
def construct_model(input_dim, output_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim] + hidden_dims + [2])

    weights = mlp.apply(r)
    final = tensor.dot(x, weights)

    cost = Softmax().categorical_cross_entropy(y, final).mean()

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply noise
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    apply_noise(cg, noise_vars, noise_std)
    [cost, error_rate] = cg.outputs

    return cost, error_rate
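# A hedged compilation sketch: construct_model() returns symbolic outputs, and
# Blocks' ComputationGraph can recover their inputs so Theano can compile a
# callable. The dimensions are illustrative assumptions; note that the order
# of cg.inputs is not guaranteed, so a real caller should match by name.
def compile_model_sketch():
    cost, error_rate = construct_model(input_dim=100, output_dim=2)
    cg = ComputationGraph([cost, error_rate])
    return theano.function(cg.inputs, [cost, error_rate],
                           allow_input_downcast=True)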
def __init__(self, **kwargs):
    conv_layers = [
        Convolutional(filter_size=(3, 3), num_filters=64,
                      border_mode=(1, 1), name='conv_1'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=64,
                      border_mode=(1, 1), name='conv_2'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_2'),

        Convolutional(filter_size=(3, 3), num_filters=128,
                      border_mode=(1, 1), name='conv_3'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=128,
                      border_mode=(1, 1), name='conv_4'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_4'),

        Convolutional(filter_size=(3, 3), num_filters=256,
                      border_mode=(1, 1), name='conv_5'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=256,
                      border_mode=(1, 1), name='conv_6'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=256,
                      border_mode=(1, 1), name='conv_7'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_7'),

        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_8'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_9'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_10'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_10'),

        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_11'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_12'),
        Rectifier(),
        Convolutional(filter_size=(3, 3), num_filters=512,
                      border_mode=(1, 1), name='conv_13'),
        Rectifier(),
        MaxPooling((2, 2), step=(2, 2), name='pool_13'),
    ]

    mlp = MLP([Rectifier(name='fc_14'), Rectifier(name='fc_15'), Softmax()],
              [25088, 4096, 4096, 1000])

    conv_sequence = ConvolutionalSequence(conv_layers, 3,
                                          image_size=(224, 224))

    super(VGGNet, self).__init__(
        [conv_sequence.apply, Flattener().apply, mlp.apply], **kwargs)
def __init__(self, feature_dim, hidden_dim, output_dim):
    self.image_embed = Linear(input_dim=feature_dim,
                              output_dim=hidden_dim,
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0),
                              use_bias=False,
                              name='image_embed')
    self.word_embed = Linear(input_dim=feature_dim,
                             output_dim=hidden_dim,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0),
                             use_bias=False,
                             name='word_embed')
    self.r_embed = Linear(input_dim=feature_dim,
                          output_dim=hidden_dim,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0),
                          use_bias=False,
                          name='r_embed')
    self.m_to_s = Linear(input_dim=hidden_dim,
                         output_dim=1,
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0),
                         use_bias=False,
                         name='m_to_s')
    self.attention_dist = Softmax(name='attention_dist_softmax')
    self.r_to_r = Linear(input_dim=feature_dim,
                         output_dim=feature_dim,
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0),
                         use_bias=False,
                         name='r_to_r')
    # self.r_to_g = Linear(input_dim=feature_dim,
    #                      output_dim=output_dim,
    #                      weights_init=IsotropicGaussian(0.01),
    #                      biases_init=Constant(0),
    #                      use_bias=False,
    #                      name='r_to_g')
    self.image_embed.initialize()
    self.word_embed.initialize()
    self.r_embed.initialize()
    self.m_to_s.initialize()
    self.r_to_r.initialize()
    # self.r_to_g.initialize()

    # the sequence-to-sequence LSTM
    self.seq = LSTM(output_dim,
                    name='rewatcher_seq',
                    weights_init=IsotropicGaussian(0.01),
                    biases_init=Constant(0))
    self.seq_embed = Linear(feature_dim,
                            output_dim * 4,
                            name='rewatcher_seq_embed',
                            weights_init=IsotropicGaussian(0.01),
                            biases_init=Constant(0),
                            use_bias=False)
    self.seq.initialize()
    self.seq_embed.initialize()
def construct_model(activation_function, r_dim, hidden_dim, out_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx

    # Get a representation of r of size r_dim
    r = DAE(r)
    # r is now nj x r_dim

    # r_rep is nx x nj x r_dim
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (r_dim + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)

    # Change concat from Batch x Time x Features to T x B x F
    rnn_input = concat.dimshuffle(1, 0, 2)

    linear = Linear(input_dim=r_dim + 1, output_dim=4 * hidden_dim,
                    name="input_linear")
    lstm = LSTM(dim=hidden_dim, activation=activation_function,
                name="hidden_recurrent")
    top_linear = Linear(input_dim=hidden_dim, output_dim=out_dim,
                        name="out_linear")

    pre_rnn = linear.apply(rnn_input)
    states = lstm.apply(pre_rnn)[0]
    activations = top_linear.apply(states)
    activations = tensor.mean(activations, axis=0)

    cost = Softmax().categorical_cross_entropy(y, activations)

    pred = activations.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in (linear, lstm, top_linear):
        brick.weights_init = IsotropicGaussian(0.1)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost, error_rate
def onestepEncAttn(hEncAttn):
    preEncattn = attentionmlpEnc.apply(hEncAttn)
    attEncsoft = Softmax()
    attEncpyx = attEncsoft.apply(preEncattn.flatten())
    attEncpred = attEncpyx.flatten()
    attenc = T.mul(hEncAttn.dimshuffle(1, 0), attEncpred).dimshuffle(1, 0)
    return attenc
def onestepContextAttn(hContextAttn):
    preContextatt = attentionmlpContext.apply(hContextAttn)
    attContextsoft = Softmax()
    attContextpyx = attContextsoft.apply(preContextatt.flatten())
    attContextpred = attContextpyx.flatten()
    attcontext = T.mul(hContextAttn.dimshuffle(1, 0),
                       attContextpred).dimshuffle(1, 0)
    return attcontext
def __init__(self, config, **kwargs):
    super(Model, self).__init__(config,
                                output_dim=config.tgtcls.shape[0],
                                **kwargs)
    self.classes = theano.shared(
        numpy.array(config.tgtcls, dtype=theano.config.floatX),
        name='classes')
    self.softmax = Softmax()
    self.children.append(self.softmax)
def get_costs(presoft, args):
    if has_indices(args.dataset):
        # Targets: (Time x Batch)
        y = tensor.lmatrix('targets')
        y_mask = tensor.ones_like(y, dtype=floatX)
        y_mask = tensor.set_subtensor(
            y_mask[:args.context, :],
            tensor.zeros_like(y_mask[:args.context, :], dtype=floatX))

        time, batch, feat = presoft.shape
        cross_entropy = Softmax().categorical_cross_entropy(
            (y.flatten() * y_mask.reshape((batch * time,))),
            (presoft.reshape((batch * time, feat)) *
             y_mask.reshape((batch * time, 1))))

        # renormalization
        renormalized_cross_entropy = cross_entropy * (
            tensor.sum(tensor.ones_like(y_mask)) / tensor.sum(y_mask))

        # BPC: Bits Per Character
        unregularized_cost = renormalized_cross_entropy / tensor.log(2)
        unregularized_cost.name = "cross_entropy"
    else:
        # Targets: (Time x Batch x Features)
        y = tensor.tensor3('targets', dtype=floatX)
        y_mask = tensor.ones_like(y[:, :, 0], dtype=floatX)
        y_mask = tensor.set_subtensor(
            y_mask[:args.context, :],
            tensor.zeros_like(y_mask[:args.context, :], dtype=floatX))
        if args.used_inputs is not None:
            y_mask = tensor.set_subtensor(
                y_mask[:args.used_inputs, :],
                tensor.zeros_like(y_mask[:args.used_inputs, :],
                                  dtype=floatX))

        # SquaredError does not work on 3D tensors
        target = (y * y_mask.dimshuffle(0, 1, 'x'))
        values = (presoft[:-1, :, :] * y_mask.dimshuffle(0, 1, 'x'))
        target = target.reshape(
            (target.shape[0] * target.shape[1], target.shape[2]))
        values = values.reshape(
            (values.shape[0] * values.shape[1], values.shape[2]))
        unregularized_cost = SquaredError().apply(target, values)

        # renormalization
        unregularized_cost = unregularized_cost * (
            tensor.sum(tensor.ones_like(y_mask)) / tensor.sum(y_mask))
        unregularized_cost.name = "mean_squared_error"

    # TODO: add regularisation for the cost
    # tensor.log(1) is zero; it is added only so that the regularized and
    # unregularized costs are distinct variables for monitoring
    cost = unregularized_cost + tensor.log(1)
    cost.name = "regularized_cost"
    return cost, unregularized_cost
def test_fully_layer():
    batch_size = 2
    x = T.tensor4()
    y = T.ivector()
    V = 200
    layer_conv = Convolutional(filter_size=(5, 5), num_filters=V,
                               name="toto",
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0))
    # try with no bias
    activation = Rectifier()
    pool = MaxPooling(pooling_size=(2, 2))

    convnet = ConvolutionalSequence([layer_conv, activation, pool],
                                    num_channels=15,
                                    image_size=(10, 10),
                                    name="conv_section")
    convnet.push_allocation_config()
    convnet.initialize()
    output = convnet.apply(x)
    batch_size = output.shape[0]
    output_dim = np.prod(convnet.get_dim('output'))
    result_conv = output.reshape((batch_size, output_dim))

    mlp = MLP(activations=[Rectifier().apply],
              dims=[output_dim, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0.0))
    mlp.initialize()
    output = mlp.apply(result_conv)
    cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))

    cg = ComputationGraph(cost)
    W = VariableFilter(roles=[WEIGHT])(cg.variables)
    B = VariableFilter(roles=[BIAS])(cg.variables)
    W = W[0]
    b = B[0]

    inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
    outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
    var_input = inputs_fully[0]
    var_output = outputs_fully[0]

    [d_W, d_S, d_b] = T.grad(cost, [W, var_output, b])

    d_b = d_b.dimshuffle(('x', 0))
    d_p = T.concatenate([d_W, d_b], axis=0)

    x_value = 1e3 * np.random.ranf((2, 15, 10, 10))
    f = theano.function([x, y], [var_input, d_S, d_p],
                        allow_input_downcast=True,
                        on_unused_input='ignore')
    A, B, C = f(x_value, [5, 0])
    A = np.concatenate([A, np.ones((2, 1))], axis=1)
    print('A', A.shape)
    print('B', B.shape)
    print('C', C.shape)
    print(lin.norm(C - np.dot(np.transpose(A), B), 'fro'))
    return
def build_intermediate_var_unlabelled(model):
    # build cost function and computation graph
    x = T.tensor4()
    output = model.apply(x)
    output = output.reshape((x.shape[0], model.get_dim('output')))
    labels = T.argmax(output, axis=1).reshape((x.shape[0], 1))
    cost = Softmax().categorical_cross_entropy(labels.flatten(),
                                               output).mean()
    return theano.function([x], build_dictionnary(cost),
                           allow_input_downcast=True,
                           on_unused_input='ignore')
def __init__(self, config, **kwargs):
    super(Model, self).__init__(config,
                                rec_input_len=4,
                                output_dim=config.tgtcls.shape[0],
                                **kwargs)
    self.classes = theano.shared(
        numpy.array(config.tgtcls, dtype=theano.config.floatX),
        name='classes')
    self.softmax = Softmax()
    self.sequences.extend(['latitude_lag', 'longitude_lag'])
    self.children.append(self.softmax)
def build_model(self, hidden_dim):
    board_input = T.vector('input')
    mlp = MLP(activations=[LeakyRectifier(0.1), LeakyRectifier(0.1)],
              dims=[9, hidden_dim, 9],
              weights_init=IsotropicGaussian(0.00001),
              biases_init=Constant(0.01))
    output = mlp.apply(board_input)
    masked_output = Softmax().apply(output * T.eq(board_input, 0) * 1000)
    mlp.initialize()
    cost, chosen = self.get_cost(masked_output)
    return board_input, mlp, cost, chosen, output
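# Reading of the mask above (inferred from the code, not documented in the
# original): for occupied squares T.eq(board_input, 0) is 0, so their logits
# collapse to 0, while empty squares keep their logits scaled by 1000; after
# the Softmax almost all probability mass therefore falls on legal (empty)
# squares.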
def put_labels(model, data_labelled, data_unlabelled,
               f_predict=None, f_loss=None):
    x = T.tensor4('x')
    y = T.imatrix()
    y_prev = model.apply(x)
    if f_predict is None:
        # define prediction function
        y_softmax = Softmax().apply(y_prev)
        prediction = T.argmax(y_softmax, axis=1)
        f_predict = theano.function([x], prediction,
                                    allow_input_downcast=True)
    if f_loss is None:
        cost = Softmax().categorical_cross_entropy(y.flatten(),
                                                   y_prev).mean()
        f_loss = theano.function([x, y], cost, allow_input_downcast=True)

    # now proceed to the mean loss
    batch_size = 64
    x_train_L, y_train_L = data_labelled
    y_train_U = []
    x_train_U = data_unlabelled
    # integer division: number of full batches
    n_train_U = len(x_train_U) // batch_size
    # NB: pay attention to the shape of y_train!
    for index in range(n_train_U):
        y_train_batch = f_predict(
            x_train_U[batch_size * index:(index + 1) * batch_size])
        y_train_U.append(y_train_batch)
    # handle the remainder that does not fill a full batch
    if n_train_U * batch_size < len(x_train_U):
        x_batch = x_train_U[n_train_U * batch_size:]
        y_train_U.append(f_predict(x_batch))
    y_train_U = np.concatenate(y_train_U, axis=0)[:, None]
    assert len(y_train_U) == len(x_train_U), \
        "problem: length does not match for unlabelled data"
    y_train = np.concatenate([y_train_L, y_train_U], axis=0)
    x_train = np.concatenate([x_train_L, x_train_U], axis=0)
    return (x_train, y_train), f_loss
def test_softmax_vector():
    x = tensor.matrix('x')
    y = tensor.lvector('y')

    softmax_out = Softmax().apply(x)
    cost = CategoricalCrossEntropy().apply(y, softmax_out)
    cost_stable = Softmax().categorical_cross_entropy(y, x)

    softmax_cost_func = function([x, y], cost)
    softmax_cost_stable_func = function([x, y], cost_stable)

    batch_size = 100
    x_size = 10
    rng = numpy.random.RandomState(1)
    x_val = rng.randn(batch_size, x_size).astype(theano.config.floatX)
    y_val = rng.randint(low=0, high=x_size, size=(batch_size))
    softmax_cost = softmax_cost_func(x_val, y_val)
    softmax_cost_stable = softmax_cost_stable_func(x_val, y_val)

    assert_allclose(softmax_cost, softmax_cost_stable)
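# A hedged illustration of why the fused path is the "stable" one: the
# cross-entropy can be computed as logsumexp(x) - x[y] without materializing
# softmax probabilities that underflow for large logits. This numpy helper is
# an illustrative sketch, not part of the original test.
def stable_xent_reference(x_val, y_val):
    import numpy
    m = x_val.max(axis=1, keepdims=True)
    logsumexp = m[:, 0] + numpy.log(numpy.exp(x_val - m).sum(axis=1))
    return logsumexp - x_val[numpy.arange(len(y_val)), y_val]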
def build_intermediate_var_labelled(model):
    # build cost function and computation graph
    x = T.tensor4()
    y = T.imatrix()
    output = model.apply(x)
    # TODO: get_dim('name') for Architecture
    output = output.reshape((x.shape[0], model.get_dim('output')))
    cost = Softmax().categorical_cross_entropy(y.flatten(), output).mean()
    return theano.function([x, y], build_dictionnary(cost),
                           allow_input_downcast=True,
                           on_unused_input='ignore')
def __init__(self, config, prefix_encoder, candidate_encoder, **kwargs):
    super(MemoryNetworkBase, self).__init__(**kwargs)

    self.prefix_encoder = prefix_encoder
    self.candidate_encoder = candidate_encoder
    self.config = config

    self.softmax = Softmax()
    self.children = [self.softmax, prefix_encoder, candidate_encoder]

    self.inputs = self.prefix_encoder.apply.inputs \
        + ['candidate_%s' % x for x in self.candidate_encoder.apply.inputs] \
        + ['candidate_destination_latitude',
           'candidate_destination_longitude']