Ejemplo n.º 1
0
# Reshape the softmax output to `shape` (defined outside this excerpt) and
# give the variable a readable name.
softmax_out = softmax_out.reshape(shape)
softmax_out.name = 'softmax_out'

# Compare only the last time step: both metrics take the last entry along
# the first axis of the targets and of the predictions; the targets are
# additionally indexed at 0 on their third axis.
# NOTE(review): presumably y is (time, batch, 1) and softmax_out is
# (time, batch, classes) -- confirm where they are built.
cost = CategoricalCrossEntropy().apply(y[-1, :, 0], softmax_out[-1])
cost.name = 'CrossEntropy'
error_rate = MisclassificationRate().apply(y[-1, :, 0], softmax_out[-1])
error_rate.name = 'error_rate'

# Parameter initialisation.
# The two feed-forward bricks each get their own IsotropicGaussian(0.01)
# weight initialiser and zero biases, and are initialised in order.
for brick in (x_to_h1, h1_to_o):
    brick.weights_init, brick.biases_init = (IsotropicGaussian(0.01),
                                             Constant(0))
    brick.initialize()

# The recurrent brick starts from identity weights and zero biases.
rnn.weights_init, rnn.biases_init = Identity(), Constant(0)
rnn.initialize()

# Single-argument print(...) produces the same output on Python 2 and is
# also valid Python 3 syntax, unlike the bare print statement.
print('Bulding training process...')

# Optimise every parameter found in the computation graph of `cost`.
# The step rule comes from `learning_algorithm`, a helper defined outside
# this excerpt; only its keyword arguments are visible here.
algorithm = GradientDescent(cost=cost,
                            parameters=ComputationGraph(cost).parameters,
                            step_rule=learning_algorithm(
                                learning_rate=1e-6,
                                momentum=0.0,
                                clipping_threshold=1.0,
                                algorithm='adam'))

# NOTE(review): MNIST is called with a batch size and unpacked into two
# streams, so it looks like a project helper returning (train, valid)
# rather than a dataset class -- confirm.
train_stream, valid_stream = MNIST(batch_size=batch_size)

monitor_train_cost = TrainingDataMonitoring([cost, error_rate],
                                            prefix="train",
Ejemplo n.º 2
0
# Alternative recurrent brick, left disabled:
#lstm = GatedRecurrent(dim=h_dim,
#                      activation=Tanh())

# Linear brick mapping an h_dim-wide input to a single output unit.
decode = Linear(name='decode', input_dim=h_dim, output_dim=1)

# Feed-forward bricks: a fresh IsotropicGaussian(0.01) weight initialiser
# and zero biases per brick, initialised in the order listed.
# NOTE(review): `gates` is initialised here although its only use visible
# in this excerpt is commented out.
for brick in (encode, gates, decode):
    brick.weights_init, brick.biases_init = (IsotropicGaussian(0.01),
                                             Constant(0.))
    brick.initialize()

# The recurrent brick gets the same scheme; Orthogonal() is kept as a
# disabled alternative for its weights.
lstm.weights_init, lstm.biases_init = IsotropicGaussian(0.01), Constant(0.)
#lstm.weights_init = Orthogonal()
lstm.initialize()

# Disabled debugging probes: each one compiles a single stage of the graph
# into a Theano function and evaluates it on features_test (and, for the
# loss probe, targets_test).
#ComputationGraph(encode.apply(x)).get_theano_function()(features_test)[0].shape
#ComputationGraph(lstm.apply(encoded)).get_theano_function()(features_test)
#ComputationGraph(decode.apply(hiddens[-1])).get_theano_function()(features_test)[0].shape

#ComputationGraph(SquaredError().apply(y, y_hat.flatten())).get_theano_function()(features_test, targets_test)[0].shape

# Forward pass: encode the input, run the recurrent brick over the encoded
# sequence, and decode only the last entry along the first axis of its
# output.
encoded = encode.apply(x)
# Disabled variant that also passes gates.apply(x) to the recurrent brick:
#hiddens = lstm.apply(encoded, gates.apply(x))
hiddens = lstm.apply(encoded)
y_hat  = decode.apply(hiddens[-1])

# NOTE(review): the disabled loss probe above flattens y_hat before calling
# SquaredError, whereas here it is passed as-is -- confirm that y and y_hat
# have matching shapes so no unintended broadcasting occurs.
cost = SquaredError().apply(y, y_hat)
cost.name = 'cost'
Ejemplo n.º 3
0
# Reshape the softmax output to `shape` (defined outside this excerpt) and
# give the variable a readable name.
softmax_out = softmax_out.reshape(shape)
softmax_out.name = 'softmax_out'

# Compare only the last time step: both metrics take the last entry along
# the first axis of the targets and of the predictions; the targets are
# additionally indexed at 0 on their third axis.
# NOTE(review): presumably y is (time, batch, 1) and softmax_out is
# (time, batch, classes) -- confirm where they are built.
cost = CategoricalCrossEntropy().apply(y[-1, :, 0], softmax_out[-1])
cost.name = 'CrossEntropy'
error_rate = MisclassificationRate().apply(y[-1, :, 0], softmax_out[-1])
error_rate.name = 'error_rate'

# Parameter initialisation.
# Each feed-forward brick receives its own IsotropicGaussian(0.01) weight
# initialiser plus zero biases before being initialised.
for brick in (x_to_h1, h1_to_o):
    brick.weights_init, brick.biases_init = (IsotropicGaussian(0.01),
                                             Constant(0))
    brick.initialize()

# Recurrent brick: identity weights, zero biases.
rnn.weights_init, rnn.biases_init = Identity(), Constant(0)
rnn.initialize()

# Single-argument print(...) produces the same output on Python 2 and is
# also valid Python 3 syntax, unlike the bare print statement.
print('Bulding training process...')

# Build the computation graph of `cost` once and reuse it both for the
# optimiser's parameter list and for the parameter listing that follows,
# instead of constructing it twice.
cg = ComputationGraph(cost)

# The step rule comes from `learning_algorithm`, a helper defined outside
# this excerpt; only its keyword arguments are visible here.
algorithm = GradientDescent(
    cost=cost,
    parameters=cg.parameters,
    step_rule=learning_algorithm(learning_rate=1e-6, momentum=0.0,
                                 clipping_threshold=1.0, algorithm='adam'))


# NOTE(review): params_to_sync and counter are only initialised here;
# presumably the code after this excerpt fills them while walking
# cg.parameters -- confirm.
params_to_sync = {}
#cg.variables
counter = 0
print("---- cg.parameters ----")
Ejemplo n.º 4
0
# Despite its name, `lstm` holds a SimpleRecurrent brick with a Tanh
# activation and h_dim hidden units.
lstm = SimpleRecurrent(dim=h_dim,
                       activation=Tanh())

# Alternative recurrent brick, left disabled:
#lstm = GatedRecurrent(dim=h_dim,
#                      activation=Tanh())

# Linear brick mapping an h_dim-wide input to a single output unit.
decode = Linear(name='decode',
                input_dim=h_dim,
                output_dim=1)

# Feed-forward bricks: a fresh IsotropicGaussian(0.01) weight initialiser
# and zero biases per brick, initialised in the order listed.
# NOTE(review): `gates` is initialised here although its only use visible
# in this excerpt is commented out.
for brick in (encode, gates, decode):
    brick.weights_init, brick.biases_init = (IsotropicGaussian(0.01),
                                             Constant(0.))
    brick.initialize()

# The recurrent brick gets the same scheme; Orthogonal() is kept as a
# disabled alternative for its weights.
lstm.weights_init, lstm.biases_init = IsotropicGaussian(0.01), Constant(0.)
#lstm.weights_init = Orthogonal()
lstm.initialize()

# Disabled debugging probes: each one compiles a single stage of the graph
# into a Theano function and evaluates it on features_test (and, for the
# loss probe, targets_test).
#ComputationGraph(encode.apply(x)).get_theano_function()(features_test)[0].shape
#ComputationGraph(lstm.apply(encoded)).get_theano_function()(features_test)
#ComputationGraph(decode.apply(hiddens[-1])).get_theano_function()(features_test)[0].shape

#ComputationGraph(SquaredError().apply(y, y_hat.flatten())).get_theano_function()(features_test, targets_test)[0].shape

# Forward pass: encode the input, run the recurrent brick over the encoded
# sequence, and decode only the last entry along the first axis of its
# output.
encoded = encode.apply(x)
# Disabled variant that also passes gates.apply(x) to the recurrent brick:
#hiddens = lstm.apply(encoded, gates.apply(x))
hiddens = lstm.apply(encoded)
y_hat = decode.apply(hiddens[-1])

# NOTE(review): the disabled loss probe above flattens y_hat before calling
# SquaredError, whereas here it is passed as-is -- confirm that y and y_hat
# have matching shapes so no unintended broadcasting occurs.
cost = SquaredError().apply(y, y_hat)
cost.name = 'cost'