def test_basic_momentum():
    """Check the first three steps produced by ``BasicMomentum(0.5)``.

    The cost is ``sum(a ** 2)`` with ``a = [3, 4]``, so its gradient is
    ``2 * a = [6, 8]``. The compiled function is called three times and
    each returned step is compared against a fixed expected value; the
    expected sequence matches ``step = 0.5 * previous_step + gradient``
    with the gradient staying at ``[6, 8]``.
    """
    param = shared_floatx([3, 4])
    gradient = tensor.grad((param ** 2).sum(), param)

    rule = BasicMomentum(0.5)
    steps, updates = rule.compute_steps(OrderedDict([(param, gradient)]))
    step_fn = theano.function([], [steps[param]], updates=updates)

    # One expected step per successive call of the compiled function.
    expected_sequence = ([6., 8.], [9., 12.], [10.5, 14.])
    for expected in expected_sequence:
        assert_allclose(step_fn()[0], expected)
def test_basic_nesterov_momentum():
    """Compare ``BasicNesterovMomentum`` steps with ``BasicMomentum`` steps.

    Both rules use the value 0.5 and the same cost ``sum(a ** 2)`` with
    ``a = [3, 4]``. The classic-momentum function is called once before
    the comparisons start, so the n-th Nesterov step is checked against
    the (n+1)-th classic step, for three consecutive calls.
    """
    param = shared_floatx([3, 4])
    objective = (param ** 2).sum()

    def compile_step_function(rule):
        # Build the steps for `param` under `rule` and compile a
        # no-argument function that returns the step and applies the
        # rule's updates on every call.
        rule_steps, rule_updates = rule.compute_steps(
            OrderedDict([(param, tensor.grad(objective, param))]))
        return theano.function([], [rule_steps[param]],
                               updates=rule_updates)

    nesterov_fn = compile_step_function(BasicNesterovMomentum(0.5))
    classic_fn = compile_step_function(BasicMomentum(0.5))

    classic_fn()  # One call for the "peek ahead" of the Nesterov momentum.
    for _ in range(3):
        assert_allclose(nesterov_fn()[0], classic_fn()[0])
def test_basic_momentum_broadcastable():
    """Run ``verify_broadcastable_handling`` on a ``BasicMomentum(0.5)`` rule.

    NOTE(review): the helper is defined outside this view; presumably it
    checks that the rule copes with broadcastable parameters -- confirm.
    """
    rule = BasicMomentum(0.5)
    verify_broadcastable_handling(rule)
# Module-level configuration values.
# NOTE(review): the code that consumes these names is not visible here, so
# the group descriptions below are inferred from the names only -- confirm
# against the consumer before relying on them.

# Presumably the number of distinct entities and the embedding width.
n_entities = 550
embed_size = 200

# Presumably settings for the context LSTM stack (one list entry per layer).
ctx_lstm_size = [256]
ctx_skip_connections = True

# Presumably settings for the question LSTM stack (one list entry per layer).
question_lstm_size = [256]
question_skip_connections = True

# Presumably the attention MLP: one hidden size per Tanh activation.
attention_mlp_hidden = [100]
attention_mlp_activations = [Tanh()]

# Both lists are empty here, i.e. no output-MLP hidden sizes or activations
# are configured.
out_mlp_hidden = []
out_mlp_activations = []

# Step rule built from RMSProp (decay_rate=0.95, learning_rate=5e-5) and
# BasicMomentum (momentum=0.9), passed to CompositeRule in that list order.
# NOTE(review): `step_rule` is assigned again further down in this view; if
# both assignments live in the same module, the later one wins.
step_rule = CompositeRule([
    RMSProp(decay_rate=0.95, learning_rate=5e-5),
    BasicMomentum(momentum=0.9)
])

# Presumably the dropout rate and the weight-noise level (0. = none) --
# TODO confirm.
dropout = 0.2
w_noise = 0.

# Presumably validation/saving/printing periods; units are not visible here.
# NOTE(review): all three names are assigned again further down in this view.
valid_freq = 1000
save_freq = 1000
print_freq = 100

# Initialization schemes: IsotropicGaussian(0.01) for weights, Constant(0.)
# for biases.
weights_init = IsotropicGaussian(0.01)
biases_init = Constant(0.)
# Module-level configuration values.
# NOTE(review): the code that consumes these names is not visible here, so
# the descriptions below are inferred from the names only -- confirm.

# Presumably the output and hidden layer widths.
out_dim = 1
hidden_dim = 64
# Activation object and a matching string label; the two must be kept in
# sync by hand.
activation_function = Tanh()
activation_function_name = 'Tanh'
batch_size = 100 
# Presumably weight-noise standard deviation and input dropout rate --
# TODO confirm.
w_noise_std = 0.01
i_dropout = 0.5
# Presumably the fraction of the data used for training -- TODO confirm.
proportion_train = 0.9
# Label for the optimizer; how it is used is not visible here.
algo = 'RMS'
learning_rate_value = 1e-5
momentum_value = 0.9
# Only referenced from the commented-out `decay_rate=` argument below, so
# this value currently has no effect within the visible code.
decay_rate_value = 0
StepClipping_value = 2

# Step rule built from RMSProp, BasicMomentum and StepClipping, passed to
# CompositeRule in that list order. RMSProp receives only `learning_rate`;
# the `decay_rate` keyword is commented out.
# NOTE(review): `step_rule`, `print_freq`, `valid_freq` and `save_freq` are
# also assigned earlier in this view; if both sections live in the same
# module, the assignments here override the earlier ones.
step_rule = CompositeRule([RMSProp(learning_rate=learning_rate_value), #decay_rate=decay_rate_value,
                          BasicMomentum(momentum=momentum_value),
                          StepClipping(StepClipping_value)])
# Presumably printing/validation/saving periods; units are not visible here.
print_freq = 1000
valid_freq = 10000
save_freq = 10000

class Model():
    def __init__(self):
        inp = tensor.tensor3('input')
        inp = inp.dimshuffle(1,0,2)
        target = tensor.matrix('target')
        target = target.reshape((target.shape[0],))
        product = tensor.lvector('product')
        missing = tensor.eq(inp, 0)
        train_input_mean = 1470614.1
        train_input_std = 3256577.0