Example #1
cost_train = costfun(
    l_out.get_output(input_dict, deterministic=False, mask=sym_mask),
    sym_target, sym_mask, db='COST TRAIN:')
cost_val = costfun(
    l_out.get_output(input_dict, deterministic=True, mask=sym_mask),
    sym_target, sym_mask, db='COST VAL:')
print "DONE"


# Get a list of all parameters in the network
all_params = lasagne.layers.get_all_params(l_out)

# Given a cost function (cost_train) and a list of parameters, Theano can
# calculate the gradients and an update rule w.r.t. each parameter.
# We use adadelta, which automatically tunes the learning rate.
# adadelta_normscaled returns a list of update rules, one per parameter.
print "CALCULATING UPDATES...",
updates = adadelta_normscaled(
    cost_train, all_params, batch_size=BATCH_SIZE, learning_rate=1.0,
    epsilon=10e-6, max_norm=0.02, verbose=VERBOSE)
#updates = nesterov_normscaled(cost_train, all_params, 0.01, 0.5, BATCH_SIZE)
print "DONE"
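
# Illustration (not from the original script): the update rules returned by
# helpers like adadelta_normscaled are just (parameter, new_value) pairs built
# from theano.grad. A minimal plain-SGD sketch of the same idea:
import theano

def sgd_updates_sketch(cost, params, learning_rate=0.01):
    # Symbolically differentiate the cost w.r.t. every parameter.
    grads = theano.grad(cost, params)
    # One (param, new_value) pair per parameter; theano.function later applies
    # these assignments to the shared variables on every call.
    return [(p, p - learning_rate * g) for p, g in zip(params, grads)]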

# print number of params
total_params = sum([p.get_value().size for p in all_params])
print "#NETWORK params:", total_params


# These lists specify that sym_input should take the value of sh_input, etc.
# Note the cast: T.cast(sh_target, 'int32'). This is necessary because the
# shared target values are stored as float32 (so they can live on the GPU);
# we cast them back to integers before they are used in the graph.
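
# Illustration (not from the original script) of the givens/cast mechanism
# described above, using toy shared buffers; all names and shapes are made up:
import numpy as np
import theano
import theano.tensor as T

_sh_input = theano.shared(np.zeros((4, 5), dtype='float32'))
_sh_target = theano.shared(np.zeros((4,), dtype='float32'))
_sym_input = T.fmatrix('sym_input')
_sym_target = T.ivector('sym_target')
# givens substitutes the shared buffers for the symbolic inputs at compile
# time; the cast turns the float32-stored targets into int32 inside the graph.
_toy_fn = theano.function(
    [], T.sum(_sym_input) + T.sum(_sym_target),
    givens=[(_sym_input, _sh_input),
            (_sym_target, T.cast(_sh_target, 'int32'))])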
Example #2
cost_train = costfun(
    l_out.get_output(input_dict, deterministic=False, mask=mask),
    target_output, mask, db='COST TRAIN:')
cost_val = costfun(
    l_out.get_output(input_dict, deterministic=True, mask=mask),
    target_output, mask, db='COST VAL:')

all_params = lasagne.layers.get_all_params(l_out)

#updates = adadelta_normscaled(
#    cost_train, all_params,batch_size=BATCH_SIZE,learning_rate=1.0,
#    epsilon=10e-6, max_norm=MAX_NORM_GRADIENTS, verbose=VERBOSE)
updates = nesterov_normscaled(cost_train, all_params, 0.01, 0.5, BATCH_SIZE)
if UNITTYPE_RNN == 'LAunits':
    updates_launits = adadelta_normscaled(
        cost_train, params_launits, batch_size=BATCH_SIZE, learning_rate=1.0,
        epsilon=10e-6, max_norm=MAX_NORM_GRADIENTS, verbose=VERBOSE,
        weight_decay=LAUNITL2)
    updates.extend(updates_launits)

# print number of params
total_params = sum([p.get_value().size for p in all_params])
if UNITTYPE_RNN == 'LAunits':
    launit_params = sum([p.get_value().size for p in params_launits])
    print "#NETWORK params:", total_params, "LAunit params", launit_params
else:
    print "#NETWORK params:", total_params

logger.info('Compiling functions...')
givens = [(input, sh_input),
          (target_output, T.cast(sh_target_output, 'int32')),
          (mask, sh_mask)]
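
# Hypothetical continuation (the snippet is truncated here): given the above
# givens list, the training and validation functions would typically be
# compiled along these lines; the exact inputs of the original functions are
# unknown.
train_fn = theano.function([], cost_train, updates=updates, givens=givens)
eval_fn = theano.function([], cost_val, givens=givens)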