sym_target, sym_mask, db='COST TRAIN:')
# NOTE(review): the head of the cost_train = costfun(...) call above is in an
# earlier chunk, not visible here. Validation cost uses deterministic=True
# (stochastic layers such as dropout disabled).
cost_val = costfun( l_out.get_output(input_dict, deterministic=True,mask=sym_mask), sym_target, sym_mask, db='COST VAL:')
print "DONE"
# Get a list of all parameters in the network
all_params = lasagne.layers.get_all_params(l_out)
# Given a cost function (cost_train) and a list of parameters Theano can
# calculate the gradients and update rules w.r.t. each parameter.
# We use adadelta, which automatically tunes the learning rate.
# adadelta_normscaled returns a list of update rules for each parameter.
# NOTE(review): epsilon=10e-6 evaluates to 1e-5 — confirm 1e-6 was not intended.
updates = adadelta_normscaled( cost_train, all_params,batch_size=BATCH_SIZE,learning_rate=1.0, epsilon=10e-6, max_norm=0.02, verbose=VERBOSE)
print "CALCULATING UPDATES...",
# Alternative optimizer, kept for reference:
#updates = nesterov_normscaled( cost_train, all_params, 0.01, 0.5, BATCH_SIZE)
print "DONE"
# print number of params
total_params = sum([p.get_value().size for p in all_params])
print "#NETWORK params:", total_params
# These lists specify that sym_input should take the value of sh_input, etc.
# Note the cast: T.cast(sh_target, 'int32'). This is necessary because Theano
# only supports shared variables with type float32. We cast the shared
# value to an integer before it is used in the graph.
cost_train = costfun( l_out.get_output(input_dict, deterministic=False,mask=mask), target_output, mask, db='COST TRAIN:') cost_val = costfun( l_out.get_output(input_dict, deterministic=True,mask=mask), target_output, mask, db='COST VAL:') all_params = lasagne.layers.get_all_params(l_out) #updates = adadelta_normscaled( # cost_train, all_params,batch_size=BATCH_SIZE,learning_rate=1.0, # epsilon=10e-6, max_norm=MAX_NORM_GRADIENTS, verbose=VERBOSE) updates = nesterov_normscaled( cost_train, all_params, 0.01, 0.5, BATCH_SIZE) if UNITTYPE_RNN == 'LAunits': updates_launits = adadelta_normscaled(cost_train, params_launits, batch_size=BATCH_SIZE,learning_rate=1.0, epsilon=10e-6, max_norm=MAX_NORM_GRADIENTS, verbose=VERBOSE, weight_decay = LAUNITL2) updates.extend(updates_launits) # print number of params total_params = sum([p.get_value().size for p in all_params]) if UNITTYPE_RNN == 'LAunit': launit_params = sum([p.get_value().size for p in params_launits]) print "#NETWORK params:", total_params, "LAunit params", launit_params else: print "#NETWORK params:", total_params logger.info('Compiling functions...') givens = [(input, sh_input), (target_output, T.cast(sh_target_output, 'int32')), (mask, sh_mask)]