def build_updates(parameters, gradients, update_vars, initial_learning_rate, momentum):
    update_vars._learning_rate = initial_learning_rate
    return updates.momentum(parameters, gradients, P=P,
                            learning_rate=update_vars._learning_rate,
                            mu=momentum)
def turing_updates(cost, lr):
    params = P.values()
    # optional L2 weight decay on the parameters in P
    l2 = T.sum(0)
    for p in params:
        l2 = l2 + (p ** 2).sum()
    all_cost = cost + 1e-3 * l2
    clipper = updates.clip(5.)
    g = T.grad(all_cost, wrt=params)
    grads = clipper(g)
    return updates.momentum(params, grads, mu=0, learning_rate=lr)
def turing_updates(cost, lr):
    params = P.values()
    # optional L2 weight decay on the parameters in P (disabled here)
    l2 = T.sum(0)
    """
    for p in params:
        l2 = l2 + (p ** 2).sum()
    all_cost = cost + 1e-3 * l2
    """
    all_cost = cost
    grads = [T.clip(g, -100, 100) for g in T.grad(all_cost, wrt=params)]
    return updates.momentum(params, grads, mu=0, learning_rate=lr)
def turing_updates(cost, lr):
    params = P.values()
    # optional L2 weight decay on the parameters in P, cast to floatX
    l2 = T.sum(0).astype(theano.config.floatX)
    for p in params:
        l2 = l2 + (p ** 2).sum().astype(theano.config.floatX)
    all_cost = cost + 1e-3 * l2
    clipper = updates.clip(5.)
    g = T.grad(all_cost, wrt=params)
    grads = clipper(g)
    # grads = [T.clip(g, -5, 5) for g in T.grad(all_cost, wrt=params)]
    # return updates.rmsprop(params, grads, learning_rate=lr)
    return updates.momentum(params, grads, mu=0, learning_rate=lr)
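For comparison, a minimal self-contained sketch of the same pattern (clipped gradients fed into a momentum step), including the imports the snippets above omit. The toy shared variable W and the squared-error cost are invented for illustration; updates.clip and updates.momentum are used exactly as in the calls shown above.

import numpy as np
import theano
import theano.tensor as T
from theano_toolkit import updates

# Toy parameter and cost, purely illustrative.
W = theano.shared(np.zeros((5, 3), dtype=theano.config.floatX), name='W')
X = T.fmatrix('X')
Y = T.fmatrix('Y')
cost = T.mean(T.sqr(T.dot(X, W) - Y))

params = [W]
clipper = updates.clip(5.)                     # same clipping helper as in turing_updates
grads = clipper(T.grad(cost, wrt=params))
train = theano.function(
    inputs=[X, Y],
    outputs=cost,
    updates=updates.momentum(params, grads, mu=0.9, learning_rate=0.01),
    allow_input_downcast=True
)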
probs = predict(X)
alpha = 0.5
params = P.values()
cost = ctc.cost(probs, Y)  # + 1e-8 * sum(T.sum(T.sqr(w)) for w in params)
gradients = T.grad(cost, wrt=params)

# Accumulate gradients over several examples, then apply a single momentum
# step with the averaged gradient and reset the accumulators.
gradient_acc = [theano.shared(0 * p.get_value()) for p in params]
counter = theano.shared(np.float32(0.))
acc = theano.function(
    inputs=[X, Y],
    outputs=cost,
    updates=[(a, a + g) for a, g in zip(gradient_acc, gradients)]
            + [(counter, counter + np.float32(1.))]
)
update = theano.function(
    inputs=[], outputs=[],
    updates=updates.momentum(params, [g / counter for g in gradient_acc])
            + [(a, np.float32(0) * a) for a in gradient_acc]
            + [(counter, np.float32(0.))]
)
test = theano.function(inputs=[X, Y], outputs=probs[:, Y])

training_examples = [word.strip() for word in open('dictionary.txt')]
import random
for _ in xrange(1500):
    random.shuffle(training_examples)
    for i, string in enumerate(training_examples):
        print acc(font.imagify(string), label_seq(string))
        if i % 20 == 0:
            update()
        if i % 100 == 0:
            hinton.plot(test(font.imagify("test"), label_seq("test")).T,
    x = np.append(x, np.fliplr(x))
    y = np.append(y, y)
    return x, y

if __name__ == "__main__":
    print "Loading dataset..."
    x, y = datasets.transcription_factor()
    x, y = preprocess(x, y)
    batches = make_batches(x, y)

    print "Compiling theano..."
    X = T.fmatrix('X')
    Y = T.fmatrix('Y')
    P = Parameters()
    net = model.build(P)
    Y_hat = net(X)
    predict = theano.function([X], Y_hat, allow_input_downcast=True)
    cost = model.cost(P, Y_hat, Y)

    print "Calculating gradient..."
    params = P.values()
    grad = T.grad(cost, wrt=params)
    grad = [g.astype(theano.config.floatX) for g in grad]  # cast gradients back to floatX
    train = theano.function([X, Y], cost,
                            updates=updates.momentum(params, grad),
                            allow_input_downcast=True)

    trainer = Trainer(batches, train, predict)
    # trainer()
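Purely as a hedged usage sketch: once train and predict above are compiled, they can also be called directly on NumPy batches instead of going through the project's Trainer wrapper. The shapes below are made up and would need to match whatever model.build actually expects.

    # Hypothetical batch; real layouts come from preprocess/make_batches above.
    x_batch = np.random.rand(64, 128).astype(np.float32)
    y_batch = np.random.rand(64, 1).astype(np.float32)

    print "batch cost:", train(x_batch, y_batch)   # evaluates the cost and applies one momentum step
    print "prediction shape:", predict(x_batch).shape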
    loss = cost(layer,
                reconstruct(corrupt(layer), W, b, b_rec,
                            input_layer=(layer.name == 'X')),
                kl_divergence=(layer.name != 'X'))
    lr = 0.003 if layer.name == 'X' else 0.01
    parameters = [W, b, b_rec]
    gradients = T.grad(loss, wrt=parameters)
    train_fns.append(
        chunk.build_trainer(
            inputs=[X],
            # outputs=loss,
            updates=updates.momentum(parameters, gradients, learning_rate=lr),
            mapping=shared_variables_mapping
        )
    )
    validation_fns.append(
        validator.build(
            inputs=[X],
            outputs={"loss": loss},
            monitored_var="loss",
            validation_stream=frame_data.validation_stream,
            callback=build_validation_callback(P)
        )
    )
    logging.info("Done compiling for layer %s" % layer)

save(P)
for i, (train_fn, validation_fn) in enumerate(zip(train_fns, validation_fns)):
    logging.info("Starting pre-training for epoch %d" % i)